author     Eric Cox <eric.cox@mongodb.com>  2022-06-24 13:52:42 +0000
committer  Eric Cox <eric.cox@mongodb.com>  2022-06-24 13:52:42 +0000
commit     e41eb06388b603a2575e826d87051eebd38d52f5 (patch)
tree       2fd04f7aa3047bacb6b5f81ea802ae51ecd7b844
parent     e27fb371450c1aecbf3045c13c9a5257560ee615 (diff)
parent     d37641e0439f48745a656272a09eb121636ae7a2 (diff)
download   mongo-e41eb06388b603a2575e826d87051eebd38d52f5.tar.gz

Merge branch 'master' into eric/id-hack-ix-scan-refactor
-rw-r--r--.gitignore4
-rw-r--r--SConstruct100
-rw-r--r--buildscripts/antithesis/README.md104
-rw-r--r--buildscripts/antithesis/base_images/mongo_binaries/Dockerfile2
-rw-r--r--buildscripts/antithesis/base_images/workload/Dockerfile6
-rw-r--r--buildscripts/antithesis/base_images/workload/run_suite.py23
-rw-r--r--buildscripts/antithesis/topologies/sharded_cluster/docker-compose.yml19
-rw-r--r--buildscripts/antithesis/topologies/sharded_cluster/scripts/mongos_init.py158
-rwxr-xr-xbuildscripts/antithesis/topologies/sharded_cluster/scripts/mongos_init.sh7
-rw-r--r--buildscripts/antithesis/topologies/sharded_cluster/scripts/utils.py25
-rw-r--r--buildscripts/antithesis/topologies/sharded_cluster/scripts/workload_init.py3
-rwxr-xr-xbuildscripts/antithesis/topologies/sharded_cluster/scripts/workload_init.sh18
-rwxr-xr-xbuildscripts/antithesis_suite.py125
-rwxr-xr-xbuildscripts/burn_in_tests.py67
-rwxr-xr-xbuildscripts/errorcodes.py11
-rw-r--r--buildscripts/evergreen_burn_in_tests.py4
-rwxr-xr-xbuildscripts/generate_compile_expansions.py52
-rwxr-xr-xbuildscripts/generate_compile_expansions_shared_cache.py52
-rwxr-xr-xbuildscripts/generate_version_expansions.py93
-rw-r--r--buildscripts/idl/idl/bson.py1
-rw-r--r--buildscripts/idl/idl_check_compatibility.py118
-rw-r--r--buildscripts/idl/tests/compatibility_test_pass/new/compatibility_test_pass_new.idl231
-rw-r--r--buildscripts/idl/tests/compatibility_test_pass/old/compatibility_test_pass_old.idl233
-rw-r--r--buildscripts/libdeps/graph_visualizer_web_stack/package.json8
-rw-r--r--buildscripts/libdeps/graph_visualizer_web_stack/src/DataGrid.js2
-rw-r--r--buildscripts/libdeps/graph_visualizer_web_stack/src/NodeList.js16
-rw-r--r--buildscripts/libdeps/graph_visualizer_web_stack/src/redux/listSearchTerm.js16
-rw-r--r--buildscripts/libdeps/graph_visualizer_web_stack/src/redux/store.js14
-rw-r--r--buildscripts/libdeps/graph_visualizer_web_stack/src/setupProxy.js17
-rw-r--r--buildscripts/moduleconfig.py16
-rwxr-xr-xbuildscripts/packager_enterprise.py6
-rw-r--r--buildscripts/packaging/msi/mongod.yaml2
-rw-r--r--buildscripts/resmokeconfig/fully_disabled_feature_flags.yml4
-rw-r--r--buildscripts/resmokeconfig/suites/aggregation_column_store_index_passthrough.yml41
-rw-r--r--buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml4
-rw-r--r--buildscripts/resmokeconfig/suites/change_streams_downgrade.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml9
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication.yml7
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication_wiredtiger_cursor_sweeps.yml7
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication_wiredtiger_eviction_debug.yml7
-rw-r--r--buildscripts/resmokeconfig/suites/core_minimum_batch_size.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/cqf.yml2
-rw-r--r--buildscripts/resmokeconfig/suites/cqf_parallel.yml2
-rw-r--r--buildscripts/resmokeconfig/suites/fle2_high_cardinality.yml32
-rw-r--r--buildscripts/resmokeconfig/suites/fle2_sharding_high_cardinality.yml37
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_replication_100ms_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_replication_10sec_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_replication_1sec_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_replication_default_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_sharding_100ms_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_sharding_10sec_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_sharding_1sec_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_sharding_default_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_standalone_100ms_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_standalone_10sec_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_standalone_1sec_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_standalone_default_refresh_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/multi_stmt_txn_jscore_passthrough_with_migration.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/shard_split_multi_stmt_txn_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/sharded_multi_stmt_txn_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml4
-rw-r--r--buildscripts/resmokeconfig/suites/snmp.yml11
-rw-r--r--buildscripts/resmokeconfig/suites/tenant_migration_multi_stmt_txn_jscore_passthrough.yml1
-rw-r--r--buildscripts/resmokelib/core/pipe.py54
-rw-r--r--buildscripts/resmokelib/mongod_fuzzer_configs.py5
-rw-r--r--buildscripts/resmokelib/multiversion/__init__.py12
-rw-r--r--buildscripts/resmokelib/multiversion/multiversion_service.py34
-rw-r--r--buildscripts/resmokelib/multiversionconstants.py58
-rw-r--r--buildscripts/resmokelib/testing/fixtures/shardedcluster.py48
-rw-r--r--buildscripts/resmokelib/testing/hooks/antithesis_logging.py26
-rw-r--r--buildscripts/resmokelib/testing/hooks/jsfile.py2
-rw-r--r--buildscripts/resmokelib/testing/hooks/shard_split.py4
-rw-r--r--buildscripts/resmokelib/testing/hooks/simulate_crash.py5
-rw-r--r--buildscripts/tests/data/errorcodes/regex_matching/regex_matching.cpp6
-rw-r--r--buildscripts/tests/resmokelib/core/test_pipe.py39
-rw-r--r--buildscripts/tests/resmokelib/multiversion/test_multiversion_service.py32
-rw-r--r--buildscripts/tests/resmokelib/testing/fixtures/test_api_adherence.py1
-rw-r--r--buildscripts/tests/test_burn_in_tests.py18
-rw-r--r--buildscripts/tests/test_errorcodes.py2
-rw-r--r--docs/building.md20
-rw-r--r--etc/backports_required_for_multiversion_tests.yml12
-rw-r--r--etc/evergreen.yml186
-rw-r--r--etc/evergreen_yml_components/definitions.yml337
-rw-r--r--etc/evergreen_yml_components/variants/in_memory.yml2
-rw-r--r--etc/evergreen_yml_components/variants/misc_release.yml4
-rw-r--r--etc/evergreen_yml_components/variants/sanitizer.yml20
-rw-r--r--etc/evergreen_yml_components/variants/task_generation.yml1
-rw-r--r--etc/perf.yml47
-rw-r--r--etc/pip/components/build_metrics.req2
-rw-r--r--etc/pip/toolchain-requirements.txt2
-rwxr-xr-xetc/system_perf.yml111
-rw-r--r--evergreen/antithesis_dry_run.sh6
-rw-r--r--evergreen/antithesis_image_build.sh33
-rw-r--r--evergreen/antithesis_image_push.sh35
-rwxr-xr-xevergreen/functions/binary_version_check.sh16
-rw-r--r--evergreen/functions/compile_expansions_generate.sh12
-rwxr-xr-xevergreen/functions/version_expansions_generate.sh18
-rw-r--r--evergreen/generate_version.sh2
-rw-r--r--jstests/aggregation/accumulators/accumulator_js.js5
-rw-r--r--jstests/aggregation/bugs/server6184.js1
-rw-r--r--jstests/aggregation/bugs/server6570.js23
-rw-r--r--jstests/aggregation/bugs/server66418.js38
-rw-r--r--jstests/aggregation/collection_uuid_coll_stats_index_stats.js3
-rw-r--r--jstests/aggregation/expressions/date_from_string.js26
-rw-r--r--jstests/aggregation/optimize_away_pipeline.js2
-rw-r--r--jstests/aggregation/sources/lookup/lookup_equijoin_semantics_hj.js2
-rw-r--r--jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js4
-rw-r--r--jstests/aggregation/sources/lookup/lookup_query_stats.js5
-rw-r--r--jstests/aggregation/sources/lookup/profile_lookup.js22
-rw-r--r--jstests/aggregation/sources/out/out_read_write_to_same_collection.js6
-rw-r--r--jstests/aggregation/sources/project/remove_redundant_projects.js2
-rw-r--r--jstests/aggregation/sources/unionWith/unionWith_allows_stages.js2
-rw-r--r--jstests/aggregation/sources/unionWith/unionWith_explain.js2
-rw-r--r--jstests/aggregation/spill_to_disk.js2
-rw-r--r--jstests/auth/lib/commands_lib.js29
-rw-r--r--jstests/auth/security_token.js14
-rw-r--r--jstests/auth/token_privileges.js7
-rw-r--r--jstests/change_streams/apply_ops.js11
-rw-r--r--jstests/change_streams/expanded_update_description.js114
-rw-r--r--jstests/change_streams/serverless/basic_read_from_change_collection.js50
-rw-r--r--jstests/change_streams/show_expanded_events.js8
-rw-r--r--jstests/concurrency/fsm_workload_helpers/chunks.js9
-rw-r--r--jstests/concurrency/fsm_workloads/agg_lookup.js11
-rw-r--r--jstests/concurrency/fsm_workloads/auth_drop_role.js29
-rw-r--r--jstests/concurrency/fsm_workloads/cleanupOrphanedWhileMigrating.js2
-rw-r--r--jstests/concurrency/fsm_workloads/collection_defragmentation.js2
-rw-r--r--jstests/concurrency/fsm_workloads/collection_uuid.js4
-rw-r--r--jstests/concurrency/fsm_workloads/create_collection_and_view.js2
-rw-r--r--jstests/concurrency/fsm_workloads/internal_transactions_kill_sessions.js18
-rw-r--r--jstests/concurrency/fsm_workloads/internal_transactions_move_chunk.js3
-rw-r--r--jstests/concurrency/fsm_workloads/internal_transactions_resharding.js7
-rw-r--r--jstests/concurrency/fsm_workloads/internal_transactions_sharded.js7
-rw-r--r--jstests/concurrency/fsm_workloads/internal_transactions_sharded_from_mongod.js14
-rw-r--r--jstests/concurrency/fsm_workloads/internal_transactions_sharded_from_mongod_kill_sessions.js9
-rw-r--r--jstests/concurrency/fsm_workloads/internal_transactions_unsharded.js15
-rw-r--r--jstests/concurrency/fsm_workloads/multi_statement_transaction_atomicity_isolation_server_status_mongos.js4
-rw-r--r--jstests/concurrency/fsm_workloads/multi_statement_transaction_kill_sessions_atomicity_isolation.js2
-rw-r--r--jstests/concurrency/fsm_workloads/multi_statement_transaction_simple_kill_sessions.js2
-rw-r--r--jstests/core/api_version_new_50_language_features.js1
-rw-r--r--jstests/core/api_version_new_51_language_features.js4
-rw-r--r--jstests/core/api_version_new_52_language_features.js2
-rw-r--r--jstests/core/capped_resize.js8
-rw-r--r--jstests/core/check_shard_index.js65
-rw-r--r--jstests/core/collection_uuid_coll_mod.js3
-rw-r--r--jstests/core/collection_uuid_drop.js3
-rw-r--r--jstests/core/collection_uuid_find.js4
-rw-r--r--jstests/core/collection_uuid_index_commands.js3
-rw-r--r--jstests/core/collection_uuid_rename_collection.js3
-rw-r--r--jstests/core/collection_uuid_write_commands.js6
-rw-r--r--jstests/core/columnstore_index_correctness.js23
-rw-r--r--jstests/core/count.js22
-rw-r--r--jstests/core/exhaust.js9
-rw-r--r--jstests/core/geo_parse_err.js124
-rw-r--r--jstests/core/geo_s2index.js6
-rw-r--r--jstests/core/illegal_cmd_namespace.js36
-rw-r--r--jstests/core/index2.js93
-rw-r--r--jstests/core/index3.js18
-rw-r--r--jstests/core/index_stats.js4
-rw-r--r--jstests/core/insert1.js6
-rw-r--r--jstests/core/projection_semantics.js36
-rw-r--r--jstests/core/timeseries/bucket_unpacking_with_sort.js199
-rw-r--r--jstests/core/timeseries/bucket_unpacking_with_sort_plan_cache.js141
-rw-r--r--jstests/core/timeseries/libs/timeseries.js7
-rw-r--r--jstests/core/timeseries/nondefault_collation.js1
-rw-r--r--jstests/core/timeseries/timeseries_bucket_index.js2
-rw-r--r--jstests/core/timeseries/timeseries_bucket_limit_count.js29
-rw-r--r--jstests/core/timeseries/timeseries_bucket_limit_time_range.js15
-rw-r--r--jstests/core/timeseries/timeseries_bucket_manual_removal.js1
-rw-r--r--jstests/core/timeseries/timeseries_bucket_rename.js1
-rw-r--r--jstests/core/timeseries/timeseries_collation.js1
-rw-r--r--jstests/core/timeseries/timeseries_create_collection.js2
-rw-r--r--jstests/core/timeseries/timeseries_delete.js2
-rw-r--r--jstests/core/timeseries/timeseries_delete_concurrent.js2
-rw-r--r--jstests/core/timeseries/timeseries_delete_hint.js2
-rw-r--r--jstests/core/timeseries/timeseries_hint.js2
-rw-r--r--jstests/core/timeseries/timeseries_id_index.js2
-rw-r--r--jstests/core/timeseries/timeseries_id_range.js4
-rw-r--r--jstests/core/timeseries/timeseries_index.js2
-rw-r--r--jstests/core/timeseries/timeseries_index_collation.js1
-rw-r--r--jstests/core/timeseries/timeseries_index_skipped_record_tracker.js1
-rw-r--r--jstests/core/timeseries/timeseries_index_spec.js34
-rw-r--r--jstests/core/timeseries/timeseries_index_stats.js2
-rw-r--r--jstests/core/timeseries/timeseries_index_ttl_partial.js144
-rw-r--r--jstests/core/timeseries/timeseries_index_use.js2
-rw-r--r--jstests/core/timeseries/timeseries_insert_after_delete.js2
-rw-r--r--jstests/core/timeseries/timeseries_insert_after_update.js2
-rw-r--r--jstests/core/timeseries/timeseries_internal_bounded_sort.js19
-rw-r--r--jstests/core/timeseries/timeseries_internal_bounded_sort_compound.js31
-rw-r--r--jstests/core/timeseries/timeseries_large_measurements.js73
-rw-r--r--jstests/core/timeseries/timeseries_lastpoint.js7
-rw-r--r--jstests/core/timeseries/timeseries_lastpoint_top.js7
-rw-r--r--jstests/core/timeseries/timeseries_list_collections.js1
-rw-r--r--jstests/core/timeseries/timeseries_list_collections_filter_name.js1
-rw-r--r--jstests/core/timeseries/timeseries_metadata.js2
-rw-r--r--jstests/core/timeseries/timeseries_metric_index_2dsphere.js2
-rw-r--r--jstests/core/timeseries/timeseries_metric_index_ascending_descending.js3
-rw-r--r--jstests/core/timeseries/timeseries_metric_index_compound.js2
-rw-r--r--jstests/core/timeseries/timeseries_metric_index_hashed.js2
-rw-r--r--jstests/core/timeseries/timeseries_metric_index_wildcard.js2
-rw-r--r--jstests/core/timeseries/timeseries_min_max.js2
-rw-r--r--jstests/core/timeseries/timeseries_out_of_order.js1
-rw-r--r--jstests/core/timeseries/timeseries_predicates.js125
-rw-r--r--jstests/core/timeseries/timeseries_resume_after.js2
-rw-r--r--jstests/core/timeseries/timeseries_show_record_id.js1
-rw-r--r--jstests/core/timeseries/timeseries_simple.js5
-rw-r--r--jstests/core/timeseries/timeseries_sparse.js1
-rw-r--r--jstests/core/timeseries/timeseries_sparse_index.js2
-rw-r--r--jstests/core/timeseries/timeseries_special_indexes_metadata.js2
-rw-r--r--jstests/core/timeseries/timeseries_update.js3
-rw-r--r--jstests/core/timeseries/timeseries_update_concurrent.js2
-rw-r--r--jstests/core/timeseries/timeseries_update_hint.js2
-rw-r--r--jstests/core/views/views_all_commands.js6
-rw-r--r--jstests/cqf/array_match.js14
-rw-r--r--jstests/cqf/basic_agg_expr.js84
-rw-r--r--jstests/cqf/match_with_in.js6
-rw-r--r--jstests/libs/cluster_server_parameter_utils.js73
-rw-r--r--jstests/libs/feature_flag_util.js8
-rw-r--r--jstests/libs/optimizer_utils.js22
-rw-r--r--jstests/libs/override_methods/hide_column_store_indexes_from_get_indexes.js27
-rw-r--r--jstests/libs/override_methods/inject_tenant_prefix.js2
-rw-r--r--jstests/libs/parallelTester.js5
-rw-r--r--jstests/libs/sbe_assert_error_override.js2
-rw-r--r--jstests/libs/sbe_util.js14
-rw-r--r--jstests/multiVersion/genericSetFCVUsage/upgrade_downgrade_sharded_cluster.js50
-rw-r--r--jstests/noPassthrough/agg_group.js13
-rw-r--r--jstests/noPassthrough/bucket_unpacking_with_sort_granularity_change.js102
-rw-r--r--jstests/noPassthrough/change_stream_options.js2
-rw-r--r--jstests/noPassthrough/change_stream_pre_image_time_based_expiration_replset.js2
-rw-r--r--jstests/noPassthrough/change_stream_pre_image_time_based_expiration_sharded.js2
-rw-r--r--jstests/noPassthrough/change_streams_cluster_parameter.js146
-rw-r--r--jstests/noPassthrough/check_sbe_lookup_feature.js15
-rw-r--r--jstests/noPassthrough/cluster-server-parameter-op-observer.js4
-rw-r--r--jstests/noPassthrough/cluster_server_parameter_refresher.js3
-rw-r--r--jstests/noPassthrough/cqf_fallback.js212
-rw-r--r--jstests/noPassthrough/disabled_cluster_server_parameters.js3
-rw-r--r--jstests/noPassthrough/explain_group_stage_exec_stats.js2
-rw-r--r--jstests/noPassthrough/group_tmp_file_cleanup.js14
-rw-r--r--jstests/noPassthrough/list_local_sessions.js (renamed from jstests/core/list_local_sessions.js)14
-rw-r--r--jstests/noPassthrough/lookup_max_intermediate_size.js4
-rw-r--r--jstests/noPassthrough/lookup_pushdown.js24
-rw-r--r--jstests/noPassthrough/lookup_with_limit_sharded.js2
-rw-r--r--jstests/noPassthrough/pid_testing_log.js51
-rw-r--r--jstests/noPassthrough/plan_cache_group_lookup.js236
-rw-r--r--jstests/noPassthrough/plan_cache_replan_group_lookup.js193
-rw-r--r--jstests/noPassthrough/profile_operation_metrics.js44
-rw-r--r--jstests/noPassthrough/read_write_concern_defaults_metrics.js11
-rw-r--r--jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js3
-rw-r--r--jstests/noPassthrough/set_cluster_parameter_fcv.js109
-rw-r--r--jstests/noPassthrough/spill_to_disk_secondary_read.js4
-rw-r--r--jstests/noPassthrough/timeseries_bucket_limit_size.js (renamed from jstests/core/timeseries/timeseries_bucket_limit_size.js)39
-rw-r--r--jstests/noPassthrough/timeseries_collStats.js41
-rw-r--r--jstests/noPassthrough/timeseries_expires_with_partial_index.js119
-rw-r--r--jstests/noPassthrough/timeseries_sort.js131
-rw-r--r--jstests/noPassthroughWithMongod/group_pushdown.js2
-rw-r--r--jstests/noPassthroughWithMongod/lookup_with_limit.js2
-rw-r--r--jstests/noPassthroughWithMongod/timeseries_large_measurements_max_size.js80
-rw-r--r--jstests/noPassthroughWithMongod/timeseries_server_status_measurements.js69
-rw-r--r--jstests/replsets/cluster_server_parameter_commands_replset.js6
-rw-r--r--jstests/replsets/db_reads_while_recovering_all_commands.js2
-rw-r--r--jstests/replsets/reconfig_only_counts_voters_for_config_commitment.js2
-rw-r--r--jstests/replsets/set_cluster_parameter_replset.js3
-rw-r--r--jstests/replsets/tenant_migration_cloner_stats_with_failover.js6
-rw-r--r--jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js4
-rw-r--r--jstests/replsets/tenant_migration_recipient_rollback_recovery.js3
-rw-r--r--jstests/replsets/write_change_stream_change_collection.js28
-rw-r--r--jstests/serverless/libs/basic_serverless_test.js54
-rw-r--r--jstests/serverless/shard_split_abort_while_committing.js56
-rw-r--r--jstests/serverless/shard_split_apply_splitconfig.js21
-rw-r--r--jstests/serverless/shard_split_concurrent_reads_on_donor_aborted.js108
-rw-r--r--jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js112
-rw-r--r--jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_aborted.js133
-rw-r--r--jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_committed.js127
-rw-r--r--jstests/serverless/shard_split_concurrent_reads_on_donor_committed.js128
-rw-r--r--jstests/serverless/shard_split_concurrent_reads_on_donor_util.js126
-rw-r--r--jstests/serverless/shard_split_concurrent_writes_on_donor_aborted.js154
-rw-r--r--jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js194
-rw-r--r--jstests/serverless/shard_split_concurrent_writes_on_donor_committed.js145
-rw-r--r--jstests/serverless/shard_split_drop_state_doc_collection_aborted.js114
-rw-r--r--jstests/serverless/shard_split_drop_state_doc_collection_blocking.js118
-rw-r--r--jstests/serverless/shard_split_drop_state_doc_collection_committed.js91
-rw-r--r--jstests/serverless/shard_split_drop_state_doc_collection_decision_fullfilled.js121
-rw-r--r--jstests/serverless/shard_split_performance_test.js6
-rw-r--r--jstests/serverless/shard_split_unblock_reads_and_writes_on_completion.js2
-rw-r--r--jstests/serverless/shard_split_write_during_split_stepdown.js5
-rw-r--r--jstests/sharding/auth.js2
-rw-r--r--jstests/sharding/autosplit_configure_collection.js6
-rw-r--r--jstests/sharding/balancer_defragmentation_merge_chunks.js66
-rw-r--r--jstests/sharding/check_sharding_index_versioned.js2
-rw-r--r--jstests/sharding/clear_jumbo.js172
-rw-r--r--jstests/sharding/cluster_server_parameter_commands_sharded.js6
-rw-r--r--jstests/sharding/compound_hashed_shard_key_sharding_cmds.js6
-rw-r--r--jstests/sharding/configure_collection_balancing_setFCV.js58
-rw-r--r--jstests/sharding/database_versioning_all_commands.js9
-rw-r--r--jstests/sharding/defragment_large_collection.js8
-rw-r--r--jstests/sharding/error_propagation.js2
-rw-r--r--jstests/sharding/internal_txns/libs/chunk_migration_test.js68
-rw-r--r--jstests/sharding/internal_txns/libs/fixture_helpers.js37
-rw-r--r--jstests/sharding/internal_txns/libs/resharding_test.js100
-rw-r--r--jstests/sharding/internal_txns/libs/retryable_internal_transaction_test.js413
-rw-r--r--jstests/sharding/internal_txns/overwrite_txns.js292
-rw-r--r--jstests/sharding/internal_txns/partial_index.js525
-rw-r--r--jstests/sharding/internal_txns/retryable_findAndModify_basic.js18
-rw-r--r--jstests/sharding/internal_txns/retryable_writes_basic.js18
-rw-r--r--jstests/sharding/internal_txns/retryable_writes_retry_conflict.js38
-rw-r--r--jstests/sharding/large_chunk.js2
-rw-r--r--jstests/sharding/libs/resharding_test_fixture.js34
-rw-r--r--jstests/sharding/libs/shard_versioning_util.js6
-rw-r--r--jstests/sharding/multi_writes_with_shard_version_ignored_dont_bubble_up_critical_section.js154
-rw-r--r--jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js13
-rw-r--r--jstests/sharding/query/lookup_unionWith_subpipeline_local_read.js5
-rw-r--r--jstests/sharding/read_write_concern_defaults_application.js15
-rw-r--r--jstests/sharding/reconfig_fails_no_cwwc_set_sharding.js4
-rw-r--r--jstests/sharding/refine_collection_shard_key_basic.js73
-rw-r--r--jstests/sharding/resharding_abort_while_monitoring_to_commit.js67
-rw-r--r--jstests/sharding/resharding_metrics.js2
-rw-r--r--jstests/sharding/resharding_metrics_increment.js4
-rw-r--r--jstests/sharding/run_restore.js2
-rw-r--r--jstests/sharding/safe_secondary_reads_drop_recreate.js4
-rw-r--r--jstests/sharding/safe_secondary_reads_single_migration_suspend_range_deletion.js4
-rw-r--r--jstests/sharding/safe_secondary_reads_single_migration_waitForDelete.js12
-rw-r--r--jstests/sharding/set_cluster_parameter.js3
-rw-r--r--jstests/sharding/shard_max_size.js (renamed from jstests/sharding/sharding_balance2.js)10
-rw-r--r--jstests/sharding/sharding_balance3.js84
-rw-r--r--jstests/sharding/sharding_migrate_cursor1.js98
-rw-r--r--jstests/sharding/ssv_config_check.js34
-rw-r--r--jstests/sharding/timeseries_coll_mod.js37
-rw-r--r--jstests/sharding/timeseries_multiple_mongos.js8
-rw-r--r--jstests/sharding/topology_changes_bump_topology_time.js70
-rw-r--r--jstests/sharding/txn_two_phase_commit_wait_for_majority_commit_after_stepup.js2
-rw-r--r--jstests/sharding/version1.js75
-rw-r--r--jstests/sharding/version2.js44
-rw-r--r--site_scons/site_tools/build_metrics.py95
-rw-r--r--site_scons/site_tools/build_metrics/__init__.py96
-rw-r--r--site_scons/site_tools/build_metrics/build_metrics_format.schema10
-rw-r--r--src/mongo/base/data_range.h4
-rw-r--r--src/mongo/base/error_codes.yml2
-rw-r--r--src/mongo/bson/bson_validate.cpp5
-rw-r--r--src/mongo/client/SConscript17
-rw-r--r--src/mongo/client/async_client.cpp4
-rw-r--r--src/mongo/client/client_deprecated.cpp216
-rw-r--r--src/mongo/client/client_deprecated.h55
-rw-r--r--src/mongo/client/dbclient_base.cpp77
-rw-r--r--src/mongo/client/dbclient_base.h55
-rw-r--r--src/mongo/client/dbclient_connection.cpp61
-rw-r--r--src/mongo/client/dbclient_connection.h34
-rw-r--r--src/mongo/client/dbclient_cursor.cpp380
-rw-r--r--src/mongo/client/dbclient_cursor.h120
-rw-r--r--src/mongo/client/dbclient_cursor_test.cpp132
-rw-r--r--src/mongo/client/dbclient_mockcursor.cpp2
-rw-r--r--src/mongo/client/dbclient_mockcursor.h3
-rw-r--r--src/mongo/client/dbclient_rs.cpp263
-rw-r--r--src/mongo/client/dbclient_rs.h17
-rw-r--r--src/mongo/client/dbclient_rs_test.cpp223
-rw-r--r--src/mongo/client/query.cpp135
-rw-r--r--src/mongo/client/query.h134
-rw-r--r--src/mongo/crypto/encryption_fields.idl7
-rw-r--r--src/mongo/crypto/fle_crypto.cpp98
-rw-r--r--src/mongo/crypto/fle_crypto.h31
-rw-r--r--src/mongo/crypto/fle_crypto_test.cpp64
-rw-r--r--src/mongo/crypto/fle_field_schema.idl4
-rw-r--r--src/mongo/crypto/fle_tags.cpp7
-rw-r--r--src/mongo/db/SConscript91
-rw-r--r--src/mongo/db/auth/README.md99
-rw-r--r--src/mongo/db/auth/SConscript4
-rw-r--r--src/mongo/db/auth/auth_name.h9
-rw-r--r--src/mongo/db/auth/authorization_manager.h2
-rw-r--r--src/mongo/db/auth/authorization_manager_impl.cpp2
-rw-r--r--src/mongo/db/auth/authorization_manager_impl.h2
-rw-r--r--src/mongo/db/auth/authorization_session_impl.cpp7
-rw-r--r--src/mongo/db/auth/authz_manager_external_state.h3
-rw-r--r--src/mongo/db/auth/authz_manager_external_state_d.cpp2
-rw-r--r--src/mongo/db/auth/authz_manager_external_state_local.cpp4
-rw-r--r--src/mongo/db/auth/authz_manager_external_state_local.h2
-rw-r--r--src/mongo/db/auth/authz_manager_external_state_s.h2
-rw-r--r--src/mongo/db/auth/builtin_roles.cpp15
-rw-r--r--src/mongo/db/auth/builtin_roles.h3
-rw-r--r--src/mongo/db/auth/builtin_roles_test.cpp4
-rw-r--r--src/mongo/db/auth/security_token_authentication_guard.cpp66
-rw-r--r--src/mongo/db/auth/security_token_authentication_guard.h (renamed from src/mongo/db/auth/security_token.h)38
-rw-r--r--src/mongo/db/auth/validated_tenancy_scope.cpp (renamed from src/mongo/db/auth/security_token.cpp)168
-rw-r--r--src/mongo/db/auth/validated_tenancy_scope.h116
-rw-r--r--src/mongo/db/auth/validated_tenancy_scope_test.cpp177
-rw-r--r--src/mongo/db/catalog/README.md26
-rw-r--r--src/mongo/db/catalog/SConscript2
-rw-r--r--src/mongo/db/catalog/capped_utils.cpp2
-rw-r--r--src/mongo/db/catalog/coll_mod.cpp4
-rw-r--r--src/mongo/db/catalog/collection_catalog.cpp22
-rw-r--r--src/mongo/db/catalog/collection_catalog.h33
-rw-r--r--src/mongo/db/catalog/collection_impl.cpp44
-rw-r--r--src/mongo/db/catalog/collection_impl.h2
-rw-r--r--src/mongo/db/catalog/collection_writer_test.cpp8
-rw-r--r--src/mongo/db/catalog/commit_quorum.idl4
-rw-r--r--src/mongo/db/catalog/commit_quorum_options.h18
-rw-r--r--src/mongo/db/catalog/database_holder.h7
-rw-r--r--src/mongo/db/catalog/database_impl.cpp85
-rw-r--r--src/mongo/db/catalog/database_test.cpp4
-rw-r--r--src/mongo/db/catalog/document_validation.h34
-rw-r--r--src/mongo/db/catalog/index_builds_manager.cpp2
-rw-r--r--src/mongo/db/catalog/index_catalog_impl.cpp11
-rw-r--r--src/mongo/db/catalog/index_key_validate.cpp19
-rw-r--r--src/mongo/db/catalog/index_key_validate_test.cpp37
-rw-r--r--src/mongo/db/catalog/multi_index_block.cpp12
-rw-r--r--src/mongo/db/catalog/multi_index_block_test.cpp4
-rw-r--r--src/mongo/db/catalog/throttle_cursor_test.cpp27
-rw-r--r--src/mongo/db/catalog_raii.cpp75
-rw-r--r--src/mongo/db/catalog_raii.h38
-rw-r--r--src/mongo/db/catalog_raii_test.cpp118
-rw-r--r--src/mongo/db/change_stream_change_collection_manager.cpp194
-rw-r--r--src/mongo/db/change_stream_change_collection_manager.h28
-rw-r--r--src/mongo/db/change_streams_cluster_parameter.cpp62
-rw-r--r--src/mongo/db/change_streams_cluster_parameter.h (renamed from src/mongo/db/initialize_snmp.h)24
-rw-r--r--src/mongo/db/change_streams_cluster_parameter.idl (renamed from src/mongo/db/ops/new_write_error_exception_format_feature_flag.idl)41
-rw-r--r--src/mongo/db/change_streams_cluster_parameter_test.cpp78
-rw-r--r--src/mongo/db/cloner.cpp45
-rw-r--r--src/mongo/db/cloner.h2
-rw-r--r--src/mongo/db/commands.cpp39
-rw-r--r--src/mongo/db/commands.h29
-rw-r--r--src/mongo/db/commands/SConscript2
-rw-r--r--src/mongo/db/commands/async_command_execution_test.cpp2
-rw-r--r--src/mongo/db/commands/cqf/cqf_aggregate.cpp33
-rw-r--r--src/mongo/db/commands/cqf/cqf_command_utils.cpp696
-rw-r--r--src/mongo/db/commands/cqf/cqf_command_utils.h53
-rw-r--r--src/mongo/db/commands/create_command.cpp4
-rw-r--r--src/mongo/db/commands/create_indexes.cpp13
-rw-r--r--src/mongo/db/commands/distinct.cpp4
-rw-r--r--src/mongo/db/commands/drop_indexes.cpp6
-rw-r--r--src/mongo/db/commands/find_and_modify.cpp21
-rw-r--r--src/mongo/db/commands/find_cmd.cpp15
-rw-r--r--src/mongo/db/commands/fle_compact_test.cpp9
-rw-r--r--src/mongo/db/commands/get_cluster_parameter_invocation.cpp4
-rw-r--r--src/mongo/db/commands/getmore_cmd.cpp4
-rw-r--r--src/mongo/db/commands/index_filter_commands.cpp2
-rw-r--r--src/mongo/db/commands/killcursors_common.h4
-rw-r--r--src/mongo/db/commands/pipeline_command.cpp4
-rw-r--r--src/mongo/db/commands/plan_cache_commands_test.cpp10
-rw-r--r--src/mongo/db/commands/run_aggregate.cpp30
-rw-r--r--src/mongo/db/commands/set_cluster_parameter_command.cpp6
-rw-r--r--src/mongo/db/commands/set_feature_compatibility_version_command.cpp68
-rw-r--r--src/mongo/db/commands/set_index_commit_quorum.idl5
-rw-r--r--src/mongo/db/commands/set_index_commit_quorum_command.cpp5
-rw-r--r--src/mongo/db/commands/txn_cmds.cpp17
-rw-r--r--src/mongo/db/commands/user_management_commands.cpp9
-rw-r--r--src/mongo/db/commands/write_commands.cpp42
-rw-r--r--src/mongo/db/concurrency/lock_state.cpp3
-rw-r--r--src/mongo/db/create_indexes.idl5
-rw-r--r--src/mongo/db/curop.cpp1
-rw-r--r--src/mongo/db/database_name.h52
-rw-r--r--src/mongo/db/database_name_test.cpp (renamed from src/mongo/db/tenant_database_name_test.cpp)6
-rw-r--r--src/mongo/db/db_raii.cpp8
-rw-r--r--src/mongo/db/db_raii.h9
-rw-r--r--src/mongo/db/db_raii_multi_collection_test.cpp43
-rw-r--r--src/mongo/db/dbdirectclient.cpp5
-rw-r--r--src/mongo/db/dbdirectclient.h3
-rw-r--r--src/mongo/db/dbdirectclient_test.cpp6
-rw-r--r--src/mongo/db/dbhelpers.cpp3
-rw-r--r--src/mongo/db/dbhelpers.h1
-rw-r--r--src/mongo/db/dbmessage.h2
-rw-r--r--src/mongo/db/dollar_tenant_decoration_test.cpp167
-rw-r--r--src/mongo/db/exec/add_fields_projection_executor.cpp32
-rw-r--r--src/mongo/db/exec/batched_delete_stage.cpp10
-rw-r--r--src/mongo/db/exec/bucket_unpacker.cpp56
-rw-r--r--src/mongo/db/exec/bucket_unpacker.h5
-rw-r--r--src/mongo/db/exec/bucket_unpacker_test.cpp51
-rw-r--r--src/mongo/db/exec/collection_scan.cpp49
-rw-r--r--src/mongo/db/exec/collection_scan.h2
-rw-r--r--src/mongo/db/exec/collection_scan_common.h2
-rw-r--r--src/mongo/db/exec/delete_stage.cpp61
-rw-r--r--src/mongo/db/exec/multi_plan.cpp9
-rw-r--r--src/mongo/db/exec/plan_cache_util.cpp12
-rw-r--r--src/mongo/db/exec/plan_cache_util.h20
-rw-r--r--src/mongo/db/exec/sbe/SConscript1
-rw-r--r--src/mongo/db/exec/sbe/abt/abt_lower.cpp15
-rw-r--r--src/mongo/db/exec/sbe/abt/sbe_abt_diff_test.cpp8
-rw-r--r--src/mongo/db/exec/sbe/expressions/expression.cpp17
-rw-r--r--src/mongo/db/exec/sbe/sbe_test.cpp80
-rw-r--r--src/mongo/db/exec/sbe/stages/branch.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/branch.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/bson_scan.cpp14
-rw-r--r--src/mongo/db/exec/sbe/stages/bson_scan.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/check_bounds.cpp14
-rw-r--r--src/mongo/db/exec/sbe/stages/check_bounds.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/co_scan.cpp9
-rw-r--r--src/mongo/db/exec/sbe/stages/co_scan.h4
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/exchange.cpp19
-rw-r--r--src/mongo/db/exec/sbe/stages/exchange.h10
-rw-r--r--src/mongo/db/exec/sbe/stages/filter.h13
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_agg.cpp39
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_agg.h8
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_join.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_join.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_lookup.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_lookup.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/ix_scan.cpp3
-rw-r--r--src/mongo/db/exec/sbe/stages/ix_scan.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/limit_skip.cpp7
-rw-r--r--src/mongo/db/exec/sbe/stages/limit_skip.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/loop_join.cpp14
-rw-r--r--src/mongo/db/exec/sbe/stages/loop_join.h6
-rw-r--r--src/mongo/db/exec/sbe/stages/makeobj.cpp33
-rw-r--r--src/mongo/db/exec/sbe/stages/makeobj.h16
-rw-r--r--src/mongo/db/exec/sbe/stages/merge_join.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/merge_join.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/project.cpp12
-rw-r--r--src/mongo/db/exec/sbe/stages/project.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/scan.cpp23
-rw-r--r--src/mongo/db/exec/sbe/stages/scan.h9
-rw-r--r--src/mongo/db/exec/sbe/stages/sort.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/sort.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/sorted_merge.cpp14
-rw-r--r--src/mongo/db/exec/sbe/stages/sorted_merge.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/spool.cpp27
-rw-r--r--src/mongo/db/exec/sbe/stages/spool.h16
-rw-r--r--src/mongo/db/exec/sbe/stages/stages.h29
-rw-r--r--src/mongo/db/exec/sbe/stages/traverse.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/traverse.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/union.cpp12
-rw-r--r--src/mongo/db/exec/sbe/stages/union.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/unique.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/unique.h5
-rw-r--r--src/mongo/db/exec/sbe/stages/unwind.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/unwind.h3
-rw-r--r--src/mongo/db/exec/sbe/util/spilling.cpp3
-rw-r--r--src/mongo/db/exec/sbe/values/columnar.cpp42
-rw-r--r--src/mongo/db/exec/sbe/values/columnar_test.cpp7
-rw-r--r--src/mongo/db/exec/sbe/values/value.cpp3
-rw-r--r--src/mongo/db/exec/sbe/values/value.h13
-rw-r--r--src/mongo/db/exec/sbe/values/value_printer.cpp6
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.cpp74
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.h24
-rw-r--r--src/mongo/db/exec/update_stage.cpp65
-rw-r--r--src/mongo/db/exec/write_stage_common.cpp22
-rw-r--r--src/mongo/db/exec/write_stage_common.h2
-rw-r--r--src/mongo/db/exhaust_cursor_currentop_integration_test.cpp79
-rw-r--r--src/mongo/db/fle_crud.cpp168
-rw-r--r--src/mongo/db/fle_crud.h28
-rw-r--r--src/mongo/db/fle_crud_mongod.cpp8
-rw-r--r--src/mongo/db/fle_crud_test.cpp50
-rw-r--r--src/mongo/db/fle_query_interface_mock.cpp10
-rw-r--r--src/mongo/db/fle_query_interface_mock.h17
-rw-r--r--src/mongo/db/geo/geoparser.cpp121
-rw-r--r--src/mongo/db/geo/hash.cpp35
-rw-r--r--src/mongo/db/geo/hash_test.cpp68
-rw-r--r--src/mongo/db/index/expression_keys_private.cpp5
-rw-r--r--src/mongo/db/index/index_descriptor.h1
-rw-r--r--src/mongo/db/index_build_entry_helpers.cpp7
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp42
-rw-r--r--src/mongo/db/index_builds_coordinator.h12
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.cpp2
-rw-r--r--src/mongo/db/initialize_server_global_state.cpp18
-rw-r--r--src/mongo/db/initialize_server_global_state.h23
-rw-r--r--src/mongo/db/initialize_server_global_state.idl2
-rw-r--r--src/mongo/db/internal_transactions_feature_flag.idl5
-rw-r--r--src/mongo/db/mongod_main.cpp20
-rw-r--r--src/mongo/db/multitenancy.cpp52
-rw-r--r--src/mongo/db/multitenancy.h8
-rw-r--r--src/mongo/db/namespace_string.cpp15
-rw-r--r--src/mongo/db/namespace_string.h90
-rw-r--r--src/mongo/db/namespace_string_test.cpp27
-rw-r--r--src/mongo/db/op_observer_impl.cpp8
-rw-r--r--src/mongo/db/op_observer_impl_test.cpp48
-rw-r--r--src/mongo/db/ops/SConscript1
-rw-r--r--src/mongo/db/ops/write_ops.cpp39
-rw-r--r--src/mongo/db/ops/write_ops_exec.cpp77
-rw-r--r--src/mongo/db/ops/write_ops_exec.h3
-rw-r--r--src/mongo/db/pipeline/SConscript3
-rw-r--r--src/mongo/db/pipeline/abt/abt_document_source_visitor.cpp31
-rw-r--r--src/mongo/db/pipeline/abt/agg_expression_visitor.cpp38
-rw-r--r--src/mongo/db/pipeline/abt/match_expression_visitor.cpp12
-rw-r--r--src/mongo/db/pipeline/abt/pipeline_test.cpp110
-rw-r--r--src/mongo/db/pipeline/aggregation_context_fixture.h9
-rw-r--r--src/mongo/db/pipeline/change_stream_document_diff_parser.cpp262
-rw-r--r--src/mongo/db/pipeline/change_stream_document_diff_parser.h7
-rw-r--r--src/mongo/db/pipeline/change_stream_document_diff_parser_test.cpp219
-rw-r--r--src/mongo/db/pipeline/change_stream_event_transform.cpp17
-rw-r--r--src/mongo/db/pipeline/change_stream_event_transform_test.cpp140
-rw-r--r--src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp38
-rw-r--r--src/mongo/db/pipeline/dependencies.cpp51
-rw-r--r--src/mongo/db/pipeline/dependencies.h14
-rw-r--r--src/mongo/db/pipeline/dependencies_test.cpp79
-rw-r--r--src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp6
-rw-r--r--src/mongo/db/pipeline/document_source_change_stream_test.cpp4
-rw-r--r--src/mongo/db/pipeline/document_source_check_resume_token_test.cpp4
-rw-r--r--src/mongo/db/pipeline/document_source_cursor.cpp20
-rw-r--r--src/mongo/db/pipeline/document_source_find_and_modify_image_lookup.cpp1
-rw-r--r--src/mongo/db/pipeline/document_source_find_and_modify_image_lookup_test.cpp5
-rw-r--r--src/mongo/db/pipeline/document_source_lookup_test.cpp39
-rw-r--r--src/mongo/db/pipeline/document_source_union_with_test.cpp136
-rw-r--r--src/mongo/db/pipeline/expression.cpp361
-rw-r--r--src/mongo/db/pipeline/expression.h33
-rw-r--r--src/mongo/db/pipeline/expression_parser.idl (renamed from src/mongo/s/pm2583_feature_flags.idl)35
-rw-r--r--src/mongo/db/pipeline/expression_test.cpp450
-rw-r--r--src/mongo/db/pipeline/expression_visitor.h3
-rw-r--r--src/mongo/db/pipeline/pipeline.h5
-rw-r--r--src/mongo/db/pipeline/pipeline_d.cpp133
-rw-r--r--src/mongo/db/pipeline/pipeline_d.h5
-rw-r--r--src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp25
-rw-r--r--src/mongo/db/pipeline/process_interface/mongos_process_interface_test.cpp4
-rw-r--r--src/mongo/db/pipeline/process_interface/standalone_process_interface_test.cpp6
-rw-r--r--src/mongo/db/pipeline/resharding_initial_split_policy_test.cpp4
-rw-r--r--src/mongo/db/pipeline/sharded_union_test.cpp50
-rw-r--r--src/mongo/db/pipeline/visitors/document_source_visitor.h3
-rw-r--r--src/mongo/db/pipeline/visitors/document_source_walker.cpp2
-rw-r--r--src/mongo/db/process_health/config_server_health_observer.cpp4
-rw-r--r--src/mongo/db/process_health/dns_health_observer.cpp21
-rw-r--r--src/mongo/db/process_health/dns_health_observer.h2
-rw-r--r--src/mongo/db/process_health/health_observer_base.cpp47
-rw-r--r--src/mongo/db/process_health/health_observer_base.h2
-rw-r--r--src/mongo/db/process_health/health_observer_mock.h25
-rw-r--r--src/mongo/db/process_health/health_observer_test.cpp44
-rw-r--r--src/mongo/db/process_health/test_health_observer.cpp2
-rw-r--r--src/mongo/db/process_health/test_health_observer.h2
-rw-r--r--src/mongo/db/query/canonical_query.cpp7
-rw-r--r--src/mongo/db/query/canonical_query_encoder.cpp34
-rw-r--r--src/mongo/db/query/canonical_query_encoder.h6
-rw-r--r--src/mongo/db/query/canonical_query_encoder_test.cpp174
-rw-r--r--src/mongo/db/query/classic_stage_builder.cpp2
-rw-r--r--src/mongo/db/query/datetime/date_time_support.cpp3
-rw-r--r--src/mongo/db/query/explain.cpp44
-rw-r--r--src/mongo/db/query/explain.h22
-rw-r--r--src/mongo/db/query/fle/server_rewrite.cpp355
-rw-r--r--src/mongo/db/query/fle/server_rewrite.h69
-rw-r--r--src/mongo/db/query/fle/server_rewrite_test.cpp307
-rw-r--r--src/mongo/db/query/get_executor.cpp40
-rw-r--r--src/mongo/db/query/get_executor.h5
-rw-r--r--src/mongo/db/query/interval_evaluation_tree.h16
-rw-r--r--src/mongo/db/query/optimizer/algebra/algebra_test.cpp17
-rw-r--r--src/mongo/db/query/optimizer/algebra/operator.h115
-rw-r--r--src/mongo/db/query/optimizer/algebra/polyvalue.h154
-rw-r--r--src/mongo/db/query/optimizer/bool_expression.h12
-rw-r--r--src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp39
-rw-r--r--src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp54
-rw-r--r--src/mongo/db/query/optimizer/rewrites/const_eval.cpp2
-rw-r--r--src/mongo/db/query/optimizer/syntax/syntax.h4
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.cpp192
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.h15
-rw-r--r--src/mongo/db/query/plan_cache_key_factory.cpp72
-rw-r--r--src/mongo/db/query/plan_cache_key_factory.h11
-rw-r--r--src/mongo/db/query/plan_executor.cpp4
-rw-r--r--src/mongo/db/query/plan_executor.h9
-rw-r--r--src/mongo/db/query/plan_executor_impl.cpp20
-rw-r--r--src/mongo/db/query/plan_executor_sbe.h5
-rw-r--r--src/mongo/db/query/plan_yield_policy.cpp28
-rw-r--r--src/mongo/db/query/plan_yield_policy.h2
-rw-r--r--src/mongo/db/query/planner_access.cpp39
-rw-r--r--src/mongo/db/query/planner_access.h1
-rw-r--r--src/mongo/db/query/planner_analysis.cpp57
-rw-r--r--src/mongo/db/query/planner_analysis.h1
-rw-r--r--src/mongo/db/query/projection.cpp1
-rw-r--r--src/mongo/db/query/projection.h5
-rw-r--r--src/mongo/db/query/query_feature_flags.idl22
-rw-r--r--src/mongo/db/query/query_knobs.idl18
-rw-r--r--src/mongo/db/query/query_planner.cpp116
-rw-r--r--src/mongo/db/query/query_planner_common.cpp41
-rw-r--r--src/mongo/db/query/query_planner_common.h11
-rw-r--r--src/mongo/db/query/query_planner_params.h21
-rw-r--r--src/mongo/db/query/query_request_helper.cpp1
-rw-r--r--src/mongo/db/query/query_request_helper.h6
-rw-r--r--src/mongo/db/query/query_solution.cpp2
-rw-r--r--src/mongo/db/query/query_solution.h2
-rw-r--r--src/mongo/db/query/sbe_cached_solution_planner.cpp17
-rw-r--r--src/mongo/db/query/sbe_multi_planner.cpp30
-rw-r--r--src/mongo/db/query/sbe_plan_cache.cpp13
-rw-r--r--src/mongo/db/query/sbe_plan_cache.h134
-rw-r--r--src/mongo/db/query/sbe_stage_builder.cpp168
-rw-r--r--src/mongo/db/query/sbe_stage_builder_coll_scan.cpp4
-rw-r--r--src/mongo/db/query/sbe_stage_builder_expression.cpp5
-rw-r--r--src/mongo/db/query/sbe_stage_builder_filter.cpp43
-rw-r--r--src/mongo/db/query/sbe_stage_builder_helpers.cpp89
-rw-r--r--src/mongo/db/query/sbe_stage_builder_helpers.h66
-rw-r--r--src/mongo/db/query/sbe_stage_builder_projection.cpp2
-rw-r--r--src/mongo/db/query/sbe_sub_planner.cpp5
-rw-r--r--src/mongo/db/query/sbe_utils.cpp9
-rw-r--r--src/mongo/db/record_id.h5
-rw-r--r--src/mongo/db/record_id_helpers.cpp48
-rw-r--r--src/mongo/db/record_id_helpers.h2
-rw-r--r--src/mongo/db/repl/SConscript5
-rw-r--r--src/mongo/db/repl/apply_ops.cpp12
-rw-r--r--src/mongo/db/repl/collection_bulk_loader_impl.cpp3
-rw-r--r--src/mongo/db/repl/collection_cloner.cpp45
-rw-r--r--src/mongo/db/repl/collection_cloner.h6
-rw-r--r--src/mongo/db/repl/data_replicator_external_state.h2
-rw-r--r--src/mongo/db/repl/data_replicator_external_state_impl.cpp2
-rw-r--r--src/mongo/db/repl/data_replicator_external_state_impl.h2
-rw-r--r--src/mongo/db/repl/data_replicator_external_state_mock.cpp6
-rw-r--r--src/mongo/db/repl/data_replicator_external_state_mock.h2
-rw-r--r--src/mongo/db/repl/idempotency_test.cpp2
-rw-r--r--src/mongo/db/repl/oplog.cpp4
-rw-r--r--src/mongo/db/repl/oplog_applier_impl.cpp15
-rw-r--r--src/mongo/db/repl/oplog_applier_impl_test.cpp41
-rw-r--r--src/mongo/db/repl/oplog_entry.idl3
-rw-r--r--src/mongo/db/repl/oplog_entry_test.cpp4
-rw-r--r--src/mongo/db/repl/oplog_fetcher.cpp80
-rw-r--r--src/mongo/db/repl/oplog_fetcher.h9
-rw-r--r--src/mongo/db/repl/oplog_fetcher_test.cpp43
-rw-r--r--src/mongo/db/repl/primary_only_service.cpp3
-rw-r--r--src/mongo/db/repl/repl_set_commands.cpp7
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_impl.cpp2
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp81
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.h29
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp74
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp99
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.cpp16
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.h4
-rw-r--r--src/mongo/db/repl/roll_back_local_operations_test.cpp3
-rw-r--r--src/mongo/db/repl/rollback_source_impl.cpp3
-rw-r--r--src/mongo/db/repl/rs_rollback.cpp36
-rw-r--r--src/mongo/db/repl/storage_interface_impl.cpp24
-rw-r--r--src/mongo/db/repl/storage_interface_impl_test.cpp5
-rw-r--r--src/mongo/db/repl/storage_timestamp_test.cpp6
-rw-r--r--src/mongo/db/repl/tenant_collection_cloner.cpp56
-rw-r--r--src/mongo/db/repl/tenant_collection_cloner.h6
-rw-r--r--src/mongo/db/repl/tenant_file_cloner.cpp13
-rw-r--r--src/mongo/db/repl/tenant_file_cloner.h2
-rw-r--r--src/mongo/db/repl/tenant_file_importer_service.cpp62
-rw-r--r--src/mongo/db/repl/tenant_file_importer_service.h92
-rw-r--r--src/mongo/db/repl/tenant_migration_access_blocker_util.cpp4
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp9
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_service.cpp21
-rw-r--r--src/mongo/db/repl/tenant_oplog_applier_test.cpp117
-rw-r--r--src/mongo/db/repl/topology_coordinator.cpp6
-rw-r--r--src/mongo/db/repl/topology_coordinator.h2
-rw-r--r--src/mongo/db/repl_index_build_state.h2
-rw-r--r--src/mongo/db/s/README.md12
-rw-r--r--src/mongo/db/s/SConscript12
-rw-r--r--src/mongo/db/s/balancer/balance_stats_test.cpp4
-rw-r--r--src/mongo/db/s/balancer/balancer.cpp74
-rw-r--r--src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp10
-rw-r--r--src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp2
-rw-r--r--src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp10
-rw-r--r--src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp27
-rw-r--r--src/mongo/db/s/balancer/balancer_policy_test.cpp5
-rw-r--r--src/mongo/db/s/balancer/cluster_chunks_resize_policy_test.cpp3
-rw-r--r--src/mongo/db/s/balancer/type_migration.cpp3
-rw-r--r--src/mongo/db/s/balancer/type_migration_test.cpp17
-rw-r--r--src/mongo/db/s/check_sharding_index_command.cpp8
-rw-r--r--src/mongo/db/s/chunk_splitter.cpp1
-rw-r--r--src/mongo/db/s/collection_metadata_filtering_test.cpp4
-rw-r--r--src/mongo/db/s/collection_metadata_test.cpp6
-rw-r--r--src/mongo/db/s/collection_sharding_runtime_test.cpp12
-rw-r--r--src/mongo/db/s/collmod_coordinator.cpp79
-rw-r--r--src/mongo/db/s/collmod_coordinator.h35
-rw-r--r--src/mongo/db/s/collmod_coordinator_pre60_compatible.cpp264
-rw-r--r--src/mongo/db/s/collmod_coordinator_pre60_compatible.h101
-rw-r--r--src/mongo/db/s/commit_chunk_migration.idl85
-rw-r--r--src/mongo/db/s/compact_structured_encryption_data_coordinator.cpp93
-rw-r--r--src/mongo/db/s/compact_structured_encryption_data_coordinator.h33
-rw-r--r--src/mongo/db/s/config/config_server_test_fixture.cpp25
-rw-r--r--src/mongo/db/s/config/config_server_test_fixture.h11
-rw-r--r--src/mongo/db/s/config/configsvr_collmod_command.cpp4
-rw-r--r--src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp109
-rw-r--r--src/mongo/db/s/config/configsvr_configure_collection_balancing.cpp5
-rw-r--r--src/mongo/db/s/config/configsvr_merge_chunks_command.cpp4
-rw-r--r--src/mongo/db/s/config/configsvr_move_chunk_command.cpp12
-rw-r--r--src/mongo/db/s/config/configsvr_remove_chunks_command.cpp4
-rw-r--r--src/mongo/db/s/config/configsvr_remove_tags_command.cpp4
-rw-r--r--src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp4
-rw-r--r--src/mongo/db/s/config/configsvr_reshard_collection_cmd.cpp12
-rw-r--r--src/mongo/db/s/config/configsvr_set_cluster_parameter_command.cpp6
-rw-r--r--src/mongo/db/s/config/initial_split_policy.cpp47
-rw-r--r--src/mongo/db/s/config/initial_split_policy_test.cpp6
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp2
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_bump_collection_version_and_change_metadata_test.cpp19
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp67
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_clear_jumbo_flag_test.cpp10
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp76
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_ensure_chunk_version_is_greater_than_test.cpp120
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp31
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp16
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp3
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp34
-rw-r--r--src/mongo/db/s/create_collection_coordinator.cpp148
-rw-r--r--src/mongo/db/s/create_collection_coordinator.h83
-rw-r--r--src/mongo/db/s/create_collection_coordinator_test.cpp133
-rw-r--r--src/mongo/db/s/database_sharding_state.cpp5
-rw-r--r--src/mongo/db/s/drop_collection_coordinator.cpp71
-rw-r--r--src/mongo/db/s/drop_collection_coordinator.h38
-rw-r--r--src/mongo/db/s/drop_database_coordinator.cpp70
-rw-r--r--src/mongo/db/s/drop_database_coordinator.h41
-rw-r--r--src/mongo/db/s/flush_resharding_state_change_command.cpp2
-rw-r--r--src/mongo/db/s/flush_routing_table_cache_updates_command.cpp3
-rw-r--r--src/mongo/db/s/metadata_manager_test.cpp4
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp59
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy.h18
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp7
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp10
-rw-r--r--src/mongo/db/s/migration_destination_manager.cpp17
-rw-r--r--src/mongo/db/s/migration_destination_manager_legacy_commands.cpp8
-rw-r--r--src/mongo/db/s/migration_source_manager.cpp27
-rw-r--r--src/mongo/db/s/migration_util.cpp22
-rw-r--r--src/mongo/db/s/migration_util_test.cpp10
-rw-r--r--src/mongo/db/s/move_primary_coordinator.cpp28
-rw-r--r--src/mongo/db/s/move_primary_coordinator.h17
-rw-r--r--src/mongo/db/s/move_primary_source_manager.cpp8
-rw-r--r--src/mongo/db/s/op_observer_sharding_test.cpp8
-rw-r--r--src/mongo/db/s/operation_sharding_state_test.cpp10
-rw-r--r--src/mongo/db/s/range_deletion_util_test.cpp4
-rw-r--r--src/mongo/db/s/refine_collection_shard_key_coordinator.cpp48
-rw-r--r--src/mongo/db/s/refine_collection_shard_key_coordinator.h34
-rw-r--r--src/mongo/db/s/rename_collection_coordinator.cpp136
-rw-r--r--src/mongo/db/s/rename_collection_coordinator.h38
-rw-r--r--src/mongo/db/s/rename_collection_participant_service.cpp9
-rw-r--r--src/mongo/db/s/reshard_collection_coordinator.cpp48
-rw-r--r--src/mongo/db/s/reshard_collection_coordinator.h34
-rw-r--r--src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp3
-rw-r--r--src/mongo/db/s/resharding/resharding_agg_test.cpp34
-rw-r--r--src/mongo/db/s/resharding/resharding_collection_cloner.cpp6
-rw-r--r--src/mongo/db/s/resharding/resharding_collection_cloner.h6
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp8
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h6
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp6
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_observer.cpp4
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp84
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service.cpp175
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service.h14
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp17
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_test.cpp35
-rw-r--r--src/mongo/db/s/resharding/resharding_data_replication.cpp20
-rw-r--r--src/mongo/db/s/resharding/resharding_data_replication.h6
-rw-r--r--src/mongo/db/s/resharding/resharding_data_replication_test.cpp11
-rw-r--r--src/mongo/db/s/resharding/resharding_destined_recipient_test.cpp10
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp4
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp4
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp5
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_recipient_common.h4
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_recipient_common_test.cpp112
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_service.cpp46
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_service.h8
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_service_test.cpp20
-rw-r--r--src/mongo/db/s/resharding/resharding_manual_cleanup.cpp5
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics.cpp (renamed from src/mongo/db/s/resharding/resharding_metrics_new.cpp)102
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics.h (renamed from src/mongo/db/s/resharding/resharding_metrics_new.h)46
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics_test.cpp (renamed from src/mongo/db/s/resharding/resharding_metrics_new_test.cpp)38
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_application.cpp9
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_application.h1
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier.cpp2
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier.h2
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier_metrics.cpp18
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier_metrics.h6
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier_metrics_test.cpp16
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp25
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp36
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_crud_application_test.cpp41
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp20
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_fetcher.h12
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_fetcher_test.cpp20
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_session_application_test.cpp10
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service.cpp78
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service.h10
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service_external_state.cpp8
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service_external_state.h8
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service_external_state_test.cpp10
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service_test.cpp31
-rw-r--r--src/mongo/db/s/resharding/resharding_txn_cloner.cpp1
-rw-r--r--src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp20
-rw-r--r--src/mongo/db/s/resharding/resharding_util.cpp4
-rw-r--r--src/mongo/db/s/resharding/resharding_util.h2
-rw-r--r--src/mongo/db/s/resharding/resharding_util_test.cpp4
-rw-r--r--src/mongo/db/s/resharding_test_commands.cpp6
-rw-r--r--src/mongo/db/s/sessions_collection_config_server.cpp6
-rw-r--r--src/mongo/db/s/set_allow_migrations_coordinator.cpp28
-rw-r--r--src/mongo/db/s/set_allow_migrations_coordinator.h16
-rw-r--r--src/mongo/db/s/set_shard_version_command.cpp340
-rw-r--r--src/mongo/db/s/shard_key_index_util.cpp73
-rw-r--r--src/mongo/db/s/shard_key_index_util.h9
-rw-r--r--src/mongo/db/s/shard_key_util.cpp49
-rw-r--r--src/mongo/db/s/shard_key_util.h12
-rw-r--r--src/mongo/db/s/shard_metadata_util.cpp17
-rw-r--r--src/mongo/db/s/shard_metadata_util.h47
-rw-r--r--src/mongo/db/s/shard_metadata_util_test.cpp12
-rw-r--r--src/mongo/db/s/shard_server_catalog_cache_loader.cpp64
-rw-r--r--src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp12
-rw-r--r--src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp12
-rw-r--r--src/mongo/db/s/sharding_data_transform_cumulative_metrics.h4
-rw-r--r--src/mongo/db/s/sharding_data_transform_cumulative_metrics_test.cpp8
-rw-r--r--src/mongo/db/s/sharding_data_transform_instance_metrics.cpp5
-rw-r--r--src/mongo/db/s/sharding_data_transform_instance_metrics.h4
-rw-r--r--src/mongo/db/s/sharding_ddl_coordinator.h271
-rw-r--r--src/mongo/db/s/sharding_ddl_coordinator.idl4
-rw-r--r--src/mongo/db/s/sharding_ddl_coordinator_service.cpp9
-rw-r--r--src/mongo/db/s/sharding_ddl_util.cpp21
-rw-r--r--src/mongo/db/s/sharding_ddl_util_test.cpp12
-rw-r--r--src/mongo/db/s/sharding_mongod_test_fixture.cpp1
-rw-r--r--src/mongo/db/s/sharding_server_status.cpp10
-rw-r--r--src/mongo/db/s/sharding_util.cpp46
-rw-r--r--src/mongo/db/s/sharding_util.h9
-rw-r--r--src/mongo/db/s/sharding_write_router_bm.cpp2
-rw-r--r--src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp2
-rw-r--r--src/mongo/db/s/shardsvr_collmod_command.cpp96
-rw-r--r--src/mongo/db/s/shardsvr_collmod_participant_command.cpp4
-rw-r--r--src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp2
-rw-r--r--src/mongo/db/s/shardsvr_create_collection_command.cpp20
-rw-r--r--src/mongo/db/s/shardsvr_create_collection_participant_command.cpp35
-rw-r--r--src/mongo/db/s/shardsvr_drop_collection_participant_command.cpp36
-rw-r--r--src/mongo/db/s/shardsvr_merge_chunks_command.cpp3
-rw-r--r--src/mongo/db/s/shardsvr_participant_block_command.cpp4
-rw-r--r--src/mongo/db/s/shardsvr_rename_collection_participant_command.cpp71
-rw-r--r--src/mongo/db/s/shardsvr_resharding_operation_time_command.cpp7
-rw-r--r--src/mongo/db/s/shardsvr_set_cluster_parameter_command.cpp4
-rw-r--r--src/mongo/db/s/shardsvr_set_user_write_block_mode_command.cpp4
-rw-r--r--src/mongo/db/s/split_chunk.cpp3
-rw-r--r--src/mongo/db/s/transaction_coordinator_service.cpp4
-rw-r--r--src/mongo/db/s/txn_two_phase_commit_cmds.cpp20
-rw-r--r--src/mongo/db/s/type_shard_collection.cpp20
-rw-r--r--src/mongo/db/s/type_shard_collection.h5
-rw-r--r--src/mongo/db/s/type_shard_collection.idl15
-rw-r--r--src/mongo/db/s/type_shard_collection_test.cpp21
-rw-r--r--src/mongo/db/server_options.h9
-rw-r--r--src/mongo/db/serverless/shard_split_donor_op_observer.cpp187
-rw-r--r--src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp50
-rw-r--r--src/mongo/db/serverless/shard_split_donor_service.cpp510
-rw-r--r--src/mongo/db/serverless/shard_split_donor_service.h24
-rw-r--r--src/mongo/db/serverless/shard_split_donor_service_test.cpp61
-rw-r--r--src/mongo/db/serverless/shard_split_state_machine.idl1
-rw-r--r--src/mongo/db/serverless/shard_split_utils.cpp6
-rw-r--r--src/mongo/db/serverless/shard_split_utils.h2
-rw-r--r--src/mongo/db/service_context_d_test_fixture.cpp2
-rw-r--r--src/mongo/db/service_context_d_test_fixture.h8
-rw-r--r--src/mongo/db/service_entry_point_common.cpp35
-rw-r--r--src/mongo/db/session_catalog_mongod.cpp61
-rw-r--r--src/mongo/db/sessions_collection.cpp2
-rw-r--r--src/mongo/db/sessions_collection_rs.cpp1
-rw-r--r--src/mongo/db/sessions_collection_standalone.cpp1
-rw-r--r--src/mongo/db/stats/counters.h2
-rw-r--r--src/mongo/db/storage/kv/durable_catalog_test.cpp33
-rw-r--r--src/mongo/db/storage/kv/storage_engine_test.cpp2
-rw-r--r--src/mongo/db/storage/record_store_test_oplog.cpp2
-rw-r--r--src/mongo/db/storage/storage_engine_test_fixture.h8
-rw-r--r--src/mongo/db/storage/storage_options.h3
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp3
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp11
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp3
-rw-r--r--src/mongo/db/tenant_id.h7
-rw-r--r--src/mongo/db/timeseries/SConscript3
-rw-r--r--src/mongo/db/timeseries/bucket_catalog.cpp473
-rw-r--r--src/mongo/db/timeseries/bucket_catalog.h73
-rw-r--r--src/mongo/db/timeseries/bucket_catalog_helpers.cpp8
-rw-r--r--src/mongo/db/timeseries/bucket_catalog_helpers.h6
-rw-r--r--src/mongo/db/timeseries/bucket_catalog_test.cpp476
-rw-r--r--src/mongo/db/timeseries/bucket_compression.cpp19
-rw-r--r--src/mongo/db/timeseries/bucket_compression.h1
-rw-r--r--src/mongo/db/timeseries/timeseries.idl12
-rw-r--r--src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp83
-rw-r--r--src/mongo/db/timeseries/timeseries_constants.h1
-rw-r--r--src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp3
-rw-r--r--src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp16
-rw-r--r--src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h7
-rw-r--r--src/mongo/db/transaction_api.cpp47
-rw-r--r--src/mongo/db/transaction_api.h20
-rw-r--r--src/mongo/db/transaction_api_test.cpp174
-rw-r--r--src/mongo/db/transaction_participant.cpp255
-rw-r--r--src/mongo/db/transaction_participant.h42
-rw-r--r--src/mongo/db/transaction_participant_test.cpp388
-rw-r--r--src/mongo/db/transaction_validation.cpp58
-rw-r--r--src/mongo/db/transaction_validation.h6
-rw-r--r--src/mongo/db/views/durable_view_catalog.cpp12
-rw-r--r--src/mongo/dbtests/SConscript1
-rw-r--r--src/mongo/dbtests/dbhelper_tests.cpp10
-rw-r--r--src/mongo/dbtests/dbtests.cpp2
-rw-r--r--src/mongo/dbtests/indexcatalogtests.cpp6
-rw-r--r--src/mongo/dbtests/indexupdatetests.cpp10
-rw-r--r--src/mongo/dbtests/mock/mock_dbclient_connection.cpp53
-rw-r--r--src/mongo/dbtests/mock/mock_dbclient_connection.h15
-rw-r--r--src/mongo/dbtests/mock/mock_remote_db_server.cpp14
-rw-r--r--src/mongo/dbtests/mock/mock_remote_db_server.h15
-rw-r--r--src/mongo/dbtests/mock_dbclient_conn_test.cpp130
-rw-r--r--src/mongo/dbtests/querytests.cpp10
-rw-r--r--src/mongo/dbtests/repltests.cpp2
-rw-r--r--src/mongo/dbtests/rollbacktests.cpp29
-rw-r--r--src/mongo/dbtests/validate_tests.cpp7
-rw-r--r--src/mongo/dbtests/wildcard_multikey_persistence_test.cpp2
-rw-r--r--src/mongo/embedded/SConscript2
-rw-r--r--src/mongo/embedded/embedded_auth_manager.cpp2
-rw-r--r--src/mongo/embedded/service_entry_point_embedded.cpp4
-rw-r--r--src/mongo/executor/network_interface_tl.cpp6
-rw-r--r--src/mongo/executor/remote_command_request.h3
-rw-r--r--src/mongo/idl/basic_types.idl8
-rw-r--r--src/mongo/idl/cluster_server_parameter.idl5
-rw-r--r--src/mongo/idl/cluster_server_parameter_initializer.h2
-rw-r--r--src/mongo/idl/generic_argument.idl1
-rw-r--r--src/mongo/installer/msi/SConscript1
-rw-r--r--src/mongo/installer/msi/wxs/BinaryFragment.wxs39
-rw-r--r--src/mongo/installer/msi/wxs/Installer_64.wxs1
-rw-r--r--src/mongo/logv2/logv2_test.cpp2
-rw-r--r--src/mongo/rpc/SConscript2
-rw-r--r--src/mongo/rpc/factory.cpp7
-rw-r--r--src/mongo/rpc/factory.h3
-rw-r--r--src/mongo/rpc/metadata.cpp6
-rw-r--r--src/mongo/rpc/metadata/client_metadata.cpp4
-rw-r--r--src/mongo/rpc/metadata/client_metadata_test.cpp10
-rw-r--r--src/mongo/rpc/metadata/oplog_query_metadata.h4
-rw-r--r--src/mongo/rpc/metadata/repl_set_metadata.h4
-rw-r--r--src/mongo/rpc/metadata/security_token_metadata_test.cpp29
-rw-r--r--src/mongo/rpc/op_msg.cpp29
-rw-r--r--src/mongo/rpc/op_msg.h26
-rw-r--r--src/mongo/rpc/op_msg_integration_test.cpp13
-rw-r--r--src/mongo/rpc/op_msg_test.cpp144
-rw-r--r--src/mongo/rpc/op_msg_test.h4
-rw-r--r--src/mongo/s/SConscript10
-rw-r--r--src/mongo/s/append_raw_responses_test.cpp7
-rw-r--r--src/mongo/s/catalog/sharding_catalog_client_impl.cpp1
-rw-r--r--src/mongo/s/catalog/sharding_catalog_client_test.cpp18
-rw-r--r--src/mongo/s/catalog/type_chunk.cpp22
-rw-r--r--src/mongo/s/catalog/type_chunk.h13
-rw-r--r--src/mongo/s/catalog/type_chunk_test.cpp71
-rw-r--r--src/mongo/s/catalog_cache.cpp7
-rw-r--r--src/mongo/s/catalog_cache_loader.h4
-rw-r--r--src/mongo/s/catalog_cache_refresh_test.cpp36
-rw-r--r--src/mongo/s/catalog_cache_test.cpp15
-rw-r--r--src/mongo/s/catalog_cache_test_fixture.cpp10
-rw-r--r--src/mongo/s/chunk_manager.cpp14
-rw-r--r--src/mongo/s/chunk_manager.h13
-rw-r--r--src/mongo/s/chunk_manager_query_test.cpp6
-rw-r--r--src/mongo/s/chunk_manager_refresh_bm.cpp6
-rw-r--r--src/mongo/s/chunk_map_test.cpp11
-rw-r--r--src/mongo/s/chunk_test.cpp13
-rw-r--r--src/mongo/s/chunk_version.cpp189
-rw-r--r--src/mongo/s/chunk_version.h135
-rw-r--r--src/mongo/s/chunk_version_test.cpp46
-rw-r--r--src/mongo/s/client/shard_registry.cpp20
-rw-r--r--src/mongo/s/client/shard_registry.h6
-rw-r--r--src/mongo/s/client/sharding_network_connection_hook.cpp1
-rw-r--r--src/mongo/s/commands/SConscript1
-rw-r--r--src/mongo/s/commands/cluster_abort_transaction_cmd.h8
-rw-r--r--src/mongo/s/commands/cluster_command_test_fixture.cpp4
-rw-r--r--src/mongo/s/commands/cluster_commands.idl49
-rw-r--r--src/mongo/s/commands/cluster_commit_transaction_cmd.h8
-rw-r--r--src/mongo/s/commands/cluster_coordinate_commit_txn.cpp88
-rw-r--r--src/mongo/s/commands/cluster_create_cmd.cpp4
-rw-r--r--src/mongo/s/commands/cluster_create_indexes_cmd.cpp4
-rw-r--r--src/mongo/s/commands/cluster_distinct_cmd.cpp4
-rw-r--r--src/mongo/s/commands/cluster_explain_cmd.cpp1
-rw-r--r--src/mongo/s/commands/cluster_find_and_modify_cmd.cpp8
-rw-r--r--src/mongo/s/commands/cluster_find_cmd.h6
-rw-r--r--src/mongo/s/commands/cluster_getmore_cmd.h4
-rw-r--r--src/mongo/s/commands/cluster_move_chunk_cmd.cpp251
-rw-r--r--src/mongo/s/commands/cluster_pipeline_cmd.h4
-rw-r--r--src/mongo/s/commands/cluster_write_cmd.h8
-rw-r--r--src/mongo/s/commands/internal_transactions_test_command.h8
-rw-r--r--src/mongo/s/commands/strategy.cpp9
-rw-r--r--src/mongo/s/comparable_chunk_version_test.cpp50
-rw-r--r--src/mongo/s/mongos_core_options_stub.cpp (renamed from src/mongo/db/initialize_snmp.cpp)22
-rw-r--r--src/mongo/s/mongos_main.cpp10
-rw-r--r--src/mongo/s/query/cluster_exchange_test.cpp4
-rw-r--r--src/mongo/s/query/sharded_agg_test_fixture.h2
-rw-r--r--src/mongo/s/request_types/balance_chunk_request_test.cpp51
-rw-r--r--src/mongo/s/request_types/balance_chunk_request_type.cpp7
-rw-r--r--src/mongo/s/request_types/balance_chunk_request_type.h11
-rw-r--r--src/mongo/s/request_types/commit_chunk_migration_request_test.cpp93
-rw-r--r--src/mongo/s/request_types/commit_chunk_migration_request_type.cpp173
-rw-r--r--src/mongo/s/request_types/commit_chunk_migration_request_type.h109
-rw-r--r--src/mongo/s/request_types/configure_collection_balancing.idl7
-rw-r--r--src/mongo/s/request_types/move_chunk_request_test.cpp6
-rw-r--r--src/mongo/s/request_types/set_shard_version_request.cpp142
-rw-r--r--src/mongo/s/request_types/set_shard_version_request.h108
-rw-r--r--src/mongo/s/request_types/set_shard_version_request_test.cpp108
-rw-r--r--src/mongo/s/request_types/sharded_ddl_commands.idl3
-rw-r--r--src/mongo/s/routing_table_history_test.cpp94
-rw-r--r--src/mongo/s/s_sharding_server_status.cpp10
-rw-r--r--src/mongo/s/service_entry_point_mongos.cpp2
-rw-r--r--src/mongo/s/service_entry_point_mongos.h2
-rw-r--r--src/mongo/s/sessions_collection_sharded_test.cpp23
-rw-r--r--src/mongo/s/sharding_feature_flags.idl4
-rw-r--r--src/mongo/s/stale_shard_version_helpers_test.cpp8
-rw-r--r--src/mongo/s/transaction_router_test.cpp223
-rw-r--r--src/mongo/s/write_ops/batch_write_exec_test.cpp220
-rw-r--r--src/mongo/s/write_ops/batch_write_op.cpp11
-rw-r--r--src/mongo/s/write_ops/batch_write_op_test.cpp6
-rw-r--r--src/mongo/s/write_ops/batched_command_request.cpp2
-rw-r--r--src/mongo/s/write_ops/batched_command_request_test.cpp11
-rw-r--r--src/mongo/s/write_ops/batched_command_response_test.cpp41
-rw-r--r--src/mongo/s/write_ops/write_op_test.cpp34
-rw-r--r--src/mongo/scripting/SConscript2
-rw-r--r--src/mongo/scripting/mozjs/mongo.cpp72
-rw-r--r--src/mongo/shell/SConscript2
-rw-r--r--src/mongo/shell/db.js47
-rw-r--r--src/mongo/shell/encrypted_dbclient_base.cpp26
-rw-r--r--src/mongo/shell/encrypted_dbclient_base.h15
-rw-r--r--src/mongo/shell/query.js13
-rw-r--r--src/mongo/shell/shardingtest.js10
-rw-r--r--src/mongo/shell/shell_utils.cpp7
-rw-r--r--src/mongo/shell/shell_utils_extended.cpp3
-rw-r--r--src/mongo/tools/bridge.cpp2
-rw-r--r--src/mongo/transport/service_entry_point_impl.cpp392
-rw-r--r--src/mongo/transport/service_entry_point_impl.h57
-rw-r--r--src/mongo/transport/service_executor.cpp18
-rw-r--r--src/mongo/transport/service_executor.h16
-rw-r--r--src/mongo/transport/service_state_machine.cpp121
-rw-r--r--src/mongo/transport/service_state_machine.h34
-rw-r--r--src/mongo/transport/service_state_machine_test.cpp3
-rw-r--r--src/mongo/unittest/SConscript1
-rw-r--r--src/mongo/unittest/death_test.cpp12
-rw-r--r--src/mongo/unittest/temp_dir.cpp34
-rw-r--r--src/mongo/unittest/temp_dir.h14
-rw-r--r--src/mongo/unittest/unittest.idl36
-rw-r--r--src/mongo/unittest/unittest_main.cpp5
-rw-r--r--src/mongo/unittest/unittest_options.idl3
-rw-r--r--src/mongo/util/SConscript32
-rw-r--r--src/mongo/util/assert_util.h31
-rw-r--r--src/mongo/util/assert_util_test.cpp14
-rw-r--r--src/mongo/util/concurrency/ticketholder.h3
-rw-r--r--src/mongo/util/dns_query_posix-impl.h2
-rw-r--r--src/mongo/util/pcre.cpp508
-rw-r--r--src/mongo/util/pcre.h642
-rw-r--r--src/mongo/util/pcre_test.cpp358
-rw-r--r--src/mongo/util/pcre_util.cpp80
-rw-r--r--src/mongo/util/pcre_util.h60
-rw-r--r--src/mongo/util/pcre_util_test.cpp112
-rw-r--r--src/mongo/util/procparser_test.cpp19
-rw-r--r--src/mongo/util/queue.h5
-rw-r--r--src/mongo/util/signal_handlers.h3
-rw-r--r--src/mongo/util/stacktrace_posix.cpp3
-rw-r--r--src/third_party/SConscript40
-rw-r--r--src/third_party/wiredtiger/INSTALL4
-rw-r--r--src/third_party/wiredtiger/NEWS12
-rw-r--r--src/third_party/wiredtiger/README8
-rw-r--r--src/third_party/wiredtiger/RELEASE_INFO4
-rw-r--r--src/third_party/wiredtiger/cmake/configs/base.cmake19
-rw-r--r--src/third_party/wiredtiger/cmake/configs/modes.cmake15
-rw-r--r--src/third_party/wiredtiger/cmake/configs/version.cmake8
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py20
-rwxr-xr-xsrc/third_party/wiredtiger/dist/flags.py7
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_string6
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok2
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_void1
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py1
-rw-r--r--src/third_party/wiredtiger/ext/storage_sources/dir_store/dir_store.c16
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_cursor.c15
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c14
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_delete.c255
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_discard.c1
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_page.c13
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c27
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c11
-rw-r--r--src/third_party/wiredtiger/src/config/config_collapse.c24
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c163
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c14
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_dhandle.c18
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_tiered.c44
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_backup.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_file.c3
-rw-r--r--src/third_party/wiredtiger/src/docs/backup.dox10
-rw-r--r--src/third_party/wiredtiger/src/docs/cursor-ops.dox11
-rw-r--r--src/third_party/wiredtiger/src/docs/data-sources.dox13
-rw-r--r--src/third_party/wiredtiger/src/docs/explain-isolation.dox4
-rw-r--r--src/third_party/wiredtiger/src/docs/timestamp-txn.dox111
-rw-r--r--src/third_party/wiredtiger/src/docs/transactions_api.dox6
-rw-r--r--src/third_party/wiredtiger/src/docs/upgrading.dox2
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c11
-rw-r--r--src/third_party/wiredtiger/src/include/api.h31
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h99
-rw-r--r--src/third_party/wiredtiger/src/include/btree_inline.h174
-rw-r--r--src/third_party/wiredtiger/src/include/cache_inline.h9
-rw-r--r--src/third_party/wiredtiger/src/include/cell_inline.h71
-rw-r--r--src/third_party/wiredtiger/src/include/config.h15
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h21
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h21
-rw-r--r--src/third_party/wiredtiger/src/include/meta.h4
-rw-r--r--src/third_party/wiredtiger/src/include/misc.h6
-rw-r--r--src/third_party/wiredtiger/src/include/reconcile.h3
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h1
-rw-r--r--src/third_party/wiredtiger/src/include/txn_inline.h124
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in669
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_ckpt.c51
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_child.c145
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_visibility.c415
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c6
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_truncate.c6
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c57
-rw-r--r--src/third_party/wiredtiger/src/session/session_dhandle.c116
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c3
-rw-r--r--src/third_party/wiredtiger/src/tiered/tiered_handle.c5
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c11
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_recover.c2
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c20
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/configs/burst_inserts_stress.txt37
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/configs/cache_resize_default.txt4
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/configs/cursor_bound_01_default.txt10
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/configs/hs_cleanup_stress.txt49
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/configs/operations_test_stress.txt51
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/src/component/metrics_writer.cpp4
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/src/component/operation_tracker.cpp8
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/src/component/operation_tracker.h4
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/src/main/test.cpp5
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/src/main/thread_worker.cpp9
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/tests/bounded_cursor_perf.cpp6
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/tests/burst_inserts.cpp6
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/tests/cache_resize.cpp35
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/tests/cursor_bound_01.cpp162
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/tests/test_template.cpp4
-rw-r--r--src/third_party/wiredtiger/test/csuite/random_directio/main.c180
-rw-r--r--src/third_party/wiredtiger/test/csuite/random_directio/util.c57
-rw-r--r--src/third_party/wiredtiger/test/csuite/schema_abort/main.c127
-rw-r--r--src/third_party/wiredtiger/test/csuite/tiered_abort/main.c30
-rw-r--r--src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c2
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt8057_compact_stress/main.c2
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml154
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh4
-rw-r--r--src/third_party/wiredtiger/test/suite/test_alter03.py5
-rw-r--r--src/third_party/wiredtiger/test/suite/test_checkpoint25.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_compat01.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_compat02.py4
-rw-r--r--src/third_party/wiredtiger/test/suite/test_compat03.py3
-rw-r--r--src/third_party/wiredtiger/test/suite/test_compat04.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_cursor21.py127
-rw-r--r--src/third_party/wiredtiger/test/suite/test_rollback_to_stable36.py185
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_tiered02.py16
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp20.py6
-rw-r--r--src/third_party/wiredtiger/test/suite/test_truncate12.py231
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/wtdataset.py12
-rw-r--r--src/third_party/wiredtiger/test/utility/parse_opts.c5
-rw-r--r--src/third_party/wiredtiger/test/utility/test_util.h3
1221 files changed, 28845 insertions, 16455 deletions
diff --git a/.gitignore b/.gitignore
index 0c227d5625b..f3c9a27efae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,6 +62,7 @@ venv
/buildscripts/libdeps/graph_visualizer_web_stack/node_modules
package-lock.json
libdeps.graphml
+build-metrics.json
config.log
settings.py
log_config.py
@@ -172,6 +173,9 @@ CMakeLists.txt
.vs/
/x64/
+# VSCode JavaScript config files
+**/jsconfig.json
+
# Windows MSI builder artifacts
*.msi
*.wixobj
diff --git a/SConstruct b/SConstruct
index e7017034252..ffee0c3322d 100644
--- a/SConstruct
+++ b/SConstruct
@@ -6,6 +6,7 @@ import errno
import json
import os
import re
+import platform
import shlex
import shutil
import stat
@@ -715,6 +716,19 @@ add_option(
)
add_option(
+ 'build-metrics',
+ metavar="FILE",
+ const='build-metrics.json',
+ default='build-metrics.json',
+ help='Enable tracking of build performance and output data as json.'
+ ' Use "-" to output json to stdout, or supply a path to the desired'
+ ' file to output to. If no argument is supplied, the default log'
+ ' file will be "build-metrics.json".',
+ nargs='?',
+ type=str,
+)
+
+add_option(
'visibility-support',
choices=['auto', 'on', 'off'],
const='auto',
@@ -724,6 +738,13 @@ add_option(
type='choice',
)
+add_option(
+ 'force-macos-dynamic-link',
+ default=False,
+ action='store_true',
+ help='Bypass link-model=dynamic check for macos versions <12.',
+)
+
try:
with open("version.json", "r") as version_fp:
version_data = json.load(version_fp)
@@ -1307,6 +1328,12 @@ env_vars.Add(
default='$BUILD_ROOT/tmp_test_data',
validator=PathVariable.PathAccept,
), )
+
+env_vars.AddVariables(
+ ("BUILD_METRICS_EVG_TASK_ID", "Evergreen task ID to add to build metrics data."),
+ ("BUILD_METRICS_EVG_BUILD_VARIANT", "Evergreen build variant to add to build metrics data."),
+)
+
# -- Validate user provided options --
# A dummy environment that should *only* have the variables we have set. In practice it has
@@ -1327,7 +1354,7 @@ variables_only_env = Environment(
if GetOption('help'):
try:
Help('\nThe following variables may also be set like scons VARIABLE=value\n', append=True)
- Help(env_vars.GenerateHelpText(variables_only_env), append=True)
+ Help(env_vars.GenerateHelpText(variables_only_env, sort=True), append=True)
Help(
'\nThe \'list-targets\' target can be built to list useful comprehensive build targets\n',
append=True)
@@ -1450,6 +1477,11 @@ if get_option('build-tools') == 'next':
env = Environment(variables=env_vars, **envDict)
del envDict
+if get_option('build-metrics'):
+ env.Tool('build_metrics')
+ env.AddBuildMetricsMetaData('evg_id', env.get("BUILD_METRICS_EVG_TASK_ID", "UNKNOWN"))
+ env.AddBuildMetricsMetaData('variant', env.get("BUILD_METRICS_EVG_BUILD_VARIANT", "UNKNOWN"))
+
# TODO SERVER-42170 We can remove this Execute call
# when support for PathIsDirCreate can be used as a validator
# to the Variable above.
@@ -1834,6 +1866,29 @@ if link_model.startswith('dynamic') and get_option('install-action') == 'symlink
f"Options '--link-model={link_model}' not supported with '--install-action={get_option('install-action')}'."
)
+if link_model == 'dynamic' and env.TargetOSIs(
+ 'darwin') and not get_option('force-macos-dynamic-link'):
+
+ macos_version_message = textwrap.dedent("""\
+        link-model=dynamic is only supported on macos version 12 or higher.
+ This is due to a 512 dylib RUNTIME limit on older macos. See this post for
+ more information: https://developer.apple.com/forums//thread/708366?login=true&page=1#717495022
+ Use '--force-macos-dynamic-link' to bypass this check.
+ """)
+
+ try:
+ macos_version_major = int(platform.mac_ver()[0].split('.')[0])
+ if macos_version_major < 12:
+ env.FatalError(
+ textwrap.dedent(f"""\
+ Macos version detected: {macos_version_major}
+ """) + macos_version_message)
+    except (IndexError, TypeError, ValueError) as exc:
+ env.FatalError(
+ textwrap.dedent(f"""\
+ Failed to detect macos version: {exc}
+ """) + macos_version_message)
+
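
The check above hinges on `platform.mac_ver()`, which returns a `(release, versioninfo, machine)` tuple whose release string is parsed for the major version. A minimal sketch of that parse, with example values only:

```python
import platform

# On macOS 12.x, mac_ver() typically returns something like ('12.4', ('', '', ''), 'arm64');
# on non-macOS platforms the release string is empty, hence the guarded parse.
release, _versioninfo, _machine = platform.mac_ver()
major = int(release.split('.')[0]) if release else None
print(major)
```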
# libunwind configuration.
# In which the following globals are set and normalized to bool:
# - use_libunwind
@@ -2898,15 +2953,25 @@ if get_option("system-boost-lib-search-suffixes") is not None:
# discover modules, and load the (python) module for each module's build.py
mongo_modules = moduleconfig.discover_modules('src/mongo/db/modules', get_option('modules'))
-if get_option('ninja') != 'disabled':
- for module in mongo_modules:
- if hasattr(module, 'NinjaFile'):
- env.FatalError(
- textwrap.dedent("""\
- ERROR: Ninja tool option '--ninja' should not be used with the ninja module.
- Remove the ninja module directory or use '--modules= ' to select no modules.
- If using enterprise module, explicitly set '--modules=<name-of-enterprise-module>' to exclude the ninja module."""
- ))
+has_ninja_module = False
+for module in mongo_modules:
+ if hasattr(module, 'NinjaFile'):
+ has_ninja_module = True
+ break
+
+if get_option('ninja') != 'disabled' and has_ninja_module:
+ env.FatalError(
+ textwrap.dedent("""\
+ ERROR: Ninja tool option '--ninja' should not be used with the ninja module.
+ Using both options simultaneously may clobber build.ninja files.
+ Remove the ninja module directory or use '--modules= ' to select no modules.
+ If using enterprise module, explicitly set '--modules=<name-of-enterprise-module>' to exclude the ninja module."""
+ ))
+
+if has_ninja_module:
+ print(
+ "WARNING: You are attempting to use the unsupported/legacy ninja module, instead of the integrated ninja generator. You are strongly encouraged to remove the ninja module from your module list and invoke scons with --ninja generate-ninja"
+ )
# --- check system ---
ssl_provider = None
@@ -3525,7 +3590,8 @@ def doConfigure(myenv):
if myenv.ToolchainIs('msvc'):
if get_option('cxx-std') == "17":
- myenv.AppendUnique(CCFLAGS=['/std:c++17'])
+ myenv.AppendUnique(CCFLAGS=['/std:c++17',
+ '/Zc:lambda']) # /Zc:lambda is implied by /std:c++20
elif get_option('cxx-std') == "20":
myenv.AppendUnique(CCFLAGS=['/std:c++20'])
else:
@@ -4012,7 +4078,17 @@ def doConfigure(myenv):
# If anything is changed, added, or removed in either asan_options or
# lsan_options, be sure to make the corresponding changes to the
# appropriate build variants in etc/evergreen.yml
- asan_options = "detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1"
+ asan_options_clear = [
+ "detect_leaks=1",
+ "check_initialization_order=true",
+ "strict_init_order=true",
+ "abort_on_error=1",
+ "disable_coredump=0",
+ "handle_abort=1",
+ "strict_string_checks=true",
+ "detect_invalid_pointer_pairs=1",
+ ]
+ asan_options = ":".join(asan_options_clear)
lsan_options = f"report_objects=1:suppressions={myenv.File('#etc/lsan.suppressions').abspath}"
env['ENV']['ASAN_OPTIONS'] = asan_options + symbolizer_option
env['ENV']['LSAN_OPTIONS'] = lsan_options + symbolizer_option
diff --git a/buildscripts/antithesis/README.md b/buildscripts/antithesis/README.md
index 8b0a5de5998..8cff329010c 100644
--- a/buildscripts/antithesis/README.md
+++ b/buildscripts/antithesis/README.md
@@ -13,8 +13,8 @@ use Antithesis today.
## Base Images
The `base_images` directory consists of the building blocks for creating a MongoDB test topology.
These images are uploaded to the Antithesis Docker registry weekly during the
-`antithesis_image_build` task. For more visibility into how these images are built and uploaded to
-the Antithesis Docker registry, please see `evergreen/antithesis_image_build.sh`.
+`antithesis_image_push` task. For more visibility into how these images are built and uploaded to
+the Antithesis Docker registry, please see that task.
### mongo_binaries
This image contains the latest `mongo`, `mongos` and `mongod` binaries. It can be used to
@@ -27,7 +27,7 @@ container is not part of the actual toplogy. The purpose of a `workload` contain
`mongo` commands to complete the topology setup, and to run a test suite on an existing topology
like so:
```shell
-buildscript/resmoke.py run --suite antithesis_concurrency_sharded_with_stepdowns_and_balancer --shellConnString "mongodb://mongos:27017"
+buildscripts/resmoke.py run --suite antithesis_concurrency_sharded_with_stepdowns_and_balancer
```
**Every topology must have 1 workload container.**
@@ -46,18 +46,19 @@ consists of a `docker-compose.yml`, a `logs` directory, a `scripts` directory an
directory. If this is structured properly, you should be able to copy the files & directories
from this image and run `docker-compose up` to set up the desired topology.
-Example from `buildscripts/antithesis/topologies/replica_set/Dockerfile`:
+Example from `buildscripts/antithesis/topologies/sharded_cluster/Dockerfile`:
```Dockerfile
FROM scratch
COPY docker-compose.yml /
ADD scripts /scripts
ADD logs /logs
ADD data /data
+ADD debug /debug
```
All topology images are built and uploaded to the Antithesis Docker registry during the
-`antithesis_image_build` task in the `evergreen/antithesis_image_build.sh` script. Some of these
-directories are created in `evergreen/antithesis_image_build.sh` such as `/data` and `/logs`.
+`antithesis_image_push` task. Some of these directories, such as `/data` and `/logs`,
+are created by the `evergreen/antithesis_image_build.sh` script.
Note: These images serve solely as a filesystem containing all necessary files for a topology,
therefore use `FROM scratch`.
@@ -66,20 +67,38 @@ therefore use `FROM scratch`.
This describes how to construct the corresponding topology using the
`mongo-binaries` and `workload` images.
-Example from `buildscripts/antithesis/topologies/replica_set/docker-compose.yml`:
+Example from `buildscripts/antithesis/topologies/sharded_cluster/docker-compose.yml`:
```yml
version: '3.0'
services:
- database1:
+ configsvr1:
+ container_name: configsvr1
+ hostname: configsvr1
+ image: mongo-binaries:evergreen-latest-master
+ volumes:
+ - ./logs/configsvr1:/var/log/mongodb/
+ - ./scripts:/scripts/
+ - ./data/configsvr1:/data/configdb/
+ command: /bin/bash /scripts/configsvr_init.sh
+ networks:
+ antithesis-net:
+ ipv4_address: 10.20.20.6
+      # Set an IPv4 address of 10.20.20.130 or higher
+ # to be ignored by the fault injector
+ #
+
+ configsvr2: ...
+ configsvr3: ...
+  database1:
container_name: database1
hostname: database1
image: mongo-binaries:evergreen-latest-master
- command: /bin/bash /scripts/database_init.sh
volumes:
- ./logs/database1:/var/log/mongodb/
- ./scripts:/scripts/
- ./data/database1:/data/db/
+ command: /bin/bash /scripts/database_init.sh Shard1
networks:
antithesis-net:
ipv4_address: 10.20.20.3
@@ -88,37 +107,59 @@ services:
#
database2: ...
database3: ...
+ database4: ...
+ database5: ...
+ database6: ...
+ mongos:
+ container_name: mongos
+ hostname: mongos
+ image: mongo-binaries:evergreen-latest-master
+ volumes:
+ - ./logs/mongos:/var/log/mongodb/
+ - ./scripts:/scripts/
+ command: python3 /scripts/mongos_init.py
+ depends_on:
+ - "database1"
+ - "database2"
+ - "database3"
+ - "database4"
+ - "database5"
+ - "database6"
+ - "configsvr1"
+ - "configsvr2"
+ - "configsvr3"
+ networks:
+ antithesis-net:
+ ipv4_address: 10.20.20.9
+ # The subnet provided here is an example
+ # An alternative subnet can be used
workload:
container_name: workload
hostname: workload
image: workload:evergreen-latest-master
- command: /bin/bash /scripts/workload_init.sh
volumes:
- ./logs/workload:/var/log/resmoke/
- ./scripts:/scripts/
+ command: python3 /scripts/workload_init.py
depends_on:
- - "database1"
- - "database2"
- - "database3"
+ - "mongos"
networks:
antithesis-net:
ipv4_address: 10.20.20.130
# The subnet provided here is an example
# An alternative subnet can be used
-
networks:
antithesis-net:
driver: bridge
ipam:
config:
- subnet: 10.20.20.0/24
-
```
-Each container must have a `command`in `docker-compose.yml` that runs an init script. The init
+Each container must have a `command` in `docker-compose.yml` that runs an init script. The init
script belongs in the `scripts` directory, which is included as a volume. The `command` should be
-set like so: `/bin/bash /scripts/[script_name].sh`. This is a requirement for the topology to start
-up properly in Antithesis.
+set like so: `/bin/bash /scripts/[script_name].sh` or `python3 /scripts/[script_name].py`. This is
+a requirement for the topology to start up properly in Antithesis.
When creating `mongod` or `mongos` instances, route the logs like so:
`--logpath /var/log/mongodb/mongodb.log` and utilize `volumes` -- as in `database1`.
@@ -133,28 +174,24 @@ Use the `evergreen-latest-master` tag for all images. This is updated automatica
### scripts
-Example from `buildscripts/antithesis/topologies/replica_set/scripts/workload_init.sh`:
-```shell
-sleep 5s
-mongo --host database1 --port 27017 --eval "config={\"_id\" : \"RollbackFuzzerTest\",\"protocolVersion\" : 1,\"members\" : [{\"_id\" : 0,\"host\" : \"database1:27017\"}, {\"_id\" : 1,\"host\" : \"database2:27017\"}, {\"_id\" : 2,\"host\" : \"database3:27017\"} ],\"settings\" : {\"chainingAllowed\" : false,\"electionTimeoutMillis\" : 500, \"heartbeatTimeoutSecs\" : 1, \"catchUpTimeoutMillis\": 700}}; rs.initiate(config)"
-
-# this cryptic statement keeps the workload container running.
-tail -f /dev/null
-```
-The `sleep` command can be useful to ensure that other containers have had a chance to start. In
-this example, the `workload` container waits 5 seconds while the database containers start.
-After that, it initiates the replica set. The `tail -f /dev/null` is required for `workload`
-containers otherwise the container shuts down.
+Take a look at `buildscripts/antithesis/topologies/sharded_cluster/scripts/mongos_init.py` to see
+how to use util methods from `buildscripts/antithesis/topologies/sharded_cluster/scripts/utils.py`
+to set up the desired topology. You can also use simple shell scripts, as in the case of
+`buildscripts/antithesis/topologies/sharded_cluster/scripts/database_init.sh`. These init scripts
+must not exit, so that the underlying container stays alive: use an infinite while loop for
+`python` scripts or `tail -f /dev/null` for shell scripts.
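
As a rough sketch, an init script for a hypothetical three-node replica set could follow the same pattern as `mongos_init.py`: wait for its members with the `utils.py` helpers, initiate the set, and then block forever. The set name, hostnames, and port below are placeholders:

```python
# Illustrative init script for a hypothetical three-node replica set; the set
# name, hostnames, and port are placeholders, not part of an existing topology.
import json
import subprocess

from utils import mongo_process_running, retry_until_success

REPLSET_CONFIG = {
    "_id": "ExampleReplSet",
    "protocolVersion": 1,
    "members": [
        {"_id": 0, "host": "database1:27017"},
        {"_id": 1, "host": "database2:27017"},
        {"_id": 2, "host": "database3:27017"},
    ],
}

# Wait until every member accepts connections, then initiate the replica set.
for host in ("database1", "database2", "database3"):
    retry_until_success(mongo_process_running, {"host": host, "port": 27017})

retry_until_success(
    subprocess.run, {
        "args": [
            "mongo", "--host", "database1", "--port", "27017", "--eval",
            f"rs.initiate({json.dumps(REPLSET_CONFIG)})"
        ],
        "check": True,
    })

# Keep the container alive so the topology stays up in Antithesis.
while True:
    continue
```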
## How do I create a new topology for Antithesis testing?
Creating a new topology for Antithesis testing is easy and requires only a few simple steps.
1. Add a new directory in `buildscripts/antithesis/topologies` to represent your desired topology.
You can use existing topologies as an example.
-2. Update the `evergreen/antithesis_image_build.sh` file so that your new topology image is
+2. Make sure that your workload test suite runs against your topology without any failures. This
+may require tagging some tests as `antithesis_incompatible`.
+3. Update the `antithesis_image_push` task so that your new topology image is
uploaded to the Antithesis Docker registry.
-3. Reach out to #server-testing on Slack & provide the new topology image name as well as the
+4. Reach out to #server-testing on Slack & provide the new topology image name as well as the
desired test suite to run.
-4. Include a member of the STM team on the code review.
+5. Include the SDP team on the code review.
These are the required updates to `evergreen/antithesis_image_build.sh`:
- Add the following command for each of your `mongos` and `mongod` containers in your topology to
@@ -169,6 +206,7 @@ cd [your_topology_dir]
sed -i s/evergreen-latest-master/$tag/ docker-compose.yml
sudo docker build . -t [your-topology-name]-config:$tag
```
+These are the required updates to `evergreen/antithesis_image_push.sh`:
- Push your new image to the Antithesis Docker registry
```shell
sudo docker tag "[your-topology-name]-config:$tag" "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/[your-topology-name]-config:$tag"
diff --git a/buildscripts/antithesis/base_images/mongo_binaries/Dockerfile b/buildscripts/antithesis/base_images/mongo_binaries/Dockerfile
index fe93253eef4..ad0709286ed 100644
--- a/buildscripts/antithesis/base_images/mongo_binaries/Dockerfile
+++ b/buildscripts/antithesis/base_images/mongo_binaries/Dockerfile
@@ -12,7 +12,7 @@ RUN mkdir -p /scripts
# Install dependencies of MongoDB Server
RUN apt-get update
-RUN apt-get install -qy libcurl4 libgssapi-krb5-2 libldap-2.4-2 libwrap0 libsasl2-2 libsasl2-modules libsasl2-modules-gssapi-mit snmp openssl liblzma5
+RUN apt-get install -qy libcurl4 libgssapi-krb5-2 libldap-2.4-2 libwrap0 libsasl2-2 libsasl2-modules libsasl2-modules-gssapi-mit openssl liblzma5 python3
# -------------------
# Everything above this line should be common image setup
diff --git a/buildscripts/antithesis/base_images/workload/Dockerfile b/buildscripts/antithesis/base_images/workload/Dockerfile
index 58f52bda489..b8d46508252 100644
--- a/buildscripts/antithesis/base_images/workload/Dockerfile
+++ b/buildscripts/antithesis/base_images/workload/Dockerfile
@@ -14,7 +14,7 @@ RUN debconf-set-selections /tmp/preseed.txt
RUN rm /tmp/preseed.txt
RUN apt-get update
-RUN apt-get install -qy libcurl4 libgssapi-krb5-2 libldap-2.4-2 libwrap0 libsasl2-2 libsasl2-modules libsasl2-modules-gssapi-mit snmp openssl liblzma5 libssl-dev build-essential software-properties-common
+RUN apt-get install -qy libcurl4 libgssapi-krb5-2 libldap-2.4-2 libwrap0 libsasl2-2 libsasl2-modules libsasl2-modules-gssapi-mit openssl liblzma5 libssl-dev build-essential software-properties-common
RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get update
@@ -30,8 +30,12 @@ COPY src /resmoke
RUN bash -c "cd /resmoke && python3.9 -m venv python3-venv && . python3-venv/bin/activate && pip install --upgrade pip wheel && pip install -r ./buildscripts/requirements.txt && ./buildscripts/antithesis_suite.py generate-all"
+# copy the run_suite.py script & mongo binary -- make sure they are executable
+COPY run_suite.py /resmoke
+
COPY mongo /usr/bin
RUN chmod +x /usr/bin/mongo
+
COPY libvoidstar.so /usr/lib/libvoidstar.so
RUN /usr/bin/mongo --version
diff --git a/buildscripts/antithesis/base_images/workload/run_suite.py b/buildscripts/antithesis/base_images/workload/run_suite.py
new file mode 100644
index 00000000000..ca06f3423ad
--- /dev/null
+++ b/buildscripts/antithesis/base_images/workload/run_suite.py
@@ -0,0 +1,23 @@
+"""Script to run suite in Antithesis from the workload container."""
+import subprocess
+from time import sleep
+import pymongo
+
+client = pymongo.MongoClient(host="mongos", port=27017, serverSelectionTimeoutMS=30000)
+
+while True:
+ payload = client.admin.command({"listShards": 1})
+ if len(payload["shards"]) == 2:
+ print("Sharded Cluster available.")
+ break
+ if len(payload["shards"]) < 2:
+ print("Waiting for shards to be added to cluster.")
+ sleep(5)
+ continue
+ if len(payload["shards"]) > 2:
+ raise RuntimeError('More shards in cluster than expected.')
+
+subprocess.run([
+ "./buildscripts/resmoke.py", "run", "--suite",
+ "antithesis_concurrency_sharded_with_stepdowns_and_balancer"
+], check=True)
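
The polling loop in `run_suite.py` keys off the `shards` array of the `listShards` reply. An abbreviated sketch of that payload once both shards have joined, with illustrative field values only:

```python
# Abbreviated, illustrative listShards reply for the sharded_cluster topology.
payload = {
    "shards": [
        {"_id": "Shard1", "host": "Shard1/database1:27018,database2:27018,database3:27018", "state": 1},
        {"_id": "Shard2", "host": "Shard2/database4:27018,database5:27018,database6:27018", "state": 1},
    ],
    "ok": 1.0,
}
assert len(payload["shards"]) == 2  # the condition the loop waits for before starting resmoke
```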
diff --git a/buildscripts/antithesis/topologies/sharded_cluster/docker-compose.yml b/buildscripts/antithesis/topologies/sharded_cluster/docker-compose.yml
index 767a4d19eb1..8f4a5ce9a62 100644
--- a/buildscripts/antithesis/topologies/sharded_cluster/docker-compose.yml
+++ b/buildscripts/antithesis/topologies/sharded_cluster/docker-compose.yml
@@ -146,8 +146,14 @@ services:
volumes:
- ./logs/mongos:/var/log/mongodb/
- ./scripts:/scripts/
- command: /bin/bash /scripts/mongos_init.sh
+ command: python3 /scripts/mongos_init.py
depends_on:
+ - "database1"
+ - "database2"
+ - "database3"
+ - "database4"
+ - "database5"
+ - "database6"
- "configsvr1"
- "configsvr2"
- "configsvr3"
@@ -163,17 +169,8 @@ services:
volumes:
- ./logs/workload:/var/log/resmoke/
- ./scripts:/scripts/
- command: /bin/bash /scripts/workload_init.sh
+ command: python3 /scripts/workload_init.py
depends_on:
- - "database1"
- - "database2"
- - "database3"
- - "database4"
- - "database5"
- - "database6"
- - "configsvr1"
- - "configsvr2"
- - "configsvr3"
- "mongos"
networks:
antithesis-net:
diff --git a/buildscripts/antithesis/topologies/sharded_cluster/scripts/mongos_init.py b/buildscripts/antithesis/topologies/sharded_cluster/scripts/mongos_init.py
new file mode 100644
index 00000000000..0d3aa7cfc8d
--- /dev/null
+++ b/buildscripts/antithesis/topologies/sharded_cluster/scripts/mongos_init.py
@@ -0,0 +1,158 @@
+"""Script to configure a sharded cluster in Antithesis from the mongos container."""
+import json
+import subprocess
+from utils import mongo_process_running, retry_until_success
+
+CONFIGSVR_CONFIG = {
+ "_id": "ConfigServerReplSet",
+ "configsvr": True,
+ "protocolVersion": 1,
+ "members": [
+ {"_id": 0, "host": "configsvr1:27019"},
+ {"_id": 1, "host": "configsvr2:27019"},
+ {"_id": 2, "host": "configsvr3:27019"},
+ ],
+ "settings": {
+ "chainingAllowed": False,
+ "electionTimeoutMillis": 2000,
+ "heartbeatTimeoutSecs": 1,
+ "catchUpTimeoutMillis": 0,
+ },
+}
+
+SHARD1_CONFIG = {
+ "_id": "Shard1",
+ "protocolVersion": 1,
+ "members": [
+ {"_id": 0, "host": "database1:27018"},
+ {"_id": 1, "host": "database2:27018"},
+ {"_id": 2, "host": "database3:27018"},
+ ],
+ "settings": {
+ "chainingAllowed": False,
+ "electionTimeoutMillis": 2000,
+ "heartbeatTimeoutSecs": 1,
+ "catchUpTimeoutMillis": 0,
+ },
+}
+
+SHARD2_CONFIG = {
+ "_id": "Shard2",
+ "protocolVersion": 1,
+ "members": [
+ {"_id": 0, "host": "database4:27018"},
+ {"_id": 1, "host": "database5:27018"},
+ {"_id": 2, "host": "database6:27018"},
+ ],
+ "settings": {
+ "chainingAllowed": False,
+ "electionTimeoutMillis": 2000,
+ "heartbeatTimeoutSecs": 1,
+ "catchUpTimeoutMillis": 0,
+ },
+}
+
+# Create ConfigServerReplSet once all nodes are up
+retry_until_success(mongo_process_running, {"host": "configsvr1", "port": 27019})
+retry_until_success(mongo_process_running, {"host": "configsvr2", "port": 27019})
+retry_until_success(mongo_process_running, {"host": "configsvr3", "port": 27019})
+retry_until_success(
+ subprocess.run, {
+ "args": [
+ "mongo",
+ "--host",
+ "configsvr1",
+ "--port",
+ "27019",
+ "--eval",
+ f"rs.initiate({json.dumps(CONFIGSVR_CONFIG)})",
+ ],
+ "check": True,
+ })
+
+# Create Shard1 once all nodes are up
+retry_until_success(mongo_process_running, {"host": "database1", "port": 27018})
+retry_until_success(mongo_process_running, {"host": "database2", "port": 27018})
+retry_until_success(mongo_process_running, {"host": "database3", "port": 27018})
+retry_until_success(
+ subprocess.run, {
+ "args": [
+ "mongo",
+ "--host",
+ "database1",
+ "--port",
+ "27018",
+ "--eval",
+ f"rs.initiate({json.dumps(SHARD1_CONFIG)})",
+ ],
+ "check": True,
+ })
+
+# Create Shard2 once all nodes are up
+retry_until_success(mongo_process_running, {"host": "database4", "port": 27018})
+retry_until_success(mongo_process_running, {"host": "database5", "port": 27018})
+retry_until_success(mongo_process_running, {"host": "database6", "port": 27018})
+retry_until_success(
+ subprocess.run, {
+ "args": [
+ "mongo",
+ "--host",
+ "database4",
+ "--port",
+ "27018",
+ "--eval",
+ f"rs.initiate({json.dumps(SHARD2_CONFIG)})",
+ ],
+ "check": True,
+ })
+
+# Start mongos
+retry_until_success(
+ subprocess.run, {
+ "args": [
+ "mongos",
+ "--bind_ip",
+ "0.0.0.0",
+ "--configdb",
+ "ConfigServerReplSet/configsvr1:27019,configsvr2:27019,configsvr3:27019",
+ "--logpath",
+ "/var/log/mongodb/mongodb.log",
+ "--setParameter",
+ "enableTestCommands=1",
+ "--setParameter",
+ "fassertOnLockTimeoutForStepUpDown=0",
+ "--fork",
+ ],
+ "check": True,
+ })
+
+# Add shards to cluster
+retry_until_success(
+ subprocess.run, {
+ "args": [
+ "mongo",
+ "--host",
+ "mongos",
+ "--port",
+ "27017",
+ "--eval",
+ 'sh.addShard("Shard1/database1:27018,database2:27018,database3:27018")',
+ ],
+ "check": True,
+ })
+retry_until_success(
+ subprocess.run, {
+ "args": [
+ "mongo",
+ "--host",
+ "mongos",
+ "--port",
+ "27017",
+ "--eval",
+ 'sh.addShard("Shard2/database4:27018,database5:27018,database6:27018")',
+ ],
+ "check": True,
+ })
+
+while True:
+ continue
diff --git a/buildscripts/antithesis/topologies/sharded_cluster/scripts/mongos_init.sh b/buildscripts/antithesis/topologies/sharded_cluster/scripts/mongos_init.sh
deleted file mode 100755
index 49df9e67490..00000000000
--- a/buildscripts/antithesis/topologies/sharded_cluster/scripts/mongos_init.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-sleep 5s
-mongo --host configsvr1 --port 27019 --eval "config={\"_id\" : \"ConfigServerReplSet\",\"configsvr\" : true,\"protocolVersion\" : 1,\"members\" : [{\"_id\" : 0,\"host\" : \"configsvr1:27019\"}, {\"_id\" : 1,\"host\" : \"configsvr2:27019\"}, {\"_id\" : 2,\"host\" : \"configsvr3:27019\"} ],\"settings\" : {\"chainingAllowed\" : false,\"electionTimeoutMillis\" : 2000, \"heartbeatTimeoutSecs\" : 1, \"catchUpTimeoutMillis\": 0}}; rs.initiate(config)"
-
-mongos --bind_ip 0.0.0.0 --configdb ConfigServerReplSet/configsvr1:27019,configsvr2:27019,configsvr3:27019 --logpath /var/log/mongodb/mongodb.log --setParameter enableTestCommands=1 --setParameter fassertOnLockTimeoutForStepUpDown=0
-
-# this cryptic statement keeps the container running.
-tail -f /dev/null
diff --git a/buildscripts/antithesis/topologies/sharded_cluster/scripts/utils.py b/buildscripts/antithesis/topologies/sharded_cluster/scripts/utils.py
new file mode 100644
index 00000000000..3338c68e7e0
--- /dev/null
+++ b/buildscripts/antithesis/topologies/sharded_cluster/scripts/utils.py
@@ -0,0 +1,25 @@
+"""Util functions to assist in setting up a sharded cluster topology in Antithesis."""
+import subprocess
+import time
+
+
+def mongo_process_running(host, port):
+ """Check to see if the process at the given host & port is running."""
+ return subprocess.run(['mongo', '--host', host, '--port',
+ str(port), '--eval', '"db.stats()"'], check=True)
+
+
+def retry_until_success(func, kwargs=None, wait_time=1, timeout_period=30):
+ """Retry the function periodically until timeout."""
+ kwargs = {} if kwargs is None else kwargs
+ timeout = time.time() + timeout_period
+ while True:
+ if time.time() > timeout:
+ raise TimeoutError(
+ f"{func.__name__} called with {kwargs} timed out after {timeout_period} second(s).")
+ try:
+ func(**kwargs)
+ break
+ except: # pylint: disable=bare-except
+ print(f"Retrying {func.__name__} called with {kwargs} after {wait_time} second(s).")
+ time.sleep(wait_time)
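
Note that `retry_until_success` takes the target's keyword arguments as a plain dict rather than `**kwargs`, and raises `TimeoutError` once `timeout_period` elapses. A small usage sketch with a made-up flaky callable:

```python
# Usage sketch for retry_until_success with a made-up flaky callable.
from utils import retry_until_success

attempts = {"count": 0}

def flaky_ping(host, port):
    """Fail the first two calls, then succeed."""
    attempts["count"] += 1
    if attempts["count"] < 3:
        raise ConnectionError(f"{host}:{port} not ready yet")

# Retries once per second; raises TimeoutError if still failing after 10 seconds.
retry_until_success(flaky_ping, {"host": "database1", "port": 27018}, wait_time=1, timeout_period=10)
```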
diff --git a/buildscripts/antithesis/topologies/sharded_cluster/scripts/workload_init.py b/buildscripts/antithesis/topologies/sharded_cluster/scripts/workload_init.py
new file mode 100644
index 00000000000..3784825ba54
--- /dev/null
+++ b/buildscripts/antithesis/topologies/sharded_cluster/scripts/workload_init.py
@@ -0,0 +1,3 @@
+"""Script to initialize a workload container in Antithesis."""
+while True:
+ continue
diff --git a/buildscripts/antithesis/topologies/sharded_cluster/scripts/workload_init.sh b/buildscripts/antithesis/topologies/sharded_cluster/scripts/workload_init.sh
deleted file mode 100755
index acaf30ea036..00000000000
--- a/buildscripts/antithesis/topologies/sharded_cluster/scripts/workload_init.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-sleep 10s
-
-mongo --host database1 --port 27018 --eval "config={\"_id\" : \"Shard1\",\"protocolVersion\" : 1,\"members\" : [{\"_id\" : 0,\"host\" : \"database1:27018\"}, {\"_id\" : 1,\"host\" : \"database2:27018\"}, {\"_id\" : 2,\"host\" : \"database3:27018\"} ],\"settings\" : {\"chainingAllowed\" : false,\"electionTimeoutMillis\" : 2000, \"heartbeatTimeoutSecs\" : 1, \"catchUpTimeoutMillis\": 0}}; rs.initiate(config)"
-
-sleep 5s
-
-mongo --host database4 --port 27018 --eval "config={\"_id\" : \"Shard2\",\"protocolVersion\" : 1,\"members\" : [{\"_id\" : 0,\"host\" : \"database4:27018\"}, {\"_id\" : 1,\"host\" : \"database5:27018\"}, {\"_id\" : 2,\"host\" : \"database6:27018\"} ],\"settings\" : {\"chainingAllowed\" : false,\"electionTimeoutMillis\" : 2000, \"heartbeatTimeoutSecs\" : 1, \"catchUpTimeoutMillis\": 0}}; rs.initiate(config)"
-
-sleep 5s
-
-mongo --host mongos --port 27017 --eval 'sh.addShard("Shard1/database1:27018,database2:27018,database3:27018")'
-
-sleep 5s
-
-mongo --host mongos --port 27017 --eval 'sh.addShard("Shard2/database4:27018,database5:27018,database6:27018")'
-
-# this cryptic statement keeps the workload container running.
-tail -f /dev/null
diff --git a/buildscripts/antithesis_suite.py b/buildscripts/antithesis_suite.py
index 8397a992ee7..b8722ada418 100755
--- a/buildscripts/antithesis_suite.py
+++ b/buildscripts/antithesis_suite.py
@@ -3,7 +3,6 @@
import os.path
import sys
-import pathlib
import click
import yaml
@@ -12,35 +11,74 @@ import yaml
if __name__ == "__main__" and __package__ is None:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-SUITE_BLACKLIST = [
- "CheckReplDBHash",
- "CheckReplOplogs",
+HOOKS_BLACKLIST = [
"CleanEveryN",
"ContinuousStepdown",
- "ValidateCollections",
"CheckOrphansDeleted",
]
+_SUITES_PATH = os.path.join("buildscripts", "resmokeconfig", "suites")
-def _sanitize_hooks(hooks):
- if len(hooks) == 0:
- return hooks
- # it's either a list of strings, or a list of dicts, each with key 'class'
- if isinstance(hooks[0], str):
- return list(filter(lambda x: x not in SUITE_BLACKLIST, hooks))
- elif isinstance(hooks[0], dict):
- return list(filter(lambda x: x['class'] not in SUITE_BLACKLIST, hooks))
- else:
- raise RuntimeError('Unknown structure in hook. File a TIG ticket.')
+def delete_archival(suite):
+ """Remove archival for Antithesis environment."""
+ suite.pop("archive", None)
+ suite.get("executor", {}).pop("archive", None)
-def _sanitize_test_data(test_data):
- if test_data.get("useStepdownPermittedFile", None):
- test_data["useStepdownPermittedFile"] = False
- return test_data
+def make_hooks_compatible(suite):
+ """Make hooks compatible in Antithesis environment."""
+ if suite.get("executor", {}).get("hooks", None):
+ # it's either a list of strings, or a list of dicts, each with key 'class'
+ if isinstance(suite["executor"]["hooks"][0], str):
+ suite["executor"]["hooks"] = ["AntithesisLogging"] + [
+ hook for hook in suite["executor"]["hooks"] if hook not in HOOKS_BLACKLIST
+ ]
+ elif isinstance(suite["executor"]["hooks"][0], dict):
+ suite["executor"]["hooks"] = [{"class": "AntithesisLogging"}] + [
+ hook for hook in suite["executor"]["hooks"] if hook["class"] not in HOOKS_BLACKLIST
+ ]
+ else:
+ raise RuntimeError('Unknown structure in hook. File a TIG ticket.')
-_SUITES_PATH = os.path.join("buildscripts", "resmokeconfig", "suites")
+
+def use_external_fixture(suite):
+ """Use external version of this fixture."""
+ if suite.get("executor", {}).get("fixture", None):
+ suite["executor"]["fixture"] = {
+ "class": f"External{suite['executor']['fixture']['class']}",
+ "shell_conn_string": "mongodb://mongos:27017"
+ }
+
+
+def update_test_data(suite):
+ """Update TestData to be compatible with antithesis."""
+ suite.setdefault("executor", {}).setdefault(
+ "config", {}).setdefault("shell_options", {}).setdefault("global_vars", {}).setdefault(
+ "TestData", {}).update({"useStepdownPermittedFile": False})
+
+
+def update_shell(suite):
+ """Update shell for when running in Antithesis."""
+ suite.setdefault("executor", {}).setdefault("config", {}).setdefault("shell_options",
+ {}).setdefault("eval", "")
+ suite["executor"]["config"]["shell_options"]["eval"] += "jsTestLog = Function.prototype;"
+
+
+def update_exclude_tags(suite):
+ """Update the exclude tags to exclude antithesis incompatible tests."""
+ suite.setdefault('selector', {}).setdefault('exclude_with_any_tags',
+ []).append("antithesis_incompatible")
+
+
+def make_suite_antithesis_compatible(suite):
+ """Modify suite in-place to be antithesis compatible."""
+ delete_archival(suite)
+ make_hooks_compatible(suite)
+ use_external_fixture(suite)
+ update_test_data(suite)
+ update_shell(suite)
+ update_exclude_tags(suite)
@click.group()
@@ -50,54 +88,13 @@ def cli():
def _generate(suite_name: str) -> None:
- with open(os.path.join(_SUITES_PATH, "{}.yml".format(suite_name))) as fstream:
+ with open(os.path.join(_SUITES_PATH, f"{suite_name}.yml")) as fstream:
suite = yaml.safe_load(fstream)
- try:
- suite["archive"]["hooks"] = _sanitize_hooks(suite["archive"]["hooks"])
- except KeyError:
- # pass, don't care
- pass
- except TypeError:
- pass
-
- try:
- suite["executor"]["archive"]["hooks"] = _sanitize_hooks(
- suite["executor"]["archive"]["hooks"])
- except KeyError:
- # pass, don't care
- pass
- except TypeError:
- pass
-
- try:
- suite["executor"]["hooks"] = _sanitize_hooks(suite["executor"]["hooks"])
- except KeyError:
- # pass, don't care
- pass
- except TypeError:
- pass
-
- try:
- suite["executor"]["config"]["shell_options"]["global_vars"][
- "TestData"] = _sanitize_test_data(
- suite["executor"]["config"]["shell_options"]["global_vars"]["TestData"])
- except KeyError:
- # pass, don't care
- pass
- except TypeError:
- pass
-
- try:
- suite["executor"]["config"]["shell_options"]["eval"] += "jsTestLog = Function.prototype;"
- except KeyError:
- # pass, don't care
- pass
- except TypeError:
- pass
+ make_suite_antithesis_compatible(suite)
out = yaml.dump(suite)
- with open(os.path.join(_SUITES_PATH, "antithesis_{}.yml".format(suite_name)), "w") as fstream:
+ with open(os.path.join(_SUITES_PATH, f"antithesis_{suite_name}.yml"), "w") as fstream:
fstream.write(
"# this file was generated by buildscripts/antithesis_suite.py generate {}\n".format(
suite_name))
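
To see the combined effect of these helpers, the sketch below applies `make_suite_antithesis_compatible` to a minimal, made-up suite definition; the import path is an assumption based on the file's location:

```python
# Illustrative run of make_suite_antithesis_compatible on a minimal suite dict.
from buildscripts.antithesis_suite import make_suite_antithesis_compatible

suite = {"executor": {"hooks": ["CleanEveryN", "CheckReplDBHash"]}}
make_suite_antithesis_compatible(suite)

# Blacklisted hooks are dropped and AntithesisLogging is prepended; TestData,
# the shell eval, and the exclude tags are filled in.
assert suite["executor"]["hooks"] == ["AntithesisLogging", "CheckReplDBHash"]
assert suite["executor"]["config"]["shell_options"]["global_vars"]["TestData"] == {
    "useStepdownPermittedFile": False
}
assert suite["executor"]["config"]["shell_options"]["eval"] == "jsTestLog = Function.prototype;"
assert suite["selector"]["exclude_with_any_tags"] == ["antithesis_incompatible"]
```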
diff --git a/buildscripts/burn_in_tests.py b/buildscripts/burn_in_tests.py
index 27da0996b63..84c7d98aab0 100755
--- a/buildscripts/burn_in_tests.py
+++ b/buildscripts/burn_in_tests.py
@@ -13,6 +13,7 @@ from typing import Optional, Set, Tuple, List, Dict, NamedTuple
import click
import yaml
from git import Repo
+from pydantic import BaseModel
import structlog
from structlog.stdlib import LoggerFactory
@@ -352,7 +353,8 @@ def create_task_list_for_tests(changed_tests: Set[str], build_variant: str,
def create_tests_by_task(build_variant: str, evg_conf: EvergreenProjectConfig,
- changed_tests: Set[str], install_dir: str) -> Dict[str, TaskInfo]:
+ changed_tests: Set[str],
+ install_dir: Optional[str]) -> Dict[str, TaskInfo]:
"""
Create a list of tests by task.
@@ -367,7 +369,11 @@ def create_tests_by_task(build_variant: str, evg_conf: EvergreenProjectConfig,
exclude_tests.append(f"{ENTERPRISE_MODULE_PATH}/**/*")
changed_tests = filter_tests(changed_tests, exclude_tests)
- buildscripts.resmokelib.parser.set_run_options(f"--installDir={shlex.quote(install_dir)}")
+ run_options = ""
+ if install_dir is not None:
+ run_options = f"--installDir={shlex.quote(install_dir)}"
+ buildscripts.resmokelib.parser.set_run_options(run_options)
+
if changed_tests:
return create_task_list_for_tests(changed_tests, build_variant, evg_conf, exclude_suites,
exclude_tasks)
@@ -408,7 +414,7 @@ def _configure_logging(verbose: bool):
logging.basicConfig(
format="[%(asctime)s - %(name)s - %(levelname)s] %(message)s",
level=level,
- stream=sys.stdout,
+ stream=sys.stderr,
)
for log_name in EXTERNAL_LOGGERS:
logging.getLogger(log_name).setLevel(logging.WARNING)
@@ -537,11 +543,45 @@ class LocalBurnInExecutor(BurnInExecutor):
run_tests(tests_by_task, resmoke_cmd)
+class DiscoveredTask(BaseModel):
+ """
+ Model for a discovered task to run.
+
+ * task_name: Name of discovered task.
+ * test_list: List of tests to run under discovered task.
+ """
+
+ task_name: str
+ test_list: List[str]
+
+
+class DiscoveredTaskList(BaseModel):
+ """Model for a list of discovered tasks."""
+
+ discovered_tasks: List[DiscoveredTask]
+
+
+class YamlBurnInExecutor(BurnInExecutor):
+ """A burn-in executor that outputs discovered tasks as YAML."""
+
+ def execute(self, tests_by_task: Dict[str, TaskInfo]) -> None:
+ """
+ Report the given tasks and their tests to stdout.
+
+ :param tests_by_task: Dictionary of tasks to run with tests to run in each.
+ """
+ discovered_tasks = DiscoveredTaskList(discovered_tasks=[
+ DiscoveredTask(task_name=task_name, test_list=task_info.tests)
+ for task_name, task_info in tests_by_task.items()
+ ])
+ print(yaml.safe_dump(discovered_tasks.dict()))
+
+
class BurnInOrchestrator:
"""Orchestrate the execution of burn_in_tests."""
def __init__(self, change_detector: FileChangeDetector, burn_in_executor: BurnInExecutor,
- evg_conf: EvergreenProjectConfig) -> None:
+ evg_conf: EvergreenProjectConfig, install_dir: Optional[str]) -> None:
"""
Create a new orchestrator.
@@ -552,8 +592,9 @@ class BurnInOrchestrator:
self.change_detector = change_detector
self.burn_in_executor = burn_in_executor
self.evg_conf = evg_conf
+ self.install_dir = install_dir
- def burn_in(self, repos: List[Repo], build_variant: str, install_dir: str) -> None:
+ def burn_in(self, repos: List[Repo], build_variant: str) -> None:
"""
Execute burn in tests for the given git repositories.
@@ -564,7 +605,7 @@ class BurnInOrchestrator:
LOGGER.info("Found changed tests", files=changed_tests)
tests_by_task = create_tests_by_task(build_variant, self.evg_conf, changed_tests,
- install_dir)
+ self.install_dir)
LOGGER.debug("tests and tasks found", tests_by_task=tests_by_task)
self.burn_in_executor.execute(tests_by_task)
@@ -584,18 +625,20 @@ class BurnInOrchestrator:
help="The maximum number of times to repeat tests if time option is specified.")
@click.option("--repeat-tests-secs", "repeat_tests_secs", default=None, type=int, metavar="SECONDS",
help="Repeat tests for the given time (in secs).")
+@click.option("--yaml", "use_yaml", is_flag=True, default=False,
+ help="Output discovered tasks in YAML. Tests will not be run.")
@click.option("--verbose", "verbose", default=False, is_flag=True, help="Enable extra logging.")
@click.option(
"--origin-rev", "origin_rev", default=None,
help="The revision in the mongo repo that changes will be compared against if specified.")
-@click.option("--install-dir", "install_dir", required=True, type=str,
+@click.option("--install-dir", "install_dir", type=str,
help="Path to bin directory of a testable installation")
@click.argument("resmoke_args", nargs=-1, type=click.UNPROCESSED)
# pylint: disable=too-many-arguments,too-many-locals
def main(build_variant: str, no_exec: bool, repeat_tests_num: Optional[int],
repeat_tests_min: Optional[int], repeat_tests_max: Optional[int],
repeat_tests_secs: Optional[int], resmoke_args: str, verbose: bool,
- origin_rev: Optional[str], install_dir: str) -> None:
+ origin_rev: Optional[str], install_dir: Optional[str], use_yaml: bool) -> None:
"""
Run new or changed tests in repeated mode to validate their stability.
@@ -639,11 +682,13 @@ def main(build_variant: str, no_exec: bool, repeat_tests_num: Optional[int],
change_detector = LocalFileChangeDetector(origin_rev)
executor = LocalBurnInExecutor(resmoke_args, repeat_config)
- if no_exec:
+ if use_yaml:
+ executor = YamlBurnInExecutor()
+ elif no_exec:
executor = NopBurnInExecutor()
- burn_in_orchestrator = BurnInOrchestrator(change_detector, executor, evg_conf)
- burn_in_orchestrator.burn_in(repos, build_variant, install_dir)
+ burn_in_orchestrator = BurnInOrchestrator(change_detector, executor, evg_conf, install_dir)
+ burn_in_orchestrator.burn_in(repos, build_variant)
if __name__ == "__main__":
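As a quick orientation to the new --yaml mode, here is a minimal standalone sketch of the output shape YamlBurnInExecutor produces; it reuses the pydantic models added above, and the task and test names are placeholders:

    from typing import List

    import yaml
    from pydantic import BaseModel


    class DiscoveredTask(BaseModel):
        """Model for a discovered task to run (mirrors the model added above)."""

        task_name: str
        test_list: List[str]


    class DiscoveredTaskList(BaseModel):
        """Model for a list of discovered tasks (mirrors the model added above)."""

        discovered_tasks: List[DiscoveredTask]


    # Placeholder task and test names, purely for illustration.
    tasks = DiscoveredTaskList(discovered_tasks=[
        DiscoveredTask(task_name="jsCore", test_list=["jstests/core/example.js"]),
    ])
    print(yaml.safe_dump(tasks.dict()))
    # discovered_tasks:
    # - task_name: jsCore
    #   test_list:
    #   - jstests/core/example.js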
diff --git a/buildscripts/errorcodes.py b/buildscripts/errorcodes.py
index 8d9330b4240..3ff08ca8a1e 100755
--- a/buildscripts/errorcodes.py
+++ b/buildscripts/errorcodes.py
@@ -23,7 +23,6 @@ except ImportError:
print("*** Run 'pip3 install --user regex' to speed up error code checking")
import re # type: ignore
-ASSERT_NAMES = ["uassert", "massert", "fassert", "fassertFailed"]
MAXIMUM_CODE = 9999999 # JIRA Ticket + XX
# pylint: disable=invalid-name
@@ -306,7 +305,7 @@ def main():
parser.add_option("--list-files", dest="list_files", action="store_true", default=False,
help="Print the name of each file as it is scanned [default: %default]")
parser.add_option(
- "--ticket", dest="ticket", type="str", action="store", default=0,
+ "--ticket", dest="ticket", type="str", action="store", default=None,
help="Generate error codes for a given SERVER ticket number. Inputs can be of"
" the form: `--ticket=12345` or `--ticket=SERVER-12345`.")
options, extra = parser.parse_args()
@@ -322,11 +321,13 @@ def main():
if ok and options.quiet:
return
- next_code_gen = get_next_code(seen, coerce_to_number(options.ticket))
-
print("ok: %s" % ok)
- if not options.replace:
+
+ if options.ticket:
+ next_code_gen = get_next_code(seen, coerce_to_number(options.ticket))
print("next: %s" % next(next_code_gen))
+ else:
+ next_code_gen = get_next_code(seen, 0)
if ok:
sys.exit(0)
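The --ticket change above means the next-code generator is only consulted when a ticket is actually supplied. As a rough sketch of the coercion the option's help text implies (the real coerce_to_number is defined elsewhere in errorcodes.py, so this body is an assumption):

    def coerce_to_number(ticket):
        # Assumed behavior: "--ticket=12345" and "--ticket=SERVER-12345" yield the same int.
        if not ticket:
            return 0
        return int(str(ticket).upper().replace("SERVER-", ""))


    assert coerce_to_number("SERVER-12345") == coerce_to_number("12345") == 12345
    assert coerce_to_number(None) == 0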
diff --git a/buildscripts/evergreen_burn_in_tests.py b/buildscripts/evergreen_burn_in_tests.py
index 37a0d2874ff..a628a9a5d20 100644
--- a/buildscripts/evergreen_burn_in_tests.py
+++ b/buildscripts/evergreen_burn_in_tests.py
@@ -436,8 +436,8 @@ def burn_in(task_id: str, build_variant: str, generate_config: GenerateConfig,
change_detector = EvergreenFileChangeDetector(task_id, evg_api, os.environ)
executor = GenerateBurnInExecutor(generate_config, repeat_config, evg_api, generate_tasks_file)
- burn_in_orchestrator = BurnInOrchestrator(change_detector, executor, evg_conf)
- burn_in_orchestrator.burn_in(repos, build_variant, install_dir)
+ burn_in_orchestrator = BurnInOrchestrator(change_detector, executor, evg_conf, install_dir)
+ burn_in_orchestrator.burn_in(repos, build_variant)
@click.command()
diff --git a/buildscripts/generate_compile_expansions.py b/buildscripts/generate_compile_expansions.py
index d29777f9d35..c4ca320641f 100755
--- a/buildscripts/generate_compile_expansions.py
+++ b/buildscripts/generate_compile_expansions.py
@@ -7,9 +7,7 @@ $ python generate_compile_expansions.py --out compile_expansions.yml
"""
import argparse
-import json
import os
-import re
import sys
import shlex
import yaml
@@ -25,7 +23,6 @@ def generate_expansions():
"""
args = parse_args()
expansions = {}
- expansions.update(generate_version_expansions())
expansions.update(generate_scons_cache_expansions())
with open(args.out, "w") as out:
@@ -40,39 +37,6 @@ def parse_args():
return parser.parse_args()
-def generate_version_expansions():
- """Generate expansions from a version.json file if given, or $MONGO_VERSION."""
- expansions = {}
-
- if os.path.exists(VERSION_JSON):
- with open(VERSION_JSON, "r") as fh:
- data = fh.read()
- version_data = json.loads(data)
- version_line = version_data['version']
- version_parts = match_verstr(version_line)
- if not version_parts:
- raise ValueError("Unable to parse version.json")
- else:
- if not os.getenv("MONGO_VERSION"):
- raise Exception("$MONGO_VERSION not set and no version.json provided")
- version_line = os.getenv("MONGO_VERSION").lstrip("r")
- version_parts = match_verstr(version_line)
- if not version_parts:
- raise ValueError("Unable to parse version from stdin and no version.json provided")
-
- if version_parts[0]:
- expansions["suffix"] = "latest"
- expansions["src_suffix"] = "latest"
- expansions["is_release"] = "false"
- else:
- expansions["suffix"] = version_line
- expansions["src_suffix"] = "r{0}".format(version_line)
- expansions["is_release"] = "true"
- expansions["version"] = version_line
-
- return expansions
-
-
def generate_scons_cache_expansions():
"""Generate scons cache expansions from some files and environment variables."""
expansions = {}
@@ -101,21 +65,5 @@ def generate_scons_cache_expansions():
return expansions
-def match_verstr(verstr):
- """Match a version string and capture the "extra" part.
-
- If the version is a release like "2.3.4" or "2.3.4-rc0", this will return
- None. If the version is a pre-release like "2.3.4-325-githash" or
- "2.3.4-pre-", this will return "-pre-" or "-325-githash" If the version
- begins with the letter 'r', it will also match, e.g. r2.3.4, r2.3.4-rc0,
- r2.3.4-git234, r2.3.4-rc0-234-githash If the version is invalid (i.e.
- doesn't start with "2.3.4" or "2.3.4-rc0", this will return False.
- """
- res = re.match(r'^r?(?:\d+\.\d+\.\d+(?:-rc\d+|-alpha\d+)?)(-.*)?', verstr)
- if not res:
- return False
- return res.groups()
-
-
if __name__ == "__main__":
generate_expansions()
diff --git a/buildscripts/generate_compile_expansions_shared_cache.py b/buildscripts/generate_compile_expansions_shared_cache.py
index 337e9669c0f..633d8a56069 100755
--- a/buildscripts/generate_compile_expansions_shared_cache.py
+++ b/buildscripts/generate_compile_expansions_shared_cache.py
@@ -7,9 +7,7 @@ $ python generate_compile_expansions.py --out compile_expansions.yml
"""
import argparse
-import json
import os
-import re
import sys
import shlex
import yaml
@@ -25,7 +23,6 @@ def generate_expansions():
"""
args = parse_args()
expansions = {}
- expansions.update(generate_version_expansions())
expansions.update(generate_scons_cache_expansions())
with open(args.out, "w") as out:
@@ -40,39 +37,6 @@ def parse_args():
return parser.parse_args()
-def generate_version_expansions():
- """Generate expansions from a version.json file if given, or $MONGO_VERSION."""
- expansions = {}
-
- if os.path.exists(VERSION_JSON):
- with open(VERSION_JSON, "r") as fh:
- data = fh.read()
- version_data = json.loads(data)
- version_line = version_data['version']
- version_parts = match_verstr(version_line)
- if not version_parts:
- raise ValueError("Unable to parse version.json")
- else:
- if not os.getenv("MONGO_VERSION"):
- raise Exception("$MONGO_VERSION not set and no version.json provided")
- version_line = os.getenv("MONGO_VERSION").lstrip("r")
- version_parts = match_verstr(version_line)
- if not version_parts:
- raise ValueError("Unable to parse version from stdin and no version.json provided")
-
- if version_parts[0]:
- expansions["suffix"] = "latest"
- expansions["src_suffix"] = "latest"
- expansions["is_release"] = "false"
- else:
- expansions["suffix"] = version_line
- expansions["src_suffix"] = "r{0}".format(version_line)
- expansions["is_release"] = "true"
- expansions["version"] = version_line
-
- return expansions
-
-
def generate_scons_cache_expansions():
"""Generate scons cache expansions from some files and environment variables."""
expansions = {}
@@ -125,21 +89,5 @@ def generate_scons_cache_expansions():
return expansions
-def match_verstr(verstr):
- """Match a version string and capture the "extra" part.
-
- If the version is a release like "2.3.4" or "2.3.4-rc0", this will return
- None. If the version is a pre-release like "2.3.4-325-githash" or
- "2.3.4-pre-", this will return "-pre-" or "-325-githash" If the version
- begins with the letter 'r', it will also match, e.g. r2.3.4, r2.3.4-rc0,
- r2.3.4-git234, r2.3.4-rc0-234-githash If the version is invalid (i.e.
- doesn't start with "2.3.4" or "2.3.4-rc0", this will return False.
- """
- res = re.match(r'^r?(?:\d+\.\d+\.\d+(?:-rc\d+|-alpha\d+)?)(-.*)?', verstr)
- if not res:
- return False
- return res.groups()
-
-
if __name__ == "__main__":
generate_expansions()
diff --git a/buildscripts/generate_version_expansions.py b/buildscripts/generate_version_expansions.py
new file mode 100755
index 00000000000..f9439d90bc9
--- /dev/null
+++ b/buildscripts/generate_version_expansions.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+"""
+Generate the version expansions file used by Evergreen as part of the push/release process.
+
+Invoke by specifying an output file.
+$ python generate_version_expansions.py --out version_expansions.yml
+"""
+
+import argparse
+import json
+import os
+import re
+import sys
+import yaml
+
+VERSION_JSON = "version.json"
+
+
+def generate_expansions():
+ """Entry point for the script.
+
+ This calls functions to generate version expansions and
+ writes them to a file.
+ """
+ args = parse_args()
+ expansions = {}
+ expansions.update(generate_version_expansions())
+
+ with open(args.out, "w") as out:
+ print("saving compile expansions to {0}: ({1})".format(args.out, expansions))
+ yaml.safe_dump(expansions, out, default_flow_style=False)
+
+
+def parse_args():
+ """Parse program arguments."""
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--out", required=True)
+ return parser.parse_args()
+
+
+def generate_version_expansions():
+ """Generate expansions from a version.json file if given, or $MONGO_VERSION."""
+ expansions = {}
+
+ if os.path.exists(VERSION_JSON):
+ with open(VERSION_JSON, "r") as fh:
+ data = fh.read()
+ version_data = json.loads(data)
+ version_line = version_data['version']
+ version_parts = match_verstr(version_line)
+ if not version_parts:
+ raise ValueError("Unable to parse version.json")
+ else:
+ version_line = os.getenv("MONGO_VERSION")
+ if not version_line:
+ raise Exception("$MONGO_VERSION not set and no version.json provided")
+
+ version_line = version_line.lstrip("r")
+ version_parts = match_verstr(version_line)
+ if not version_parts:
+ raise ValueError("Unable to parse version from stdin and no version.json provided")
+
+ if version_parts[0]:
+ expansions["suffix"] = "latest"
+ expansions["src_suffix"] = "latest"
+ expansions["is_release"] = "false"
+ else:
+ expansions["suffix"] = version_line
+ expansions["src_suffix"] = "r{0}".format(version_line)
+ expansions["is_release"] = "true"
+ expansions["version"] = version_line
+
+ return expansions
+
+
+def match_verstr(verstr):
+ """Match a version string and capture the "extra" part.
+
+ If the version is a release like "2.3.4" or "2.3.4-rc0", this will return
+ None. If the version is a pre-release like "2.3.4-325-githash" or
+ "2.3.4-pre-", this will return "-pre-" or "-325-githash" If the version
+ begins with the letter 'r', it will also match, e.g. r2.3.4, r2.3.4-rc0,
+ r2.3.4-git234, r2.3.4-rc0-234-githash If the version is invalid (i.e.
+ doesn't start with "2.3.4" or "2.3.4-rc0", this will return False.
+ """
+ res = re.match(r'^r?(?:\d+\.\d+\.\d+(?:-rc\d+|-alpha\d+)?)(-.*)?', verstr)
+ if not res:
+ return False
+ return res.groups()
+
+
+if __name__ == "__main__":
+ generate_expansions()
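For reference, a short sketch of how match_verstr in the new script classifies version strings; the regex is copied verbatim from above, and the sample version strings are placeholders:

    import re


    def match_verstr(verstr):
        """Copy of the helper above: returns the captured "extra" part, or False."""
        res = re.match(r'^r?(?:\d+\.\d+\.\d+(?:-rc\d+|-alpha\d+)?)(-.*)?', verstr)
        if not res:
            return False
        return res.groups()


    print(match_verstr("6.0.0"))              # (None,)           -> treated as a release
    print(match_verstr("r6.0.0-rc1"))         # (None,)           -> release candidate, still a release
    print(match_verstr("6.0.0-325-gabcdef"))  # ('-325-gabcdef',) -> pre-release, suffix becomes "latest"
    print(match_verstr("not-a-version"))      # False             -> generate_version_expansions raises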
diff --git a/buildscripts/idl/idl/bson.py b/buildscripts/idl/idl/bson.py
index 3f8e5190f9d..8216b5d743d 100644
--- a/buildscripts/idl/idl/bson.py
+++ b/buildscripts/idl/idl/bson.py
@@ -72,6 +72,7 @@ _BINDATA_SUBTYPE = {
# },
"uuid": {'scalar': True, 'bindata_enum': 'newUUID'},
"md5": {'scalar': True, 'bindata_enum': 'MD5Type'},
+ "encrypt": {'scalar': True, 'bindata_enum': 'Encrypt'},
}
diff --git a/buildscripts/idl/idl_check_compatibility.py b/buildscripts/idl/idl_check_compatibility.py
index 82428ef7e3b..9b6d0f2aaff 100644
--- a/buildscripts/idl/idl_check_compatibility.py
+++ b/buildscripts/idl/idl_check_compatibility.py
@@ -164,7 +164,29 @@ ALLOW_ANY_TYPE_LIST: List[str] = [
]
# Do not add user visible fields already released in earlier versions.
-IGNORE_UNSTABLE_LIST: List[str] = [
+# We generally don't allow changing a field from stable to unstable, but we permit it in special cases,
+# such as when we want to avoid making internal fields part of the stable API.
+IGNORE_STABLE_TO_UNSTABLE_LIST: List[str] = [
+ # This list is only used in unit-tests.
+ 'newReplyFieldUnstableIgnoreList-reply-unstableNewFieldIgnoreList',
+ 'newTypeFieldUnstableIgnoreList-param-unstableNewFieldIgnoreList',
+ 'newTypeEnumOrStructIgnoreList-reply-unstableNewFieldIgnoreList',
+ 'commandParameterUnstableIgnoreList-param-newUnstableParameterIgnoreList',
+ 'newReplyFieldUnstableOptionalIgnoreList-reply-unstableOptionalNewFieldIgnoreList',
+ 'newReplyTypeEnumOrStructIgnoreList-reply-newReplyTypeEnumOrStructIgnoreList',
+ 'newReplyFieldVariantNotSubsetIgnoreList-reply-variantNotSubsetReplyFieldIgnoreList',
+ 'replyFieldVariantDifferentStructIgnoreList-reply-variantStructRecursiveReplyFieldIgnoreList',
+ 'replyFieldNonVariantToVariantIgnoreList-reply-nonVariantToVariantReplyFieldIgnoreList',
+ 'replyFieldNonEnumToEnumIgnoreList-reply-nonEnumToEnumReplyIgnoreList',
+ 'newUnstableParamTypeChangesIgnoreList-param-newUnstableTypeChangesParamIgnoreList',
+ 'newUnstableTypeChangesIgnoreList',
+ 'newUnstableTypeChangesIgnoreList-param-newUnstableTypeChangesFieldIgnoreList',
+ 'newUnstableReplyFieldTypeChangesIgnoreList-reply-newUnstableTypeChangesReplyFieldIgnoreList',
+ 'newReplyFieldTypeStructIgnoreList-reply-structReplyField',
+ 'newReplyFieldTypeStructIgnoreList-reply-unstableNewFieldIgnoreList',
+
+ # Real use cases for changing a field from 'stable' to 'unstable'.
+
# The 'originalSpec' field was introduced in v5.1 behind a disabled feature flag and is not user
# visible. This is part of the listIndexes output when executed against system.bucket.*
# collections, which users should avoid doing.
@@ -350,11 +372,13 @@ def check_reply_field_type_recursive(ctxt: IDLCompatibilityContext,
cmd_name = field_pair.cmd_name
field_name = field_pair.field_name
+ ignore_list_name: str = cmd_name + "-reply-" + field_name
+
# If the old field is unstable, we only add errors related to the use of 'any' as the
# bson_serialization_type. For all other errors, we check that the old field is stable
# before adding an error.
if not isinstance(new_field_type, syntax.Type):
- if not old_field.unstable:
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
ctxt.add_new_reply_field_type_enum_or_struct_error(
cmd_name, field_name, new_field_type.name, old_field_type.name,
new_field.idl_file_path)
@@ -372,11 +396,9 @@ def check_reply_field_type_recursive(ctxt: IDLCompatibilityContext,
new_field_type.name, new_field.idl_file_path)
return
- allow_name: str = cmd_name + "-reply-" + field_name
-
if "any" in old_field_type.bson_serialization_type:
# If 'any' is not explicitly allowed as the bson_serialization_type.
- if allow_name not in ALLOW_ANY_TYPE_LIST:
+ if ignore_list_name not in ALLOW_ANY_TYPE_LIST:
ctxt.add_old_reply_field_bson_any_not_allowed_error(
cmd_name, field_name, old_field_type.name, old_field.idl_file_path)
return
@@ -387,12 +409,12 @@ def check_reply_field_type_recursive(ctxt: IDLCompatibilityContext,
new_field.idl_file_path)
# If serializer is changed, it's a potential breaking change.
- if (not old_field.unstable) and old_field_type.serializer != new_field_type.serializer:
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST and old_field_type.serializer != new_field_type.serializer:
ctxt.add_reply_field_serializer_not_equal_error(
cmd_name, field_name, new_field_type.name, new_field.idl_file_path)
# If deserializer is changed, it's a potential breaking change.
- if (not old_field.unstable) and old_field_type.deserializer != new_field_type.deserializer:
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST and old_field_type.deserializer != new_field_type.deserializer:
ctxt.add_reply_field_deserializer_not_equal_error(
cmd_name, field_name, new_field_type.name, new_field.idl_file_path)
@@ -419,7 +441,7 @@ def check_reply_field_type_recursive(ctxt: IDLCompatibilityContext,
else:
# new_variant_type was not found in old_variant_types.
- if not old_field.unstable:
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
ctxt.add_new_reply_field_variant_type_not_subset_error(
cmd_name, field_name, new_variant_type.name, new_field.idl_file_path)
@@ -427,7 +449,7 @@ def check_reply_field_type_recursive(ctxt: IDLCompatibilityContext,
# Since enums can't be part of variant types, we don't explicitly check for enums.
if isinstance(new_field_type,
syntax.VariantType) and new_field_type.variant_struct_type is not None:
- if old_field_type.variant_struct_type is None and not old_field.unstable:
+ if old_field_type.variant_struct_type is None and not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
ctxt.add_new_reply_field_variant_type_not_subset_error(
cmd_name, field_name, new_field_type.variant_struct_type.name,
new_field.idl_file_path)
@@ -437,7 +459,7 @@ def check_reply_field_type_recursive(ctxt: IDLCompatibilityContext,
new_field.idl_file, old_field.idl_file_path,
new_field.idl_file_path)
- elif not old_field.unstable:
+ elif not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
if isinstance(new_field_type, syntax.VariantType):
ctxt.add_new_reply_field_variant_type_error(cmd_name, field_name, old_field_type.name,
new_field.idl_file_path)
@@ -452,6 +474,8 @@ def check_reply_field_type(ctxt: IDLCompatibilityContext, field_pair: FieldCompa
# pylint: disable=too-many-branches
old_field = field_pair.old
new_field = field_pair.new
+ cmd_name = field_pair.cmd_name
+ field_name = field_pair.field_name
array_check = check_array_type(ctxt, "reply_field", old_field.field_type, new_field.field_type,
field_pair.cmd_name, 'type', old_field.idl_file_path,
new_field.idl_file_path, old_field.unstable)
@@ -475,10 +499,14 @@ def check_reply_field_type(ctxt: IDLCompatibilityContext, field_pair: FieldCompa
ctxt.errors.dump_errors()
sys.exit(1)
+ ignore_list_name: str = cmd_name + "-reply-" + field_name
+
if isinstance(old_field_type, syntax.Type):
check_reply_field_type_recursive(ctxt, field_pair)
- elif isinstance(old_field_type, syntax.Enum) and not old_field.unstable:
+ elif isinstance(
+ old_field_type, syntax.Enum
+ ) and not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
if isinstance(new_field_type, syntax.Enum):
check_subset(ctxt, cmd_name, field_name, new_field_type.name, new_field_type.values,
old_field_type.values, new_field.idl_file_path)
@@ -491,7 +519,7 @@ def check_reply_field_type(ctxt: IDLCompatibilityContext, field_pair: FieldCompa
check_reply_fields(ctxt, old_field_type, new_field_type, cmd_name, old_field.idl_file,
new_field.idl_file, old_field.idl_file_path, new_field.idl_file_path)
else:
- if not old_field.unstable:
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
ctxt.add_new_reply_field_type_not_struct_error(
cmd_name, field_name, new_field_type.name, old_field_type.name,
new_field.idl_file_path)
@@ -536,9 +564,9 @@ def check_reply_field(ctxt: IDLCompatibilityContext, old_field: syntax.Field,
and old_field_type.name == "optionalBool")
new_field_optional = new_field.optional or (new_field_type
and new_field_type.name == "optionalBool")
- field_name: str = cmd_name + "-reply-" + new_field.name
- if not old_field.unstable and field_name not in IGNORE_UNSTABLE_LIST:
- if new_field.unstable and field_name not in IGNORE_UNSTABLE_LIST:
+ ignore_list_name: str = cmd_name + "-reply-" + new_field.name
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
+ if new_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
ctxt.add_new_reply_field_unstable_error(cmd_name, new_field.name, new_idl_file_path)
if new_field_optional and not old_field_optional:
ctxt.add_new_reply_field_optional_error(cmd_name, new_field.name, new_idl_file_path)
@@ -652,19 +680,19 @@ def check_param_or_command_type_recursive(ctxt: IDLCompatibilityContext,
cmd_name = field_pair.cmd_name
param_name = field_pair.field_name
+ ignore_list_name: str = cmd_name + "-param-" + param_name if is_command_parameter else cmd_name
+
# If the old field is unstable, we only add errors related to the use of 'any' as the
# bson_serialization_type. For all other errors, we check that the old field is stable
# before adding an error.
if not isinstance(new_type, syntax.Type):
- if not old_field.unstable:
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
ctxt.add_new_command_or_param_type_enum_or_struct_error(
cmd_name, new_type.name, old_type.name, new_field.idl_file_path, param_name,
is_command_parameter)
return
- allow_name: str = cmd_name + "-param-" + param_name if is_command_parameter else cmd_name
-
# If bson_serialization_type switches from 'any' to non-any type.
if "any" in old_type.bson_serialization_type and "any" not in new_type.bson_serialization_type:
ctxt.add_old_command_or_param_type_bson_any_error(cmd_name, old_type.name, new_type.name,
@@ -681,7 +709,7 @@ def check_param_or_command_type_recursive(ctxt: IDLCompatibilityContext,
if "any" in old_type.bson_serialization_type:
# If 'any' is not explicitly allowed as the bson_serialization_type.
- if allow_name not in ALLOW_ANY_TYPE_LIST:
+ if ignore_list_name not in ALLOW_ANY_TYPE_LIST:
ctxt.add_old_command_or_param_type_bson_any_not_allowed_error(
cmd_name, old_type.name, old_field.idl_file_path, param_name, is_command_parameter)
return
@@ -692,18 +720,20 @@ def check_param_or_command_type_recursive(ctxt: IDLCompatibilityContext,
cmd_name, new_type.name, new_field.idl_file_path, param_name, is_command_parameter)
# If serializer is changed, it's a potential breaking change.
- if (not old_field.unstable) and old_type.serializer != new_type.serializer:
+ if (not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST
+ ) and old_type.serializer != new_type.serializer:
ctxt.add_command_or_param_serializer_not_equal_error(
cmd_name, new_type.name, new_field.idl_file_path, param_name, is_command_parameter)
# If deserializer is changed, it's a potential breaking change.
- if (not old_field.unstable) and old_type.deserializer != new_type.deserializer:
+ if (not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST
+ ) and old_type.deserializer != new_type.deserializer:
ctxt.add_command_or_param_deserializer_not_equal_error(
cmd_name, new_type.name, new_field.idl_file_path, param_name, is_command_parameter)
if isinstance(old_type, syntax.VariantType):
if not isinstance(new_type, syntax.VariantType):
- if not old_field.unstable:
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
ctxt.add_new_command_or_param_type_not_variant_type_error(
cmd_name, new_type.name, new_field.idl_file_path, param_name,
is_command_parameter)
@@ -730,7 +760,7 @@ def check_param_or_command_type_recursive(ctxt: IDLCompatibilityContext,
is_command_parameter)
break
else:
- if not old_field.unstable:
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
# old_variant_type was not found in new_variant_types.
ctxt.add_new_command_or_param_variant_type_not_superset_error(
cmd_name, old_variant_type.name, new_field.idl_file_path, param_name,
@@ -746,12 +776,12 @@ def check_param_or_command_type_recursive(ctxt: IDLCompatibilityContext,
new_field.idl_file_path, is_command_parameter)
# If old type has a variant struct type and new type does not have a variant struct type.
- elif not old_field.unstable:
+ elif not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
ctxt.add_new_command_or_param_variant_type_not_superset_error(
cmd_name, old_type.variant_struct_type.name, new_field.idl_file_path,
param_name, is_command_parameter)
- elif not old_field.unstable:
+ elif not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
check_superset(ctxt, cmd_name, new_type.name, new_type.bson_serialization_type,
old_type.bson_serialization_type, new_field.idl_file_path, param_name,
is_command_parameter)
@@ -763,10 +793,12 @@ def check_param_or_command_type(ctxt: IDLCompatibilityContext, field_pair: Field
# pylint: disable=too-many-branches
old_field = field_pair.old
new_field = field_pair.new
+ field_name = field_pair.field_name
+ cmd_name = field_pair.cmd_name
array_check = check_array_type(
ctxt, "command_parameter" if is_command_parameter else "command_namespace",
old_field.field_type, new_field.field_type, field_pair.cmd_name,
- field_pair.field_name if is_command_parameter else "type", old_field.idl_file_path,
+ field_name if is_command_parameter else "type", old_field.idl_file_path,
new_field.idl_file_path, old_field.unstable)
if array_check == ArrayTypeCheckResult.INVALID:
return
@@ -778,40 +810,42 @@ def check_param_or_command_type(ctxt: IDLCompatibilityContext, field_pair: Field
old_type = old_field.field_type
new_type = new_field.field_type
if old_type is None:
- ctxt.add_command_or_param_type_invalid_error(field_pair.cmd_name, old_field.idl_file_path,
+ ctxt.add_command_or_param_type_invalid_error(cmd_name, old_field.idl_file_path,
field_pair.field_name, is_command_parameter)
ctxt.errors.dump_errors()
sys.exit(1)
if new_type is None:
- ctxt.add_command_or_param_type_invalid_error(field_pair.cmd_name, new_field.idl_file_path,
+ ctxt.add_command_or_param_type_invalid_error(cmd_name, new_field.idl_file_path,
field_pair.field_name, is_command_parameter)
ctxt.errors.dump_errors()
sys.exit(1)
+ ignore_list_name: str = cmd_name + "-param-" + field_name
+
if isinstance(old_type, syntax.Type):
check_param_or_command_type_recursive(ctxt, field_pair, is_command_parameter)
# Only add type errors if the old field is stable.
- elif isinstance(old_type, syntax.Enum) and not old_field.unstable:
+ elif isinstance(
+ old_type, syntax.Enum
+ ) and not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
if isinstance(new_type, syntax.Enum):
- check_superset(ctxt, field_pair.cmd_name, new_type.name, new_type.values,
- old_type.values, new_field.idl_file_path, field_pair.field_name,
- is_command_parameter)
+ check_superset(ctxt, cmd_name, new_type.name, new_type.values, old_type.values,
+ new_field.idl_file_path, field_pair.field_name, is_command_parameter)
else:
ctxt.add_new_command_or_param_type_not_enum_error(
- field_pair.cmd_name, new_type.name, old_type.name, new_field.idl_file_path,
+ cmd_name, new_type.name, old_type.name, new_field.idl_file_path,
field_pair.field_name, is_command_parameter)
elif isinstance(old_type, syntax.Struct):
if isinstance(new_type, syntax.Struct):
check_command_params_or_type_struct_fields(
- ctxt, old_type, new_type, field_pair.cmd_name, old_field.idl_file,
- new_field.idl_file, old_field.idl_file_path, new_field.idl_file_path,
- is_command_parameter)
+ ctxt, old_type, new_type, cmd_name, old_field.idl_file, new_field.idl_file,
+ old_field.idl_file_path, new_field.idl_file_path, is_command_parameter)
else:
- if not old_field.unstable:
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
ctxt.add_new_command_or_param_type_not_struct_error(
- field_pair.cmd_name, new_type.name, old_type.name, new_field.idl_file_path,
+ cmd_name, new_type.name, old_type.name, new_field.idl_file_path,
field_pair.field_name, is_command_parameter)
@@ -948,8 +982,8 @@ def check_command_param_or_type_struct_field(
is_command_parameter: bool):
"""Check compatibility between the old and new command parameter or command type struct field."""
# pylint: disable=too-many-arguments
- field_name: str = cmd_name + "-param-" + new_field.name
- if not old_field.unstable and new_field.unstable and field_name not in IGNORE_UNSTABLE_LIST:
+ ignore_list_name: str = cmd_name + "-param-" + new_field.name
+ if not old_field.unstable and new_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
ctxt.add_new_param_or_command_type_field_unstable_error(
cmd_name, old_field.name, old_idl_file_path, type_name, is_command_parameter)
# If old field is unstable and new field is stable, the new field should either be optional or
@@ -964,11 +998,11 @@ def check_command_param_or_type_struct_field(
ctxt.add_new_param_or_command_type_field_stable_required_no_default_error(
cmd_name, old_field.name, old_idl_file_path, type_name, is_command_parameter)
- if old_field_optional and not new_field_optional:
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST and old_field_optional and not new_field_optional:
ctxt.add_new_param_or_command_type_field_required_error(
cmd_name, old_field.name, old_idl_file_path, type_name, is_command_parameter)
- if not old_field.unstable:
+ if not old_field.unstable and ignore_list_name not in IGNORE_STABLE_TO_UNSTABLE_LIST:
check_param_or_type_validator(ctxt, old_field, new_field, cmd_name, new_idl_file_path,
type_name, is_command_parameter)
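To make new entries in IGNORE_STABLE_TO_UNSTABLE_LIST (and ALLOW_ANY_TYPE_LIST) easier to derive, here is a small sketch of the key format the checks above build via ignore_list_name; the helper names are illustrative, not part of the script:

    def reply_ignore_key(cmd_name, field_name):
        """Key for a reply field: "<command>-reply-<field>"."""
        return cmd_name + "-reply-" + field_name


    def param_ignore_key(cmd_name, param_name, is_command_parameter):
        """Key for a parameter ("<command>-param-<param>") or a command type (the bare command name)."""
        return cmd_name + "-param-" + param_name if is_command_parameter else cmd_name


    assert (reply_ignore_key("newReplyFieldUnstableIgnoreList", "unstableNewFieldIgnoreList")
            == "newReplyFieldUnstableIgnoreList-reply-unstableNewFieldIgnoreList")
    # A command-level type entry uses the bare command name, e.g. 'newUnstableTypeChangesIgnoreList'.
    assert param_ignore_key("newUnstableTypeChangesIgnoreList", "unusedParamName", False) == "newUnstableTypeChangesIgnoreList"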
diff --git a/buildscripts/idl/tests/compatibility_test_pass/new/compatibility_test_pass_new.idl b/buildscripts/idl/tests/compatibility_test_pass/new/compatibility_test_pass_new.idl
index da2011a57b9..5302dd01ed0 100644
--- a/buildscripts/idl/tests/compatibility_test_pass/new/compatibility_test_pass_new.idl
+++ b/buildscripts/idl/tests/compatibility_test_pass/new/compatibility_test_pass_new.idl
@@ -100,6 +100,22 @@ structs:
type: string
unstable: false
+ UnstableNewFieldReplyIgnoreList:
+ description: "This reply contains a field that is stable in the old command and is
+ unstable in the new command, but the change is explicitly allowed."
+ fields:
+ unstableNewFieldIgnoreList:
+ type: string
+ unstable: true
+
+ NewReplyTypeEnumOrStructIgnoreList:
+ description: "the type is a non-enum or struct type in the old command, and an enum or struct
+ in the new command, but the change is explicitly allowed"
+ fields:
+ newReplyTypeEnumOrStructIgnoreList:
+ type: StructType
+ unstable: true
+
RequiredNewFieldReply:
description: "This reply contains a field that is optional in the old command but is
required in the new command."
@@ -133,6 +149,15 @@ structs:
unstable: true
optional: true
+ UnstableOptionalNewFieldReplyIgnoreList:
+ description: "This reply contains a field that is stable in the old command and is
+ unstable and optional in the new command, but the change is allowed"
+ fields:
+ unstableOptionalNewFieldIgnoreList:
+ type: string
+ unstable: true
+ optional: true
+
EnumSubsetReply:
description: "This reply contains an enum field where the new enum values is a subset
of the old reply type's enum values"
@@ -157,6 +182,14 @@ structs:
type: intStringBoolToIntString
unstable: false
+ StructFieldTypeRecursiveReplyIgnoreList:
+ description: "This reply contains a field whose new type is a struct that is not
+ compatible with the old field type, but this is explicitly allowed"
+ fields:
+ structReplyField:
+ type: UnstableNewFieldReplyIgnoreList
+ unstable: true
+
StructFieldTypeRecursiveReplyOne:
description: "This reply contains a field whose new type is a struct that is
compatible with the old field type"
@@ -189,6 +222,15 @@ structs:
type: int
unstable: false
+ NewVariantNotSubsetReplyIgnoreList:
+ description: "This reply contains a field whose new variant types are not a subset
+ of the old variant types"
+ fields:
+ variantNotSubsetReplyFieldIgnoreList:
+ type:
+ variant: [int, string]
+ unstable: true
+
NewVariantSubsetReply:
description: "This reply contains a field whose new variant types are a subset
of the old variant types"
@@ -236,6 +278,32 @@ structs:
variant: [int, StructFieldTypeRecursiveReplyTwo,
array<StructFieldTypeRecursiveReplyTwo>, array<string>]
+ VariantDifferentStructReplyIgnoreList:
+ description: "This reply contains a field that has a new variant struct type that is
+ different from the old variant struct type"
+ fields:
+ variantStructRecursiveReplyFieldIgnoreList:
+ unstable: true
+ type:
+ variant: [int, StructFieldTypeRecursiveReplyTwo]
+
+ NonVariantToVariantReplyIgnoreList:
+ description: "This reply contains a field that changes from a non-variant type to a variant
+ type, but the field is in the ignore list"
+ fields:
+ nonVariantToVariantReplyFieldIgnoreList:
+ unstable: true
+ type:
+ variant: [int, StructFieldTypeRecursiveReplyOne]
+
+ NonEnumToEnumReplyIgnoreList:
+ description: "This reply contains a field that changes from a non-enum type to an enum
+ type, but the field is in the ignore list"
+ fields:
+ nonEnumToEnumReplyIgnoreList:
+ type: EnumSubsetReply
+ unstable: true
+
CommandParamStructRecursiveOne:
description: "This command parameter struct type contains a stable and optional
field while the old struct field is unstable"
@@ -412,6 +480,16 @@ structs:
validator:
lt: 0
+ NewUnstableTypeChangesReplyIgnoreList:
+ description: "This reply contains a field that is stable in the old version and has type changes,
+ but is also in the ignore list"
+ fields:
+ newUnstableTypeChangesReplyFieldIgnoreList:
+ unstable: true
+ type: intStringToIntStringBool
+ validator:
+ lt: 0
+
NewlyAddedBsonSerializationTypeAnyStruct:
description: "This struct contains a newly added field whose type has a bson_serialization_type
that contains 'any' that is explicitly allowed"
@@ -431,6 +509,16 @@ structs:
validator:
lt: 0
+ NewUnstableTypeChangesStructIgnoreList:
+ description: "This struct contains a field that is stable in the old version and has type,
+ changes but is also in the ignore list"
+ fields:
+ newUnstableTypeChangesFieldIgnoreList:
+ type: intStringBoolToIntString
+ unstable: true
+ validator:
+ lt: 0
+
BsonSerializationTypeAnyWithVariantReply:
description: "This reply contains a new reply field with variant types where one of the
bson serialization types is 'any' and is explicitly allowed"
@@ -616,6 +704,20 @@ commands:
default: ""
unstable: false
+ commandParameterUnstableIgnoreList:
+ description: "new unstable command parameter is stable in the corresponding old
+ command, but the change is explicitly allowed"
+ command_name: commandParameterUnstableIgnoreList
+ namespace: ignored
+ cpp_name: commandParameterUnstableIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: OkReply
+ fields:
+ newUnstableParameterIgnoreList:
+ type: string
+ unstable: true
+
removeCommandParameterUnstable:
description: "new command removes parameter that is unstable
in the corresponding old command and still passes"
@@ -785,6 +887,22 @@ commands:
validator:
lt: 0
+ newUnstableParamTypeChangesIgnoreList:
+ description: "command has param with incompatible type changes, but is in the stable-to-unstable
+ ignore list"
+ command_name: newUnstableParamTypeChangesIgnoreList
+ namespace: ignored
+ cpp_name: newUnstableParamTypeChangesIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: OkReply
+ fields:
+ newUnstableTypeChangesParamIgnoreList:
+ type: intStringBoolToIntString
+ unstable: true
+ validator:
+ lt: 0
+
newlyAddedTypeFieldBsonAnyAllowList:
description: "command passes when its type field is newly added and has bson type 'any'
that is explicitly allowed"
@@ -806,6 +924,17 @@ commands:
api_version: "1"
reply_type: OkReply
+ newUnstableTypeChangesIgnoreList:
+ description: "command has type with incompatible changes, but is in the stable-to-unstable
+ ignore list"
+ command_name: newUnstableTypeChangesIgnoreList
+ namespace: type
+ type: NewUnstableTypeChangesStructIgnoreList
+ cpp_name: newUnstableTypeChangesIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: OkReply
+
oldCommandParameterValidator:
description: "new command passes when it contains a parameter that does not contain a validator
that is present in the old parameter"
@@ -872,6 +1001,16 @@ commands:
api_version: "1"
reply_type: StableNewFieldReply
+ newReplyFieldUnstableIgnoreList:
+ description: "new command contains an unstable reply field that is stable
+ in the corresponding old command but is explicitly allowed"
+ command_name: newReplyFieldUnstableIgnoreList
+ namespace: ignored
+ cpp_name: newReplyFieldUnstableIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: UnstableNewFieldReplyIgnoreList
+
newReplyFieldRequired:
description: "new command contains a required reply field that is optional
in the corresponding old command and still passes"
@@ -901,6 +1040,16 @@ commands:
api_version: "1"
reply_type: UnstableOldFieldReply
+ newReplyFieldUnstableOptionalIgnoreList:
+ description: "old reply field is stable but is included in the ignore list so new commmand
+ passes even if its new reply field is unstable and optional"
+ command_name: newReplyFieldUnstableOptionalIgnoreList
+ namespace: ignored
+ cpp_name: newReplyFieldUnstableOptionalIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: UnstableOptionalNewFieldReplyIgnoreList
+
importedReplyCommand:
description: "reply is imported and should pass"
command_name: importedReplyCommand
@@ -962,6 +1111,16 @@ commands:
api_version: "1"
reply_type: StructFieldTypeRecursiveReplyTwo
+ newReplyFieldTypeStructIgnoreList:
+ description: "command has a reply contains a field whose new type is a struct that is not
+ compatible with the old field type, but this is explicitly allowed"
+ command_name: newReplyFieldTypeStructIgnoreList
+ namespace: ignored
+ cpp_name: newReplyFieldTypeStructIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: StructFieldTypeRecursiveReplyIgnoreList
+
newNamespaceIgnored:
description: "new command passes when its namespace is changed to ignored"
command_name: newNamespaceIgnored
@@ -1032,6 +1191,17 @@ commands:
api_version: "1"
reply_type: OkReply
+ newReplyTypeEnumOrStructIgnoreList:
+ description: "the type is a non-enum or struct type in the old command, and an enum or struct
+ in the new command, but the change is explicitly allowed"
+ command_name: newReplyTypeEnumOrStructIgnoreList
+ namespace: type
+ type: namespacestring
+ cpp_name: newReplyTypeEnumOrStructIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: NewReplyTypeEnumOrStructIgnoreList
+
newTypeFieldOptional:
description: "new command type contains an optional field that is required
in the corresponding old command and still passes"
@@ -1075,6 +1245,17 @@ commands:
api_version: "1"
reply_type: OkReply
+ newTypeFieldUnstableIgnoreList:
+ description: "new command contains an unstable type field that is stable in the corresponding
+ old command but that is explicitly allowed"
+ command_name: newTypeFieldUnstableIgnoreList
+ namespace: type
+ type: UnstableNewFieldReplyIgnoreList
+ cpp_name: newTypeFieldUnstableIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: OkReply
+
removeTypeFieldUnstable:
description: "new command removes type field that is unstable
in the corresponding old command and still passes"
@@ -1118,6 +1299,16 @@ commands:
api_version: "1"
reply_type: OldVariantTypeReply
+ newReplyFieldVariantNotSubsetIgnoreList:
+ description: "the command's reply field type is a variant type that is not a subset of the old reply
+ field variant types, but it's also on the unstable ignore list"
+ command_name: newReplyFieldVariantNotSubsetIgnoreList
+ namespace: ignored
+ cpp_name: newReplyFieldVariantNotSubsetIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: NewVariantNotSubsetReplyIgnoreList
+
newReplyFieldVariantSubset:
description: "new command when its reply field type is a variant type that is
a subset of the old reply field variant types"
@@ -1168,6 +1359,36 @@ commands:
api_version: "1"
reply_type: VariantStructRecursiveReply
+ replyFieldVariantDifferentStructIgnoreList:
+ description: "the old field has a non-variant type and the new field has a variant type but the
+ change is explicitly allowed"
+ command_name: replyFieldVariantDifferentStructIgnoreList
+ namespace: ignored
+ cpp_name: replyFieldVariantDifferentStructIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: VariantDifferentStructReplyIgnoreList
+
+ replyFieldNonVariantToVariantIgnoreList:
+ description: "the old and new field have different variant struct types but the change is
+ explicitly allowed"
+ command_name: replyFieldNonVariantToVariantIgnoreList
+ namespace: ignored
+ cpp_name: replyFieldNonVariantToVariantIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: NonVariantToVariantReplyIgnoreList
+
+ replyFieldNonEnumToEnumIgnoreList:
+ description: "the old field has a non-enum type and the new field has an enum type but the
+ change is explicitly allowed"
+ command_name: replyFieldNonEnumToEnumIgnoreList
+ namespace: ignored
+ cpp_name: replyFieldNonEnumToEnumIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: NonEnumToEnumReplyIgnoreList
+
newlyAddedReplyFieldTypeBsonAnyAllowed:
description: "command passes because it has a newly added reply field type has a bson_serialization_type
that contains 'any' that is explicitly allowed"
@@ -1187,6 +1408,16 @@ commands:
api_version: "1"
reply_type: OldUnstableTypeChangesReply
+ newUnstableReplyFieldTypeChangesIgnoreList:
+ description: "command has an old stable reply field with incompatible type changes but it is also
+ in the ignore list"
+ command_name: newUnstableReplyFieldTypeChangesIgnoreList
+ namespace: ignored
+ cpp_name: newUnstableReplyFieldTypeChangesIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: NewUnstableTypeChangesReplyIgnoreList
+
commandAllowedAnyTypes:
description: "new command that has parameter and reply type with
explicitly allowed 'any' bson serialization type passes"
diff --git a/buildscripts/idl/tests/compatibility_test_pass/old/compatibility_test_pass_old.idl b/buildscripts/idl/tests/compatibility_test_pass/old/compatibility_test_pass_old.idl
index 704ea71b0a1..f442e45a20e 100644
--- a/buildscripts/idl/tests/compatibility_test_pass/old/compatibility_test_pass_old.idl
+++ b/buildscripts/idl/tests/compatibility_test_pass/old/compatibility_test_pass_old.idl
@@ -101,6 +101,22 @@ structs:
type: string
unstable: true
+ UnstableNewFieldReplyIgnoreList:
+ description: "This reply contains a field that is stable in the old command and is
+ unstable in the new command, but the change is explicitly allowed."
+ fields:
+ unstableNewFieldIgnoreList:
+ type: string
+ unstable: false
+
+ NewReplyTypeEnumOrStructIgnoreList:
+ description: "the type is a non-enum or struct type in the old command, and an enum or struct
+ in the new command, but the change is explicitly allowed"
+ fields:
+ newReplyTypeEnumOrStructIgnoreList:
+ type: string
+ unstable: false
+
RequiredNewFieldReply:
description: "This reply contains a field that is optional in the old command but is
required in the new command."
@@ -129,6 +145,14 @@ structs:
type: string
unstable: true
+ UnstableOptionalNewFieldReplyIgnoreList:
+ description: "This reply contains a field that is stable in the old command and is
+ unstable and optional in the new command, but the change is allowed"
+ fields:
+ unstableOptionalNewFieldIgnoreList:
+ unstable: false
+ type: string
+
EnumSubsetReply:
description: "This reply contains an enum field where the new enum values is a subset
of the old reply type's enum values"
@@ -153,6 +177,14 @@ structs:
type: intStringBoolToIntString
unstable: false
+ StructFieldTypeRecursiveReplyIgnoreList:
+ description: "This reply contains a field whose new type is a struct that is not
+ compatible with the old field type, but this is explicitly allowed"
+ fields:
+ structReplyField:
+ type: UnstableNewFieldReplyIgnoreList
+ unstable: false
+
StructFieldTypeRecursiveReplyOne:
description: "This reply contains a field whose new type is a struct that is
compatible with the old field type"
@@ -177,6 +209,14 @@ structs:
type: BsonSubsetReply
unstable: false
+ StructTypeUnstable:
+ description: "This struct contains a field whose new type is compatible with the
+ old field type and is unstable in both the old and new versions"
+ fields:
+ fieldOne:
+ type: BsonSubsetReply
+ unstable: true
+
OldVariantTypeReply:
description: "This reply contains an old field that has a variant type while the new field
is not a variant type"
@@ -186,6 +226,15 @@ structs:
type:
variant: [int, string, array<string>]
+ NewVariantNotSubsetReplyIgnoreList:
+ description: "This reply contains a field whose new variant types are not a subset
+ of the old variant types"
+ fields:
+ variantNotSubsetReplyFieldIgnoreList:
+ unstable: false
+ type:
+ variant: [int, bool, string]
+
NewVariantSubsetReply:
description: "This reply contains a field whose new variant types are a subset
of the old variant types"
@@ -233,6 +282,31 @@ structs:
variant: [int, StructFieldTypeRecursiveReplyTwo,
array<StructFieldTypeRecursiveReplyTwo>, array<string>]
+ VariantDifferentStructReplyIgnoreList:
+ description: "This reply contains a field that has a new variant struct type that is
+ different from the old variant struct type"
+ fields:
+ variantStructRecursiveReplyFieldIgnoreList:
+ unstable: false
+ type:
+ variant: [int, StructFieldTypeRecursiveReplyOne]
+
+ NonVariantToVariantReplyIgnoreList:
+ description: "This reply contains a field that changes from a non-variant type to a variant
+ type, but the field is in the ignore list"
+ fields:
+ nonVariantToVariantReplyFieldIgnoreList:
+ unstable: false
+ type: StructType
+
+ NonEnumToEnumReplyIgnoreList:
+ description: "This reply contains a field that changes from a non-enum type to an enum
+ type, but the field is in the ignore list"
+ fields:
+ nonEnumToEnumReplyIgnoreList:
+ type: StructTypeUnstable
+ unstable: false
+
NewCommandParameterStruct:
description: "The new command parameter's type and the
old command parameter's type are both structs"
@@ -396,6 +470,15 @@ structs:
unstable: true
type: intStringToIntStringBool
+ NewUnstableTypeChangesReplyIgnoreList:
+ description: "This reply contains a field that is stable in the old version and has type changes,
+ but is also in the ignore list"
+ fields:
+ newUnstableTypeChangesReplyFieldIgnoreList:
+ unstable: false
+ type: intStringToIntStringBool
+ optional: true
+
NewlyAddedBsonSerializationTypeAnyStruct:
description: "This struct contains a newly added field whose type has a bson_serialization_type
that contains 'any' that is explicitly allowed"
@@ -407,6 +490,15 @@ structs:
unstable: true
type: intStringBoolToIntString
+ NewUnstableTypeChangesStructIgnoreList:
+ description: "This struct contains a field that is stable in the old version and has type,
+ changes but is also in the ignore list"
+ fields:
+ newUnstableTypeChangesFieldIgnoreList:
+ type: intStringBoolToIntString
+ unstable: false
+ optional: true
+
BsonSerializationTypeAnyWithVariantReply:
description: "This reply contains a new reply field with variant types where one of the
bson serialization types is 'any' and is explicitly allowed"
@@ -577,6 +669,20 @@ commands:
type: string
unstable: true
+ commandParameterUnstableIgnoreList:
+ description: "new unstable command parameter is stable in the corresponding old
+ command, but the change is explicitly allowed"
+ command_name: commandParameterUnstableIgnoreList
+ namespace: ignored
+ cpp_name: commandParameterUnstableIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: OkReply
+ fields:
+ newUnstableParameterIgnoreList:
+ type: string
+ unstable: false
+
removeCommandParameterUnstable:
description: "new command removes parameter that is unstable
in the corresponding old command and still passes"
@@ -738,6 +844,21 @@ commands:
type: intStringBoolToIntString
unstable: true
+ newUnstableParamTypeChangesIgnoreList:
+ description: "command has param with incompatible type changes, but is in the stable-to-unstable
+ ignore list"
+ command_name: newUnstableParamTypeChangesIgnoreList
+ namespace: ignored
+ cpp_name: newUnstableParamTypeChangesIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: OkReply
+ fields:
+ newUnstableTypeChangesParamIgnoreList:
+ type: intStringBoolToIntString
+ unstable: false
+ optional: true
+
newlyAddedTypeFieldBsonAnyAllowList:
description: "command passes when its type field is newly added and has bson type 'any'
that is explicitly allowed"
@@ -759,6 +880,17 @@ commands:
api_version: "1"
reply_type: OkReply
+ newUnstableTypeChangesIgnoreList:
+ description: "command has type with incompatible changes, but is in the stable-to-unstable
+ ignore list"
+ command_name: newUnstableTypeChangesIgnoreList
+ namespace: type
+ type: NewUnstableTypeChangesStructIgnoreList
+ cpp_name: newUnstableTypeChangesIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: OkReply
+
oldCommandParameterValidator:
description: "new command passes when it contains a parameter that does not contain a validator
that is present in the old parameter"
@@ -827,6 +959,16 @@ commands:
api_version: "1"
reply_type: StableNewFieldReply
+ newReplyFieldUnstableIgnoreList:
+ description: "new command contains an unstable reply field that is stable
+ in the corresponding old command but is explicitly allowed"
+ command_name: newReplyFieldUnstableIgnoreList
+ namespace: ignored
+ cpp_name: newReplyFieldUnstableIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: UnstableNewFieldReplyIgnoreList
+
newReplyFieldRequired:
description: "new command contains a required reply field that is optional
in the corresponding old command and still passes"
@@ -856,6 +998,16 @@ commands:
api_version: "1"
reply_type: UnstableOldFieldReply
+ newReplyFieldUnstableOptionalIgnoreList:
+ description: "old reply field is stable but is included in the ignore list so new commmand
+ passes even if its new reply field is unstable and optional"
+ command_name: newReplyFieldUnstableOptionalIgnoreList
+ namespace: ignored
+ cpp_name: newReplyFieldUnstableOptionalIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: UnstableOptionalNewFieldReplyIgnoreList
+
importedReplyCommand:
description: "reply is imported"
command_name: importedReplyCommand
@@ -917,6 +1069,16 @@ commands:
api_version: "1"
reply_type: StructFieldTypeRecursiveReplyTwo
+ newReplyFieldTypeStructIgnoreList:
+ description: "command has a reply contains a field whose new type is a struct that is not
+ compatible with the old field type, but this is explicitly allowed"
+ command_name: newReplyFieldTypeStructIgnoreList
+ namespace: ignored
+ cpp_name: newReplyFieldTypeStructIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: StructFieldTypeRecursiveReplyIgnoreList
+
newNamespaceIgnored:
description: "new command passes when its namespace is changed to ignored"
command_name: newNamespaceIgnored
@@ -990,6 +1152,17 @@ commands:
api_version: "1"
reply_type: OkReply
+ newReplyTypeEnumOrStructIgnoreList:
+ description: "the type is a non-enum or struct type in the old command, and an enum or struct
+ in the new command, but the change is explicitly allowed"
+ command_name: newReplyTypeEnumOrStructIgnoreList
+ namespace: type
+ type: namespacestring
+ cpp_name: newReplyTypeEnumOrStructIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: NewReplyTypeEnumOrStructIgnoreList
+
newTypeFieldOptional:
description: "new command type contains an optional field that is required
in the corresponding old command and still passes"
@@ -1033,6 +1206,17 @@ commands:
api_version: "1"
reply_type: OkReply
+ newTypeFieldUnstableIgnoreList:
+ description: "new command contains an unstable type field that is stable in the corresponding
+ old command but that is explicitly allowed"
+ command_name: newTypeFieldUnstableIgnoreList
+ namespace: type
+ type: UnstableNewFieldReplyIgnoreList
+ cpp_name: newTypeFieldUnstableIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: OkReply
+
removeTypeFieldUnstable:
description: "new command removes type field that is unstable
in the corresponding old command and still passes"
@@ -1076,6 +1260,15 @@ commands:
api_version: "1"
reply_type: OldVariantTypeReply
+ newReplyFieldVariantNotSubsetIgnoreList:
+ description: "the command's reply field type is a variant type that is not a subset of the old reply field variant types"
+ command_name: newReplyFieldVariantNotSubsetIgnoreList
+ namespace: ignored
+ cpp_name: newReplyFieldVariantNotSubsetIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: NewVariantNotSubsetReplyIgnoreList
+
newReplyFieldVariantSubset:
description: "new command when its reply field type is a variant type that is
a subset of the old reply field variant types"
@@ -1126,6 +1319,36 @@ commands:
api_version: "1"
reply_type: VariantStructRecursiveReply
+ replyFieldVariantDifferentStructIgnoreList:
+ description: "the old field has a non-variant type and the new field has a variant type but the
+ change is explicitly allowed"
+ command_name: replyFieldVariantDifferentStructIgnoreList
+ namespace: ignored
+ cpp_name: replyFieldVariantDifferentStructIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: VariantDifferentStructReplyIgnoreList
+
+ replyFieldNonVariantToVariantIgnoreList:
+ description: "the old and new field have different variant struct types but the change is
+ explicitly allowed"
+ command_name: replyFieldNonVariantToVariantIgnoreList
+ namespace: ignored
+ cpp_name: replyFieldNonVariantToVariantIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: NonVariantToVariantReplyIgnoreList
+
+ replyFieldNonEnumToEnumIgnoreList:
+ description: "the old field has a non-enum type and the new field has an enum type but the
+ change is explicitly allowed"
+ command_name: replyFieldNonEnumToEnumIgnoreList
+ namespace: ignored
+ cpp_name: replyFieldNonEnumToEnumIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: NonEnumToEnumReplyIgnoreList
+
newlyAddedReplyFieldTypeBsonAnyAllowed:
description: "command passes because it has a newly added reply field type has a bson_serialization_type
that contains 'any' that is explicitly allowed"
@@ -1145,6 +1368,16 @@ commands:
api_version: "1"
reply_type: OldUnstableTypeChangesReply
+ newUnstableReplyFieldTypeChangesIgnoreList:
+ description: "command has an old stable reply field with incompatible type changes but it is also
+ in the ignore list"
+ command_name: newUnstableReplyFieldTypeChangesIgnoreList
+ namespace: ignored
+ cpp_name: newUnstableReplyFieldTypeChangesIgnoreList
+ strict: true
+ api_version: "1"
+ reply_type: NewUnstableTypeChangesReplyIgnoreList
+
commandAllowedAnyTypes:
description: "new command that has parameter and reply type with
explicitly allowed 'any' bson serialization type passes"
diff --git a/buildscripts/libdeps/graph_visualizer_web_stack/package.json b/buildscripts/libdeps/graph_visualizer_web_stack/package.json
index 4d8196478bb..214f87ca4c8 100644
--- a/buildscripts/libdeps/graph_visualizer_web_stack/package.json
+++ b/buildscripts/libdeps/graph_visualizer_web_stack/package.json
@@ -20,17 +20,18 @@
"@material-ui/core": "5.0.0-alpha.22",
"@material-ui/icons": "5.0.0-alpha.22",
"@material-ui/lab": "5.0.0-alpha.22",
+ "bezier-js": "4.0.3",
"canvas": "^2.5.0",
"date-fns": "^2.16.1",
"dayjs": "^1.9.7",
+ "force-graph": "1.40.0",
+ "http-proxy-middleware": "^2.0.6",
"http-server": "^0.12.3",
"luxon": "^1.25.0",
"moment": "^2.29.1",
"p-limit": "^3.0.2",
"react": "^16.8",
"react-dom": "^16.0.0",
- "bezier-js": "4.0.3",
- "force-graph": "1.40.0",
"react-force-graph-2d": "1.18.1",
"react-force-graph-3d": "1.18.8",
"react-indiana-drag-scroll": "^1.8.0",
@@ -56,6 +57,5 @@
"last 1 firefox version",
"last 1 safari version"
]
- },
- "proxy": "http://localhost:5000"
+ }
}
diff --git a/buildscripts/libdeps/graph_visualizer_web_stack/src/DataGrid.js b/buildscripts/libdeps/graph_visualizer_web_stack/src/DataGrid.js
index 8364b55ff67..dd16b5a33d8 100644
--- a/buildscripts/libdeps/graph_visualizer_web_stack/src/DataGrid.js
+++ b/buildscripts/libdeps/graph_visualizer_web_stack/src/DataGrid.js
@@ -112,7 +112,7 @@ const DataGrid = ({
style["justifyContent"] = "space-evenly";
finalCellData = (
<Checkbox
- checked={checkBoxes[rowIndex].selected}
+ checked={checkBoxes[rowIndex] ? checkBoxes[rowIndex].selected : false}
onChange={(event) => {
setCheckBoxes(
checkBoxes.map((checkbox, index) => {
diff --git a/buildscripts/libdeps/graph_visualizer_web_stack/src/NodeList.js b/buildscripts/libdeps/graph_visualizer_web_stack/src/NodeList.js
index 96f19a5d071..eed58ee8875 100644
--- a/buildscripts/libdeps/graph_visualizer_web_stack/src/NodeList.js
+++ b/buildscripts/libdeps/graph_visualizer_web_stack/src/NodeList.js
@@ -3,10 +3,12 @@ import React from "react";
import { connect } from "react-redux";
import { getNodes } from "./redux/store";
import { setFindNode } from "./redux/findNode";
+import { setListSearchTerm } from "./redux/listSearchTerm";
import { socket } from "./connect";
import DataGrid from "./DataGrid";
import LoadingBar from "./LoadingBar";
+import TextField from "@material-ui/core/TextField";
const columns = [
{ dataKey: "check", label: "Selected", width: 70 },
@@ -14,7 +16,7 @@ const columns = [
{ id: "ID", dataKey: "node", label: "Node", width: 200 },
];
-const NodeList = ({ nodes, loading, setFindNode }) => {
+const NodeList = ({ nodes, loading, setFindNode, setListSearchTerm}) => {
function handleCheckBoxes(rowIndex, event) {
socket.emit("row_selected", {
data: { node: nodes[rowIndex].node, name: nodes[rowIndex].name },
@@ -26,8 +28,18 @@ const NodeList = ({ nodes, loading, setFindNode }) => {
setFindNode(event.target.textContent);
}
+ function handleSearchTermChange(event) {
+ setListSearchTerm(event.target.value);
+ }
+
return (
<LoadingBar loading={loading} height={"95%"}>
+ <TextField
+ fullWidth
+ onChange={handleSearchTermChange}
+ onClick={(event)=> event.target.select()}
+ label="Search for Node"
+ />
<DataGrid
rows={nodes}
columns={columns}
@@ -40,4 +52,4 @@ const NodeList = ({ nodes, loading, setFindNode }) => {
);
};
-export default connect(getNodes, { setFindNode })(NodeList);
+export default connect(getNodes, { setFindNode, setListSearchTerm })(NodeList);
diff --git a/buildscripts/libdeps/graph_visualizer_web_stack/src/redux/listSearchTerm.js b/buildscripts/libdeps/graph_visualizer_web_stack/src/redux/listSearchTerm.js
new file mode 100644
index 00000000000..df288f4af47
--- /dev/null
+++ b/buildscripts/libdeps/graph_visualizer_web_stack/src/redux/listSearchTerm.js
@@ -0,0 +1,16 @@
+import { initialState } from "./store";
+
+export const listSearchTerm = (state = initialState, action) => {
+ switch (action.type) {
+ case "setListSearchTerm":
+ return action.payload;
+
+ default:
+ return state;
+ }
+};
+
+export const setListSearchTerm = (listSearchTerm) => ({
+ type: "setListSearchTerm",
+ payload: listSearchTerm,
+});
\ No newline at end of file
diff --git a/buildscripts/libdeps/graph_visualizer_web_stack/src/redux/store.js b/buildscripts/libdeps/graph_visualizer_web_stack/src/redux/store.js
index 8024c7cb327..e539ef21292 100644
--- a/buildscripts/libdeps/graph_visualizer_web_stack/src/redux/store.js
+++ b/buildscripts/libdeps/graph_visualizer_web_stack/src/redux/store.js
@@ -8,6 +8,7 @@ import { links } from "./links";
import { graphData } from "./graphData";
import { findNode } from "./findNode";
import { graphPaths } from "./graphPaths";
+import { listSearchTerm } from "./listSearchTerm";
export const initialState = {
loading: false,
@@ -61,6 +62,7 @@ export const initialState = {
dependencies: [{ node: "test/test2.so", symbols: [] }],
},
],
+ listSearchTerm: "",
};
export const getLoading = (state) => {
@@ -88,11 +90,12 @@ export const getCounts = (state) => {
};
export const getRows = (state) => {
+ let searchedNodes = state.nodes.filter(node => node.node.indexOf(state.listSearchTerm) > -1);
return {
- rowCount: state.nodes.length,
- rowGetter: ({ index }) => state.nodes[index],
- checkBox: ({ index }) => state.nodes[index].selected,
- nodes: state.nodes,
+ rowCount: searchedNodes.length,
+ rowGetter: ({ index }) => searchedNodes[index],
+ checkBox: ({ index }) => searchedNodes[index].selected,
+ nodes: searchedNodes,
};
};
@@ -109,6 +112,8 @@ export const getNodes = (state) => {
return {
nodes: state.nodes,
loading: state.loading,
+ listSearchTerm: state.listSearchTerm,
+ searchedNodes: state.nodes.filter(node => node.node.indexOf(state.listSearchTerm) > -1),
};
};
@@ -137,6 +142,7 @@ const store = createStore(
graphData,
findNode,
graphPaths,
+ listSearchTerm,
}),
initialState
);
diff --git a/buildscripts/libdeps/graph_visualizer_web_stack/src/setupProxy.js b/buildscripts/libdeps/graph_visualizer_web_stack/src/setupProxy.js
new file mode 100644
index 00000000000..d3b4e0dc9ed
--- /dev/null
+++ b/buildscripts/libdeps/graph_visualizer_web_stack/src/setupProxy.js
@@ -0,0 +1,17 @@
+/**
+ * This proxy is intended to allow the visualizer to run in a development environment
+ * which includes SSH tunnels communicating with private remote hosts.
+ */
+
+const { createProxyMiddleware } = require('http-proxy-middleware');
+
+module.exports = function(app) {
+ app.use(
+ createProxyMiddleware('/socket.io', {
+ target: 'http://localhost:5000',
+ ws: true,
+ changeOrigin: true,
+ secure: false
+ })
+ );
+};
\ No newline at end of file
diff --git a/buildscripts/moduleconfig.py b/buildscripts/moduleconfig.py
index b31a9dbf8db..b4d0bba0490 100644
--- a/buildscripts/moduleconfig.py
+++ b/buildscripts/moduleconfig.py
@@ -33,16 +33,26 @@ import os
def discover_modules(module_root, allowed_modules):
+ # pylint: disable=too-many-branches
"""Scan module_root for subdirectories that look like MongoDB modules.
Return a list of imported build.py module objects.
"""
found_modules = []
+ found_module_names = []
if allowed_modules is not None:
allowed_modules = allowed_modules.split(',')
+ # When `--modules=` is passed, the split on empty string is represented
+ # in memory as ['']
+ if allowed_modules == ['']:
+ allowed_modules = []
if not os.path.isdir(module_root):
+ if allowed_modules:
+ raise RuntimeError(
+ f"Requested the following modules: {allowed_modules}, but the module root '{module_root}' could not be found. Check the module root, or remove the module from the scons invocation."
+ )
return found_modules
for name in os.listdir(module_root):
@@ -66,11 +76,17 @@ def discover_modules(module_root, allowed_modules):
if getattr(module, "name", None) is None:
module.name = name
found_modules.append(module)
+ found_module_names.append(name)
finally:
fp.close()
except (FileNotFoundError, IOError):
pass
+ if allowed_modules is not None:
+ missing_modules = set(allowed_modules) - set(found_module_names)
+ if missing_modules:
+ raise RuntimeError(f"Failed to locate all modules. Could not find: {missing_modules}")
+
return found_modules
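
The new `--modules=` handling in discover_modules() hinges on a Python str.split() edge case, and the function now errors out when a requested module is never found. A small standalone illustration of both behaviours (the module name is a placeholder):

    # `--modules=` with an empty value splits to [''], which must be normalized to [].
    allowed_modules = "".split(",")
    assert allowed_modules == [""]
    if allowed_modules == [""]:
        allowed_modules = []              # nothing was actually requested

    # The missing-module check added at the end of discover_modules():
    requested = {"enterprise"}            # placeholder module name
    found_module_names = {"enterprise"}   # pretend discovery found it
    missing = requested - set(found_module_names)
    assert not missing                    # nothing missing, so no RuntimeError is raised
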
diff --git a/buildscripts/packager_enterprise.py b/buildscripts/packager_enterprise.py
index fcc260c86be..b8f042f6cf9 100755
--- a/buildscripts/packager_enterprise.py
+++ b/buildscripts/packager_enterprise.py
@@ -24,7 +24,7 @@
# * Before you run the program on a new host, these are the
# prerequisites:
#
-# apt-get install dpkg-dev rpm debhelper fakeroot ia32-libs createrepo git-core libsnmp15
+# apt-get install dpkg-dev rpm debhelper fakeroot ia32-libs createrepo git-core
# echo "Now put the dist gnupg signing keys in ~root/.gnupg"
import errno
@@ -228,7 +228,7 @@ def unpack_binaries_into(build_os, arch, spec, where):
try:
packager.sysassert(["tar", "xvzf", rootdir + "/" + tarfile(build_os, arch, spec)])
release_dir = glob('mongodb-linux-*')[0]
- for releasefile in "bin", "snmp", "LICENSE-Enterprise.txt", "README", "THIRD-PARTY-NOTICES", "MPL-2":
+ for releasefile in "bin", "LICENSE-Enterprise.txt", "README", "THIRD-PARTY-NOTICES", "MPL-2":
os.rename("%s/%s" % (release_dir, releasefile), releasefile)
os.rmdir(release_dir)
except Exception:
@@ -257,7 +257,7 @@ def make_package(distro, build_os, arch, spec, srcdir):
"(cd \"%s\" && git archive %s %s/ ) | (cd \"%s\" && tar xvf -)" %
(srcdir, spec.metadata_gitspec(), pkgdir, sdir)
])
- # Splat the binaries and snmp files under sdir. The "build" stages of the
+ # Splat the binaries under sdir. The "build" stages of the
# packaging infrastructure will move the files to wherever they
# need to go.
unpack_binaries_into(build_os, arch, spec, sdir)
diff --git a/buildscripts/packaging/msi/mongod.yaml b/buildscripts/packaging/msi/mongod.yaml
index ba10e7517ef..dd4bdb39011 100644
--- a/buildscripts/packaging/msi/mongod.yaml
+++ b/buildscripts/packaging/msi/mongod.yaml
@@ -36,5 +36,3 @@ net:
## Enterprise-Only Options:
#auditLog:
-
-#snmp:
diff --git a/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml b/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml
index c08125ad738..1188d8163cc 100644
--- a/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml
+++ b/buildscripts/resmokeconfig/fully_disabled_feature_flags.yml
@@ -8,3 +8,7 @@
# Disable featureFlagRequireTenantID until all paths pass tenant id to TenantNamespace
# and TenantDatabase constructors.
- featureFlagRequireTenantID
+# This flag exists to help users in managed environments that upgraded to 6.0 before 6.0.0-rc8 was
+# released create the transactions collection index. It is only meant to be enabled ad hoc, so only
+# its targeted tests should enable it.
+- featureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp
diff --git a/buildscripts/resmokeconfig/suites/aggregation_column_store_index_passthrough.yml b/buildscripts/resmokeconfig/suites/aggregation_column_store_index_passthrough.yml
new file mode 100644
index 00000000000..36773338d80
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/aggregation_column_store_index_passthrough.yml
@@ -0,0 +1,41 @@
+# This test suite re-uses the tests in the aggregation suite but will automatically create a column
+# store index on each collection using the failpoint defined in the test fixture's parameters below.
+# For more details and rationale for this approach, see the failpoint's definition.
+test_kind: js_test
+
+selector:
+ roots:
+ - jstests/aggregation/**/*.js
+ exclude_files:
+ - jstests/aggregation/extras/*.js
+ - jstests/aggregation/data/*.js
+ # TODO SERVER-67264 there is a bug in projecting "a" and matching on "a.b".
+ - jstests/aggregation/bugs/match.js
+
+ exclude_with_any_tags:
+ - assumes_no_implicit_index_creation
+ # Column Store Indexes are known to mess up projection field order, and since this is fundamental
+ # to the design, we accept that these tests will fail.
+ - tests_projection_field_order
+
+executor:
+ archive:
+ hooks:
+ - ValidateCollections
+ config:
+ shell_options:
+ # This override method helps some tests remain applicable in the passthrough by hiding the
+ # column store indexes from 'listIndexes' output, so tests can still see only a list of
+ # indexes that the test itself created.
+ eval: load("jstests/libs/override_methods/hide_column_store_indexes_from_get_indexes.js")
+ hooks:
+ - class: ValidateCollections
+ - class: CleanEveryN
+ n: 20
+ fixture:
+ class: MongoDFixture
+ mongod_options:
+ set_parameters:
+ enableTestCommands: 1
+ failpoint.createColumnIndexOnAllCollections:
+ mode: alwaysOn
diff --git a/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml b/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml
index 17716017fac..3d429f10812 100644
--- a/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml
+++ b/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml
@@ -69,6 +69,10 @@ selector:
# This test uses `benchRun` which spawns connections which do not inherit the causal session.
- jstests/core/benchrun_pipeline_updates.js
+ # In the context of auth on mongos, illegal namespaces trigger a different error code than they do
+ # on mongod. To keep the test simple, we avoid running it against a mongos when auth is enabled.
+ - jstests/core/illegal_cmd_namespace.js
+
exclude_with_any_tags:
- assumes_against_mongod_not_mongos
- assumes_standalone_mongod
diff --git a/buildscripts/resmokeconfig/suites/change_streams_downgrade.yml b/buildscripts/resmokeconfig/suites/change_streams_downgrade.yml
index e172efdc0e2..bb56d4e0cc2 100644
--- a/buildscripts/resmokeconfig/suites/change_streams_downgrade.yml
+++ b/buildscripts/resmokeconfig/suites/change_streams_downgrade.yml
@@ -149,7 +149,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $collStats
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml
index b51982cea1c..39e4e4bccbb 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml
@@ -128,13 +128,6 @@ selector:
# for dbCheck. TODO (SERVER-63951): Remove this exclusion.
- jstests/concurrency/fsm_workloads/create_collection_and_view.js
- # This suite runs the RunDBCheckInBackground hook and the dbCheck command generates oplog entries
- # but those oplog entries are not supported by resharding.
- # TODO (SERVER-66011): Enable internal_transactions_resharding.js in the
- # concurrency_sharded_multi_stmt_txn_with_balancer suite
- - jstests/concurrency/fsm_workloads/internal_transactions_resharding.js
-
-
exclude_with_any_tags:
- assumes_balancer_off
- does_not_support_causal_consistency
@@ -158,7 +151,6 @@ selector:
executor:
archive:
hooks:
- - RunDBCheckInBackground
- CheckReplDBHashInBackground
- CheckReplDBHash
- ValidateCollections
@@ -171,7 +163,6 @@ executor:
runningWithSessions: true
traceExceptions: false
hooks:
- - class: RunDBCheckInBackground
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash
- class: CheckOrphansDeleted
diff --git a/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication.yml b/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication.yml
index cc78b3c7fbd..495f89be558 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication.yml
@@ -33,12 +33,6 @@ selector:
# TODO: SERVER-39939.
- jstests/concurrency/fsm_workloads/snapshot_read_kill_operations.js
- # This workload kills random sessions and a different FSM workload wouldn't be able to handle
- # the error response from the op being killed.
- - jstests/concurrency/fsm_workloads/multi_statement_transaction_kill_sessions_atomicity_isolation.js
- - jstests/concurrency/fsm_workloads/multi_statement_transaction_simple_kill_sessions.js
- - jstests/concurrency/fsm_workloads/internal_transactions_kill_sessions.js
-
# This workload may restart running transactions on a different client, causing deadlock if
# there is a concurrent dropDatabase waiting for the global X lock.
# TODO: SERVER-37876
@@ -58,6 +52,7 @@ selector:
exclude_with_any_tags:
- requires_sharding
+ - kills_random_sessions
group_size: 10
group_count_multiplier: 1.0
diff --git a/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication_wiredtiger_cursor_sweeps.yml b/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication_wiredtiger_cursor_sweeps.yml
index a797c83348c..6368bc5f2a7 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication_wiredtiger_cursor_sweeps.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication_wiredtiger_cursor_sweeps.yml
@@ -33,12 +33,6 @@ selector:
# TODO: SERVER-39939.
- jstests/concurrency/fsm_workloads/snapshot_read_kill_operations.js
- # This workload kills random sessions and a different FSM workload wouldn't be able to handle
- # the error response from the op being killed.
- - jstests/concurrency/fsm_workloads/multi_statement_transaction_kill_sessions_atomicity_isolation.js
- - jstests/concurrency/fsm_workloads/multi_statement_transaction_simple_kill_sessions.js
- - jstests/concurrency/fsm_workloads/internal_transactions_kill_sessions.js
-
# This workload may restart running transactions on a different client, causing deadlock if
# there is a concurrent dropDatabase waiting for the global X lock.
# TODO: SERVER-37876
@@ -69,6 +63,7 @@ selector:
exclude_with_any_tags:
- requires_sharding
+ - kills_random_sessions
group_size: 10
group_count_multiplier: 1.0
diff --git a/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication_wiredtiger_eviction_debug.yml b/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication_wiredtiger_eviction_debug.yml
index 7c8c1fd84d9..73c00232bb1 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication_wiredtiger_eviction_debug.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication_wiredtiger_eviction_debug.yml
@@ -32,12 +32,6 @@ selector:
# TODO: SERVER-39939.
- jstests/concurrency/fsm_workloads/snapshot_read_kill_operations.js
- # This workload kills random sessions and a different FSM workload wouldn't be able to handle
- # the error response from the op being killed.
- - jstests/concurrency/fsm_workloads/multi_statement_transaction_kill_sessions_atomicity_isolation.js
- - jstests/concurrency/fsm_workloads/multi_statement_transaction_simple_kill_sessions.js
- - jstests/concurrency/fsm_workloads/internal_transactions_kill_sessions.js
-
# This workload may restart running transactions on a different client, causing deadlock if
# there is a concurrent dropDatabase waiting for the global X lock.
# TODO: SERVER-37876
@@ -63,6 +57,7 @@ selector:
exclude_with_any_tags:
- requires_sharding
+ - kills_random_sessions
group_size: 10
group_count_multiplier: 1.0
diff --git a/buildscripts/resmokeconfig/suites/core_minimum_batch_size.yml b/buildscripts/resmokeconfig/suites/core_minimum_batch_size.yml
index a78e544de77..cd7b0275421 100644
--- a/buildscripts/resmokeconfig/suites/core_minimum_batch_size.yml
+++ b/buildscripts/resmokeconfig/suites/core_minimum_batch_size.yml
@@ -13,6 +13,7 @@ selector:
- jstests/core/profile2.js # Extra operation for a getmore.
- jstests/core/sortk.js # Negative limit value changes result to batchSize.
- jstests/core/tailable_skip_limit.js # Negative limit value changes result to batchSize.
+ - jstests/core/exhaust.js # Negative limit value changes result to batchSize.
executor:
archive:
diff --git a/buildscripts/resmokeconfig/suites/cqf.yml b/buildscripts/resmokeconfig/suites/cqf.yml
index 5c4415228b7..bbf84fdf079 100644
--- a/buildscripts/resmokeconfig/suites/cqf.yml
+++ b/buildscripts/resmokeconfig/suites/cqf.yml
@@ -28,3 +28,5 @@ executor:
enableTestCommands: 1
featureFlagCommonQueryFramework: true
internalQueryEnableCascadesOptimizer: true
+ # This flag disables the fallback path that may hide bugs in CQF.
+ internalQueryForceCommonQueryFramework: true
diff --git a/buildscripts/resmokeconfig/suites/cqf_parallel.yml b/buildscripts/resmokeconfig/suites/cqf_parallel.yml
index 57d55f023a3..b8463c94fa5 100644
--- a/buildscripts/resmokeconfig/suites/cqf_parallel.yml
+++ b/buildscripts/resmokeconfig/suites/cqf_parallel.yml
@@ -28,4 +28,6 @@ executor:
enableTestCommands: 1
featureFlagCommonQueryFramework: true
internalQueryEnableCascadesOptimizer: true
+ # This flag disables the fallback path that may hide bugs in CQF.
+ internalQueryForceCommonQueryFramework: true
internalQueryDefaultDOP: 5
diff --git a/buildscripts/resmokeconfig/suites/fle2_high_cardinality.yml b/buildscripts/resmokeconfig/suites/fle2_high_cardinality.yml
new file mode 100644
index 00000000000..4e6002ae7d2
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/fle2_high_cardinality.yml
@@ -0,0 +1,32 @@
+test_kind: js_test
+selector:
+ roots:
+ - jstests/fle2/**/*.js
+ - src/mongo/db/modules/*/jstests/fle2/*.js
+ - src/mongo/db/modules/*/jstests/fle2/query/*.js
+ exclude_with_any_tags:
+ # Not compatible with tests that expect fle to always use $in in queries,
+ # i.e. tests that verify explain output
+ - requires_fle_in_always
+
+executor:
+ archive:
+ hooks:
+ - ValidateCollections
+ config:
+ shell_options:
+ eval: "testingReplication = true; testingFLESharding = false;"
+ hooks:
+ # We don't execute dbHash or oplog consistency checks since there is only a single replica set
+ # node.
+ - class: ValidateCollections
+ - class: CleanEveryN
+ n: 20
+ fixture:
+ class: ReplicaSetFixture
+ mongod_options:
+ set_parameters:
+ enableTestCommands: 1
+ internalQueryFLEAlwaysUseHighCardinalityMode: 1
+ # Use a 2-node replica set.
+ num_nodes: 2
diff --git a/buildscripts/resmokeconfig/suites/fle2_sharding_high_cardinality.yml b/buildscripts/resmokeconfig/suites/fle2_sharding_high_cardinality.yml
new file mode 100644
index 00000000000..33a3d4e5c1a
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/fle2_sharding_high_cardinality.yml
@@ -0,0 +1,37 @@
+test_kind: js_test
+selector:
+ roots:
+ - jstests/fle2/*.js
+ - src/mongo/db/modules/*/jstests/fle2/*.js
+ - src/mongo/db/modules/*/jstests/fle2/query/*.js
+ exclude_with_any_tags:
+ # Not compatible with tests that expect fle to always use $in in queries,
+ # i.e. tests that verify explain output
+ - requires_fle_in_always
+
+executor:
+ archive:
+ hooks:
+ - CheckReplDBHash
+ - ValidateCollections
+ config:
+ shell_options:
+ eval: "testingReplication = false; testingFLESharding = true;"
+ hooks:
+ - class: CheckReplDBHash
+ - class: ValidateCollections
+ - class: CleanEveryN
+ n: 20
+ fixture:
+ class: ShardedClusterFixture
+ mongos_options:
+ set_parameters:
+ enableTestCommands: 1
+ internalQueryFLEAlwaysUseHighCardinalityMode: 1
+ mongod_options:
+ set_parameters:
+ enableTestCommands: 1
+ internalQueryFLEAlwaysUseHighCardinalityMode: 1
+ num_rs_nodes_per_shard: 2
+ enable_sharding:
+ - test
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_100ms_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_100ms_refresh_jscore_passthrough.yml
index 0194d0b5a60..d44172752fb 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_100ms_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_100ms_refresh_jscore_passthrough.yml
@@ -14,7 +14,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# The awaitdata_getmore_cmd.js test tails the oplog and waits for the getMore batch size to equal
# zero. The CheckReplDBHashInBackground hook consistently runs and creates sessions. At the same
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_10sec_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_10sec_refresh_jscore_passthrough.yml
index 47213b4a54c..2633fe3b32e 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_10sec_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_10sec_refresh_jscore_passthrough.yml
@@ -14,7 +14,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# The awaitdata_getmore_cmd.js test tails the oplog and waits for the getMore batch size to equal
# zero. The CheckReplDBHashInBackground hook consistently runs and creates sessions. At the same
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_1sec_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_1sec_refresh_jscore_passthrough.yml
index 348d549df2f..ffa1b1c514f 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_1sec_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_1sec_refresh_jscore_passthrough.yml
@@ -14,7 +14,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# The awaitdata_getmore_cmd.js test tails the oplog and waits for the getMore batch size to equal
# zero. The CheckReplDBHashInBackground hook consistently runs and creates sessions. At the same
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_default_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_default_refresh_jscore_passthrough.yml
index 4f1d19d2d48..fd4a882859e 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_default_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_default_refresh_jscore_passthrough.yml
@@ -14,7 +14,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# The awaitdata_getmore_cmd.js test tails the oplog and waits for the getMore batch size to equal
# zero. The CheckReplDBHashInBackground hook consistently runs and creates sessions. At the same
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_100ms_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_100ms_refresh_jscore_passthrough.yml
index d633a55e6f7..b3e53d6b280 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_100ms_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_100ms_refresh_jscore_passthrough.yml
@@ -57,7 +57,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# TODO: Remove after fixing SERVER-29449. executionStats.nReturned is incorrect for sharded
# queries with a limit or for distinct commands.
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_10sec_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_10sec_refresh_jscore_passthrough.yml
index 887ef290c4c..2249df645c1 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_10sec_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_10sec_refresh_jscore_passthrough.yml
@@ -57,7 +57,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# TODO: Remove after fixing SERVER-29449. executionStats.nReturned is incorrect for sharded
# queries with a limit or for distinct commands.
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_1sec_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_1sec_refresh_jscore_passthrough.yml
index b4dae1490eb..cc966fdf43b 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_1sec_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_1sec_refresh_jscore_passthrough.yml
@@ -57,7 +57,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# TODO: Remove after fixing SERVER-29449. executionStats.nReturned is incorrect for sharded
# queries with a limit or for distinct commands.
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_default_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_default_refresh_jscore_passthrough.yml
index 495de2e43e8..fbf136b1b33 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_default_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_sharding_default_refresh_jscore_passthrough.yml
@@ -57,7 +57,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# TODO: Remove after fixing SERVER-29449. executionStats.nReturned is incorrect for sharded
# queries with a limit or for distinct commands.
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_100ms_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_100ms_refresh_jscore_passthrough.yml
index 77ee0700640..dae009af297 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_100ms_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_100ms_refresh_jscore_passthrough.yml
@@ -13,7 +13,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# These tests verify that an expected number of update operations were tracked in the server
# status metrics, but the logical session cache refresh causes additional updates to be recorded.
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_10sec_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_10sec_refresh_jscore_passthrough.yml
index c3ccea25a2d..182b59965a1 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_10sec_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_10sec_refresh_jscore_passthrough.yml
@@ -13,7 +13,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# These tests verify that an expected number of update operations were tracked in the server
# status metrics, but the logical session cache refresh causes additional updates to be recorded.
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_1sec_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_1sec_refresh_jscore_passthrough.yml
index 110804994a4..8b4db08b8bf 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_1sec_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_1sec_refresh_jscore_passthrough.yml
@@ -13,7 +13,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# These tests verify that an expected number of update operations were tracked in the server
# status metrics, but the logical session cache refresh causes additional updates to be recorded.
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_default_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_default_refresh_jscore_passthrough.yml
index 962c41c03f3..62886996b51 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_default_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_standalone_default_refresh_jscore_passthrough.yml
@@ -13,7 +13,6 @@ selector:
# can be triggered deterministically.
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# These tests verify that an expected number of update operations were tracked in the server
# status metrics, but the logical session cache refresh causes additional updates to be recorded.
diff --git a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_jscore_passthrough.yml
index 07e839d9b77..f215abee05a 100644
--- a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_jscore_passthrough.yml
@@ -151,7 +151,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $collStats
diff --git a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml
index 26d4fc14d97..8e21d6b8999 100644
--- a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml
@@ -146,7 +146,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $collStats
diff --git a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml
index 84d46018091..511123c8ce5 100644
--- a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml
@@ -147,7 +147,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $collStats
diff --git a/buildscripts/resmokeconfig/suites/multi_stmt_txn_jscore_passthrough_with_migration.yml b/buildscripts/resmokeconfig/suites/multi_stmt_txn_jscore_passthrough_with_migration.yml
index ccdf9fcec1b..b30c65e8e4c 100644
--- a/buildscripts/resmokeconfig/suites/multi_stmt_txn_jscore_passthrough_with_migration.yml
+++ b/buildscripts/resmokeconfig/suites/multi_stmt_txn_jscore_passthrough_with_migration.yml
@@ -159,7 +159,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $collStats
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml
index 39549ffe941..b6316ac186a 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml
@@ -96,7 +96,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $indexStats
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml
index 019d73a74e6..44bfc60ade7 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml
@@ -87,7 +87,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $collStats
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml
index 74e23f299b6..e5dc35178c4 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml
@@ -86,7 +86,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $collStats
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml
index f2e0ccbadaf..30108b2a315 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml
@@ -84,7 +84,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $collStats
diff --git a/buildscripts/resmokeconfig/suites/shard_split_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/shard_split_multi_stmt_txn_jscore_passthrough.yml
index 928541da5a6..6449f04d830 100644
--- a/buildscripts/resmokeconfig/suites/shard_split_multi_stmt_txn_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/shard_split_multi_stmt_txn_jscore_passthrough.yml
@@ -112,7 +112,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $indexStats
diff --git a/buildscripts/resmokeconfig/suites/sharded_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/sharded_multi_stmt_txn_jscore_passthrough.yml
index 2c0dd75a63c..9c69c04eef2 100644
--- a/buildscripts/resmokeconfig/suites/sharded_multi_stmt_txn_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/sharded_multi_stmt_txn_jscore_passthrough.yml
@@ -120,7 +120,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $indexStats
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
index afdb9005f74..150654cf5c5 100644
--- a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
@@ -62,9 +62,6 @@ selector:
- jstests/sharding/shard2.js
- jstests/sharding/shard3.js
- jstests/sharding/shard_collection_basic.js
- - jstests/sharding/sharding_balance2.js
- - jstests/sharding/sharding_balance3.js
- - jstests/sharding/sharding_migrate_cursor1.js
- jstests/sharding/tag_range.js
- jstests/sharding/top_chunk_autosplit.js
- jstests/sharding/count_config_servers.js
@@ -114,7 +111,6 @@ selector:
# Runs setShardVersion/getShardVersion against the config server and we don't support retries
# for this command
- jstests/sharding/major_version_check.js
- - jstests/sharding/ssv_config_check.js
# Runs replSetGetStatus -- via awaitLastOpCommitted -- directly against the config server:
# retries aren't supported.
- jstests/sharding/coll_epoch_test1.js
diff --git a/buildscripts/resmokeconfig/suites/snmp.yml b/buildscripts/resmokeconfig/suites/snmp.yml
deleted file mode 100644
index a462da0b7af..00000000000
--- a/buildscripts/resmokeconfig/suites/snmp.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-test_kind: js_test
-
-selector:
- roots:
- - src/mongo/db/modules/*/jstests/snmp/*.js
-
-# snmp tests start their own mongod's.
-executor:
- config:
- shell_options:
- nodb: ''
diff --git a/buildscripts/resmokeconfig/suites/tenant_migration_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/tenant_migration_multi_stmt_txn_jscore_passthrough.yml
index 9413b35ee3e..c3c81736ea2 100644
--- a/buildscripts/resmokeconfig/suites/tenant_migration_multi_stmt_txn_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/tenant_migration_multi_stmt_txn_jscore_passthrough.yml
@@ -111,7 +111,6 @@ selector:
# $listSessions
- jstests/core/list_all_local_sessions.js
- jstests/core/list_all_sessions.js
- - jstests/core/list_local_sessions.js
- jstests/core/list_sessions.js
# $indexStats
diff --git a/buildscripts/resmokelib/core/pipe.py b/buildscripts/resmokelib/core/pipe.py
index c52dbc1235a..168345049f8 100644
--- a/buildscripts/resmokelib/core/pipe.py
+++ b/buildscripts/resmokelib/core/pipe.py
@@ -4,8 +4,13 @@ Helper class to read output of a subprocess.
Used to avoid deadlocks from the pipe buffer filling up and blocking the subprocess while it's
being waited on.
"""
-
+from textwrap import wrap
import threading
+from typing import List
+
+# Logkeeper only supports log lines up to 4 MB; we want to be a little under that to account for
+# extra metadata that gets sent along with the log message.
+MAX_LOG_LINE = int(3.5 * 1024 * 1024)
class LoggerPipe(threading.Thread): # pylint: disable=too-many-instance-attributes
@@ -50,17 +55,9 @@ class LoggerPipe(threading.Thread): # pylint: disable=too-many-instance-attribu
with self.__pipe_out:
# Avoid buffering the output from the pipe.
for line in iter(self.__pipe_out.readline, b""):
- # Replace null bytes in the output of the subprocess with a literal backslash ('\')
- # followed by a literal zero ('0') so tools like grep don't treat resmoke.py's
- # output as binary data.
- line = line.replace(b"\0", b"\\0")
-
- # Convert the output of the process from a bytestring to a UTF-8 string, and replace
- # any characters that cannot be decoded with the official Unicode replacement
- # character, U+FFFD. The log messages of MongoDB processes are not always valid
- # UTF-8 sequences. See SERVER-7506.
- line = line.decode("utf-8", "replace")
- self.__logger.log(self.__level, line.rstrip())
+ lines = self._format_line_for_logging(line)
+ for entry in lines:
+ self.__logger.log(self.__level, entry)
with self.__lock:
self.__finished = True
@@ -85,3 +82,36 @@ class LoggerPipe(threading.Thread): # pylint: disable=too-many-instance-attribu
# No need to pass a timeout to join() because the thread should already be done after
# notifying us it has finished reading output from the pipe.
LoggerPipe.__join(self) # Tidy up the started thread.
+
+ @staticmethod
+ def _format_line_for_logging(line_bytes: bytes) -> List[str]:
+ """
+ Convert the given byte array into string(s) to be sent to the logger.
+
+ If the size of the input is greater than the max size supported by logkeeper, we will
+ split the input into multiple strings that are under the max supported size.
+
+ :param line_bytes: Byte array of the line to send to the logger.
+ :return: List of strings to send to logger.
+ """
+ # Replace null bytes in the output of the subprocess with a literal backslash ('\')
+ # followed by a literal zero ('0') so tools like grep don't treat resmoke.py's
+ # output as binary data.
+ line_bytes = line_bytes.replace(b"\0", b"\\0")
+
+ # Convert the output of the process from a bytestring to a UTF-8 string, and replace
+ # any characters that cannot be decoded with the official Unicode replacement
+ # character, U+FFFD. The log messages of MongoDB processes are not always valid
+ # UTF-8 sequences. See SERVER-7506.
+ line_str = line_bytes.decode("utf-8", "replace")
+ line_str = line_str.rstrip()
+ if len(line_str) > MAX_LOG_LINE:
+ return wrap(
+ line_str,
+ MAX_LOG_LINE,
+ expand_tabs=False,
+ replace_whitespace=False,
+ drop_whitespace=False,
+ break_on_hyphens=False,
+ )
+ return [line_str]
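
The new _format_line_for_logging() relies on textwrap.wrap() to chop oversized lines into chunks no larger than MAX_LOG_LINE. A quick, self-contained illustration of that splitting behaviour, using a tiny width instead of 3.5 MB so the result is easy to inspect:

    # textwrap.wrap with these flags only splits on width; whitespace is preserved and
    # hyphens are not treated as break points, matching the call in pipe.py.
    from textwrap import wrap

    line = "x" * 25
    chunks = wrap(line, 10, expand_tabs=False, replace_whitespace=False,
                  drop_whitespace=False, break_on_hyphens=False)
    assert chunks == ["x" * 10, "x" * 10, "x" * 5]
    assert all(len(chunk) <= 10 for chunk in chunks)
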
diff --git a/buildscripts/resmokelib/mongod_fuzzer_configs.py b/buildscripts/resmokelib/mongod_fuzzer_configs.py
index ec84d6c5a4e..a9750997fef 100644
--- a/buildscripts/resmokelib/mongod_fuzzer_configs.py
+++ b/buildscripts/resmokelib/mongod_fuzzer_configs.py
@@ -11,13 +11,16 @@ def generate_eviction_configs(rng):
eviction_trigger = rng.randint(eviction_target + 1, 99)
# Fuzz eviction_dirty_target and trigger both as relative and absolute values
- target_bytes_min = 10 * 1024 * 1024 # 10MB
+ target_bytes_min = 50 * 1024 * 1024 # 50MB # 5% of 1GB default cache size on Evergreen
target_bytes_max = 256 * 1024 * 1024 # 256MB # 1GB default cache size on Evergreen
eviction_dirty_target = rng.choice(
[rng.randint(5, 50), rng.randint(target_bytes_min, target_bytes_max)])
trigger_max = 75 if eviction_dirty_target <= 50 else target_bytes_max
eviction_dirty_trigger = rng.randint(eviction_dirty_target + 1, trigger_max)
+ assert eviction_dirty_trigger > eviction_dirty_target
+ assert eviction_dirty_trigger <= trigger_max
+
close_idle_time_secs = rng.randint(1, 100)
close_handle_minimum = rng.randint(0, 1000)
close_scan_interval = rng.randint(1, 100)
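
The fuzzer change above raises the absolute lower bound for eviction_dirty_target and then asserts the invariants of the sampled pair. A sketch reproducing the same sampling with a fixed (arbitrary) seed, to show why the assertions hold by construction:

    import random

    rng = random.Random(42)                  # arbitrary seed for illustration
    target_bytes_min = 50 * 1024 * 1024      # 50MB, ~5% of the 1GB default cache on Evergreen
    target_bytes_max = 256 * 1024 * 1024     # 256MB

    eviction_dirty_target = rng.choice(
        [rng.randint(5, 50), rng.randint(target_bytes_min, target_bytes_max)])
    trigger_max = 75 if eviction_dirty_target <= 50 else target_bytes_max
    eviction_dirty_trigger = rng.randint(eviction_dirty_target + 1, trigger_max)

    assert eviction_dirty_trigger > eviction_dirty_target
    assert eviction_dirty_trigger <= trigger_max
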
diff --git a/buildscripts/resmokelib/multiversion/__init__.py b/buildscripts/resmokelib/multiversion/__init__.py
index 2c9bae45766..713dedf0f82 100644
--- a/buildscripts/resmokelib/multiversion/__init__.py
+++ b/buildscripts/resmokelib/multiversion/__init__.py
@@ -49,14 +49,14 @@ class MultiversionConfigSubcommand(Subcommand):
mongo_version=MongoVersion.from_yaml_file(multiversionconstants.MONGO_VERSION_YAML),
mongo_releases=MongoReleases.from_yaml_file(multiversionconstants.RELEASES_YAML),
)
- fcv_constants = multiversion_service.calculate_fcv_constants()
+ version_constants = multiversion_service.calculate_version_constants()
return MultiversionConfig(
last_versions=multiversionconstants.OLD_VERSIONS,
- requires_fcv_tag=fcv_constants.get_fcv_tag_list(),
- requires_fcv_tag_lts=fcv_constants.get_lts_fcv_tag_list(),
- requires_fcv_tag_continuous=fcv_constants.get_continuous_fcv_tag_list(),
- last_lts_fcv=fcv_constants.get_last_lts_fcv(),
- last_continuous_fcv=fcv_constants.get_last_continuous_fcv(),
+ requires_fcv_tag=version_constants.get_fcv_tag_list(),
+ requires_fcv_tag_lts=version_constants.get_lts_fcv_tag_list(),
+ requires_fcv_tag_continuous=version_constants.get_continuous_fcv_tag_list(),
+ last_lts_fcv=version_constants.get_last_lts_fcv(),
+ last_continuous_fcv=version_constants.get_last_continuous_fcv(),
)
diff --git a/buildscripts/resmokelib/multiversion/multiversion_service.py b/buildscripts/resmokelib/multiversion/multiversion_service.py
index 3597cc2177f..621d5ade9ae 100644
--- a/buildscripts/resmokelib/multiversion/multiversion_service.py
+++ b/buildscripts/resmokelib/multiversion/multiversion_service.py
@@ -25,9 +25,9 @@ def version_str(version: Version) -> str:
return f"{version.major}.{version.minor}"
-class FcvConstantValues(NamedTuple):
+class VersionConstantValues(NamedTuple):
"""
- Object to hold the calculated FCV constants.
+ Object to hold the calculated Version constants.
* latest: Latest FCV.
* last_continuous: Last continuous FCV.
@@ -36,6 +36,7 @@ class FcvConstantValues(NamedTuple):
* requires_fcv_tag_list_continuous: List of FCVs that we need to generate a tag for against
continuous versions.
* fcvs_less_than_latest: List of all FCVs that are less than latest, starting from v4.0.
+ * eols: List of stable MongoDB versions since v2.0 that have been EOL'd.
"""
latest: Version
@@ -44,6 +45,7 @@ class FcvConstantValues(NamedTuple):
requires_fcv_tag_list: List[Version]
requires_fcv_tag_list_continuous: List[Version]
fcvs_less_than_latest: List[Version]
+ eols: List[Version]
def get_fcv_tag_list(self) -> str:
"""Get a comma joined string of all the fcv tags."""
@@ -96,6 +98,10 @@ class FcvConstantValues(NamedTuple):
last_continuous = self.get_last_continuous_fcv()
return f"{base_name}-{last_continuous}"
+ def get_eols(self) -> List[str]:
+ """Get EOL'd versions as list of strings."""
+ return [version_str(eol) for eol in self.eols]
+
class MongoVersion(BaseModel):
"""
@@ -132,12 +138,14 @@ class MongoReleases(BaseModel):
* feature_compatibility_version: All FCVs starting with 4.0.
* long_term_support_releases: All LTS releases starting with 4.0.
+ * eol_versions: List of stable MongoDB versions since 2.0 that have been EOL'd.
* generate_fcv_lower_bound_override: Extend FCV generation down to the previous value of last
LTS.
"""
feature_compatibility_versions: List[str] = Field(alias="featureCompatibilityVersions")
long_term_support_releases: List[str] = Field(alias="longTermSupportReleases")
+ eol_versions: List[str] = Field(alias="eolVersions")
generate_fcv_lower_bound_override: Optional[str] = Field(None,
alias="generateFCVLowerBoundOverride")
@@ -159,7 +167,11 @@ class MongoReleases(BaseModel):
def get_lts_versions(self) -> List[Version]:
"""Get the Version representation of the lts versions."""
- return [Version(fcv) for fcv in self.long_term_support_releases]
+ return [Version(lts) for lts in self.long_term_support_releases]
+
+ def get_eol_versions(self) -> List[Version]:
+ """Get the Version representation of the EOL versions."""
+ return [Version(eol) for eol in self.eol_versions]
class MultiversionService:
@@ -175,11 +187,12 @@ class MultiversionService:
self.mongo_version = mongo_version
self.mongo_releases = mongo_releases
- def calculate_fcv_constants(self) -> FcvConstantValues:
+ def calculate_version_constants(self) -> VersionConstantValues:
"""Calculate multiversion constants from data files."""
latest = self.mongo_version.get_version()
fcvs = self.mongo_releases.get_fcv_versions()
lts = self.mongo_releases.get_lts_versions()
+ eols = self.mongo_releases.get_eol_versions()
lower_bound_override = self.mongo_releases.generate_fcv_lower_bound_override
# Highest release less than latest.
@@ -200,7 +213,12 @@ class MultiversionService:
# All FCVs less than latest.
fcvs_less_than_latest = fcvs[:bisect_left(fcvs, latest)]
- return FcvConstantValues(latest=latest, last_continuous=last_continuous, last_lts=last_lts,
- requires_fcv_tag_list=requires_fcv_tag_list,
- requires_fcv_tag_list_continuous=requires_fcv_tag_list_continuous,
- fcvs_less_than_latest=fcvs_less_than_latest)
+ return VersionConstantValues(
+ latest=latest,
+ last_continuous=last_continuous,
+ last_lts=last_lts,
+ requires_fcv_tag_list=requires_fcv_tag_list,
+ requires_fcv_tag_list_continuous=requires_fcv_tag_list_continuous,
+ fcvs_less_than_latest=fcvs_less_than_latest,
+ eols=eols,
+ )
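
calculate_version_constants() slices the sorted FCV list with bisect_left to get everything below latest, and get_eols() formats Version objects through version_str(). A small sketch of both, assuming packaging-style Version objects with .major/.minor as the surrounding module uses:

    from bisect import bisect_left
    from packaging.version import Version   # assumption: packaging-style Version objects

    fcvs = [Version(v) for v in ["4.0", "4.2", "4.4", "5.0", "6.0"]]
    latest = Version("6.0")

    # Everything strictly below latest, mirroring fcvs[:bisect_left(fcvs, latest)] above.
    fcvs_less_than_latest = fcvs[:bisect_left(fcvs, latest)]
    assert [f"{v.major}.{v.minor}" for v in fcvs_less_than_latest] == ["4.0", "4.2", "4.4", "5.0"]

    # get_eols() boils down to the same major.minor formatting.
    eols = [Version(v) for v in ["2.0", "2.2"]]
    assert [f"{v.major}.{v.minor}" for v in eols] == ["2.0", "2.2"]
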
diff --git a/buildscripts/resmokelib/multiversionconstants.py b/buildscripts/resmokelib/multiversionconstants.py
index 0cac4fbc95b..56381531f72 100644
--- a/buildscripts/resmokelib/multiversionconstants.py
+++ b/buildscripts/resmokelib/multiversionconstants.py
@@ -5,22 +5,15 @@ from subprocess import DEVNULL, STDOUT, CalledProcessError, call, check_output
import structlog
-try:
- # when running resmoke
- from buildscripts.resmokelib.multiversion.multiversion_service import (
- MongoReleases, MongoVersion, MultiversionService)
- from buildscripts.resmokelib.multiversionsetupconstants import \
- USE_EXISTING_RELEASES_FILE
-except ImportError:
- # when running db-contrib-tool
- from multiversion.multiversion_service import (MongoReleases, MongoVersion, MultiversionService)
- from multiversionsetupconstants import USE_EXISTING_RELEASES_FILE
+from buildscripts.resmokelib.multiversion.multiversion_service import (
+ MongoReleases, MongoVersion, MultiversionService, MONGO_VERSION_YAML, RELEASES_YAML)
+from buildscripts.resmokelib.multiversionsetupconstants import \
+ USE_EXISTING_RELEASES_FILE
-LOGGER = structlog.getLogger(__name__)
+LAST_LTS = "last_lts"
+LAST_CONTINUOUS = "last_continuous"
-# These values must match the include paths for artifacts.tgz in evergreen.yml.
-MONGO_VERSION_YAML = ".resmoke_mongo_version.yml"
-RELEASES_YAML = ".resmoke_mongo_release_values.yml"
+LOGGER = structlog.getLogger(__name__)
def generate_mongo_version_file():
@@ -86,33 +79,34 @@ multiversion_service = MultiversionService(
mongo_releases=MongoReleases.from_yaml_file(RELEASES_YAML),
)
-fcv_constants = multiversion_service.calculate_fcv_constants()
+version_constants = multiversion_service.calculate_version_constants()
-LAST_LTS_BIN_VERSION = fcv_constants.get_last_lts_fcv()
-LAST_CONTINUOUS_BIN_VERSION = fcv_constants.get_last_continuous_fcv()
+LAST_LTS_BIN_VERSION = version_constants.get_last_lts_fcv()
+LAST_CONTINUOUS_BIN_VERSION = version_constants.get_last_continuous_fcv()
-LAST_LTS_FCV = fcv_constants.get_last_lts_fcv()
-LAST_CONTINUOUS_FCV = fcv_constants.get_last_continuous_fcv()
-LATEST_FCV = fcv_constants.get_latest_fcv()
+LAST_LTS_FCV = version_constants.get_last_lts_fcv()
+LAST_CONTINUOUS_FCV = version_constants.get_last_continuous_fcv()
+LATEST_FCV = version_constants.get_latest_fcv()
-LAST_CONTINUOUS_MONGO_BINARY = fcv_constants.build_last_continuous_binary("mongo")
-LAST_CONTINUOUS_MONGOD_BINARY = fcv_constants.build_last_continuous_binary("mongod")
-LAST_CONTINUOUS_MONGOS_BINARY = fcv_constants.build_last_continuous_binary("mongos")
+LAST_CONTINUOUS_MONGO_BINARY = version_constants.build_last_continuous_binary("mongo")
+LAST_CONTINUOUS_MONGOD_BINARY = version_constants.build_last_continuous_binary("mongod")
+LAST_CONTINUOUS_MONGOS_BINARY = version_constants.build_last_continuous_binary("mongos")
-LAST_LTS_MONGO_BINARY = fcv_constants.build_last_lts_binary("mongo")
-LAST_LTS_MONGOD_BINARY = fcv_constants.build_last_lts_binary("mongod")
-LAST_LTS_MONGOS_BINARY = fcv_constants.build_last_lts_binary("mongos")
+LAST_LTS_MONGO_BINARY = version_constants.build_last_lts_binary("mongo")
+LAST_LTS_MONGOD_BINARY = version_constants.build_last_lts_binary("mongod")
+LAST_LTS_MONGOS_BINARY = version_constants.build_last_lts_binary("mongos")
-REQUIRES_FCV_TAG_LATEST = fcv_constants.get_latest_tag()
+REQUIRES_FCV_TAG_LATEST = version_constants.get_latest_tag()
# Generate tags for all FCVS in (lastLTS, latest], or (lowerBoundOverride, latest] if requested.
# All multiversion tests should be run with these tags excluded.
-REQUIRES_FCV_TAG = fcv_constants.get_fcv_tag_list()
+REQUIRES_FCV_TAG = version_constants.get_fcv_tag_list()
# Generate evergreen project names for all FCVs less than latest.
EVERGREEN_PROJECTS = ['mongodb-mongo-master']
-EVERGREEN_PROJECTS.extend([evg_project_str(fcv) for fcv in fcv_constants.fcvs_less_than_latest])
+EVERGREEN_PROJECTS.extend([evg_project_str(fcv) for fcv in version_constants.fcvs_less_than_latest])
-OLD_VERSIONS = ["last_lts"]
-if LAST_LTS_FCV != LAST_CONTINUOUS_FCV:
- OLD_VERSIONS.append("last_continuous")
+OLD_VERSIONS = [
+ LAST_LTS
+] if LAST_CONTINUOUS_FCV == LAST_LTS_FCV or LAST_CONTINUOUS_FCV in version_constants.get_eols(
+) else [LAST_LTS, LAST_CONTINUOUS]
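
The rewritten OLD_VERSIONS expression packs two conditions into one conditional: last_continuous is only exercised when it is distinct from last LTS and has not been EOL'd. An equivalent, spelled-out form of the same decision (illustrative only; the version strings below are made up):

    def old_versions(last_lts_fcv, last_continuous_fcv, eols):
        # Mirrors the OLD_VERSIONS conditional above, just written out long-hand.
        if last_continuous_fcv == last_lts_fcv or last_continuous_fcv in eols:
            return ["last_lts"]
        return ["last_lts", "last_continuous"]

    assert old_versions("5.0", "5.0", []) == ["last_lts"]
    assert old_versions("6.0", "5.3", ["5.3"]) == ["last_lts"]
    assert old_versions("6.0", "5.3", []) == ["last_lts", "last_continuous"]
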
diff --git a/buildscripts/resmokelib/testing/fixtures/shardedcluster.py b/buildscripts/resmokelib/testing/fixtures/shardedcluster.py
index 9bc83f4f062..84574900e34 100644
--- a/buildscripts/resmokelib/testing/fixtures/shardedcluster.py
+++ b/buildscripts/resmokelib/testing/fixtures/shardedcluster.py
@@ -8,6 +8,7 @@ import pymongo
import pymongo.errors
import buildscripts.resmokelib.testing.fixtures.interface as interface
+import buildscripts.resmokelib.testing.fixtures.external as external
class ShardedClusterFixture(interface.Fixture): # pylint: disable=too-many-instance-attributes
@@ -378,6 +379,53 @@ class ShardedClusterFixture(interface.Fixture): # pylint: disable=too-many-inst
client.admin.command({"addShard": connection_string})
+class ExternalShardedClusterFixture(external.ExternalFixture, ShardedClusterFixture):
+ """Fixture to interact with external sharded cluster fixture."""
+
+ REGISTERED_NAME = "ExternalShardedClusterFixture"
+
+ def __init__(self, logger, job_num, fixturelib, shell_conn_string):
+ """Initialize ExternalShardedClusterFixture."""
+ external.ExternalFixture.__init__(self, logger, job_num, fixturelib, shell_conn_string)
+ ShardedClusterFixture.__init__(self, logger, job_num, fixturelib, mongod_options={})
+
+ def setup(self):
+ """Use ExternalFixture method."""
+ return external.ExternalFixture.setup(self)
+
+ def pids(self):
+ """Use ExternalFixture method."""
+ return external.ExternalFixture.pids(self)
+
+ def await_ready(self):
+ """Use ExternalFixture method."""
+ return external.ExternalFixture.await_ready(self)
+
+ def _do_teardown(self, mode=None):
+ """Use ExternalFixture method."""
+ return external.ExternalFixture._do_teardown(self)
+
+ def _is_process_running(self):
+ """Use ExternalFixture method."""
+ return external.ExternalFixture._is_process_running(self)
+
+ def is_running(self):
+ """Use ExternalFixture method."""
+ return external.ExternalFixture.is_running(self)
+
+ def get_internal_connection_string(self):
+ """Use ExternalFixture method."""
+ return external.ExternalFixture.get_internal_connection_string(self)
+
+ def get_driver_connection_url(self):
+ """Use ExternalFixture method."""
+ return external.ExternalFixture.get_driver_connection_url(self)
+
+ def get_node_info(self):
+ """Use ExternalFixture method."""
+ return external.ExternalFixture.get_node_info(self)
+
+
class _MongoSFixture(interface.Fixture):
"""Fixture which provides JSTests with a mongos to connect to."""
diff --git a/buildscripts/resmokelib/testing/hooks/antithesis_logging.py b/buildscripts/resmokelib/testing/hooks/antithesis_logging.py
new file mode 100644
index 00000000000..49b1a357cc5
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/antithesis_logging.py
@@ -0,0 +1,26 @@
+"""Hook that prints Antithesis commands to be executed in the Antithesis evironment."""
+
+from time import sleep
+from buildscripts.resmokelib.testing.hooks import interface
+
+
+class AntithesisLogging(interface.Hook): # pylint: disable=too-many-instance-attributes
+ """Prints antithesis commands before & after test run."""
+
+ DESCRIPTION = "Prints antithesis commands before & after test run."
+
+ IS_BACKGROUND = False
+
+ def __init__(self, hook_logger, fixture):
+ """Initialize the AntithesisLogging hook."""
+ interface.Hook.__init__(self, hook_logger, fixture, AntithesisLogging.DESCRIPTION)
+
+ def before_test(self, test, test_report):
+ """Ensure the fault injector is running before a test."""
+ print("ANTITHESIS-COMMAND: Start Fault Injector")
+ sleep(5)
+
+ def after_test(self, test, test_report):
+ """Ensure the fault injector is stopped after a test."""
+ print("ANTITHESIS-COMMAND: Stop Fault Injector")
+ sleep(5)
diff --git a/buildscripts/resmokelib/testing/hooks/jsfile.py b/buildscripts/resmokelib/testing/hooks/jsfile.py
index 66d8c52f59e..4cfa7f5815a 100644
--- a/buildscripts/resmokelib/testing/hooks/jsfile.py
+++ b/buildscripts/resmokelib/testing/hooks/jsfile.py
@@ -77,7 +77,7 @@ class PerClusterDataConsistencyHook(DataConsistencyHook):
cluster, cluster.get_driver_connection_url())
hook_test_case = DynamicJSTestCase.create_after_test(
test.logger, test, self, self._js_filename, self._shell_options)
- hook_test_case.configure(self.fixture)
+ hook_test_case.configure(cluster)
hook_test_case.run_dynamic_test(test_report)
diff --git a/buildscripts/resmokelib/testing/hooks/shard_split.py b/buildscripts/resmokelib/testing/hooks/shard_split.py
index d7cb0d05a72..10748f6e1e4 100644
--- a/buildscripts/resmokelib/testing/hooks/shard_split.py
+++ b/buildscripts/resmokelib/testing/hooks/shard_split.py
@@ -473,7 +473,7 @@ class _ShardSplitThread(threading.Thread): # pylint: disable=too-many-instance-
while True:
try:
res = donor_node_client.config.command({
- "count": "tenantSplitDonors",
+ "count": "shardSplitDonors",
"query": {"tenantIds": split_opts.tenant_ids}
})
if res["n"] == 0:
@@ -502,7 +502,7 @@ class _ShardSplitThread(threading.Thread): # pylint: disable=too-many-instance-
while True:
try:
res = recipient_node_client.config.command({
- "count": "tenantSplitDonors",
+ "count": "shardSplitDonors",
"query": {"tenantIds": split_opts.tenant_ids}
})
if res["n"] == 0:
diff --git a/buildscripts/resmokelib/testing/hooks/simulate_crash.py b/buildscripts/resmokelib/testing/hooks/simulate_crash.py
index 60e128da306..a0b82a1e98c 100644
--- a/buildscripts/resmokelib/testing/hooks/simulate_crash.py
+++ b/buildscripts/resmokelib/testing/hooks/simulate_crash.py
@@ -107,9 +107,8 @@ class SimulateCrash(bghook.BGHook):
mdb = process.Process(self.logger, [
node.mongod_executable, "--dbpath", path, "--port",
- str(self.validate_port), "--logpath",
- node.get_dbpath_prefix() + "/simulateCrashes/validate.log", "--setParameter",
- "enableTestCommands=1", "--setParameter", "testingDiagnosticsEnabled=1"
+ str(self.validate_port), "--setParameter", "enableTestCommands=1", "--setParameter",
+ "testingDiagnosticsEnabled=1"
])
mdb.start()
diff --git a/buildscripts/tests/data/errorcodes/regex_matching/regex_matching.cpp b/buildscripts/tests/data/errorcodes/regex_matching/regex_matching.cpp
index 834d2052a98..b45042b4b0f 100644
--- a/buildscripts/tests/data/errorcodes/regex_matching/regex_matching.cpp
+++ b/buildscripts/tests/data/errorcodes/regex_matching/regex_matching.cpp
@@ -27,4 +27,8 @@ LOGV2_ERROR(25,
"more words");
LOGV2_ERROR(26,
"words",
- "comma, more words words words words words words words words words words words words "
+ "comma, more words words words words words words words words words words words words ");
+iassert(27, "words");
+iasserted(28, "words");
+iassertNoTrace(29, "words");
+iassertedNoTrace(30, "words");
diff --git a/buildscripts/tests/resmokelib/core/test_pipe.py b/buildscripts/tests/resmokelib/core/test_pipe.py
index 69b0e3fb576..6ecece299a7 100644
--- a/buildscripts/tests/resmokelib/core/test_pipe.py
+++ b/buildscripts/tests/resmokelib/core/test_pipe.py
@@ -10,7 +10,7 @@ import mock
from buildscripts.resmokelib.core import pipe as _pipe
-# pylint: disable=missing-docstring
+# pylint: disable=missing-docstring,protected-access
class TestLoggerPipe(unittest.TestCase):
@@ -43,3 +43,40 @@ class TestLoggerPipe(unittest.TestCase):
def test_escapes_null_bytes(self):
calls = self._get_log_calls(b"a\0b")
self.assertEqual(calls, [mock.call(self.LOG_LEVEL, u"a\\0b")])
+
+
+class TestFormatLineForLogging(unittest.TestCase):
+ def test_strips_trailing_whitespace(self):
+ line = b" a "
+
+ line_output = _pipe.LoggerPipe._format_line_for_logging(line)
+
+ self.assertEqual([u" a"], line_output)
+
+ def test_strips_trailing_newlines(self):
+ line = b"a\r\n"
+
+ line_output = _pipe.LoggerPipe._format_line_for_logging(line)
+
+ self.assertEqual([u"a"], line_output)
+
+ def test_handles_invalid_utf8(self):
+ line = b"a\x80b"
+
+ line_output = _pipe.LoggerPipe._format_line_for_logging(line)
+
+ self.assertEqual([u"a\ufffdb"], line_output)
+
+ def test_escapes_null_bytes(self):
+ line = b"a\0b"
+
+ line_output = _pipe.LoggerPipe._format_line_for_logging(line)
+
+ self.assertEqual([u"a\\0b"], line_output)
+
+ def test_long_lines_are_split(self):
+ line = b"a" * 4_000_000
+
+ line_output = _pipe.LoggerPipe._format_line_for_logging(line)
+
+ self.assertEqual(2, len(line_output))
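
The new tests pin down the observable behavior of `LoggerPipe._format_line_for_logging`: trailing whitespace and newlines are stripped, invalid UTF-8 is replaced with U+FFFD, NUL bytes are escaped as `\0`, and very long lines are split into multiple chunks. A minimal sketch with those properties (the function and its 2,000,000-character chunk size are assumptions for illustration, not the resmoke implementation):

```python
def format_line_for_logging(raw_line, max_chars=2_000_000):
    """Sketch of a log-line formatter matching the behaviors exercised above."""
    text = raw_line.decode("utf-8", errors="replace")  # invalid UTF-8 becomes U+FFFD
    text = text.rstrip()                               # drop trailing whitespace and newlines
    text = text.replace("\0", "\\0")                   # escape NUL bytes for the logger
    # Split very long lines so a single line cannot overwhelm downstream log handling.
    return [text[i:i + max_chars] for i in range(0, len(text), max_chars)] or [text]
```
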
diff --git a/buildscripts/tests/resmokelib/multiversion/test_multiversion_service.py b/buildscripts/tests/resmokelib/multiversion/test_multiversion_service.py
index 2f685f36c97..274b7e5ffbe 100644
--- a/buildscripts/tests/resmokelib/multiversion/test_multiversion_service.py
+++ b/buildscripts/tests/resmokelib/multiversion/test_multiversion_service.py
@@ -38,6 +38,8 @@ class TestCalculateFcvConstants(TestCase):
"100.0"
],
"longTermSupportReleases": ["4.0", "4.2", "4.4", "5.0"],
+ "eolVersions":
+ ["2.0", "2.2", "2.4", "2.6", "3.0", "3.2", "3.4", "3.6", "4.0", "5.1", "5.2"],
})
multiversion_service = under_test.MultiversionService(
@@ -45,15 +47,15 @@ class TestCalculateFcvConstants(TestCase):
mongo_releases=mongo_releases,
)
- fcv_constants = multiversion_service.calculate_fcv_constants()
+ version_constants = multiversion_service.calculate_version_constants()
- self.assertEqual(fcv_constants.latest, Version("6.0"))
- self.assertEqual(fcv_constants.last_continuous, Version("5.3"))
- self.assertEqual(fcv_constants.last_lts, Version("5.0"))
- self.assertEqual(fcv_constants.requires_fcv_tag_list,
+ self.assertEqual(version_constants.latest, Version("6.0"))
+ self.assertEqual(version_constants.last_continuous, Version("5.3"))
+ self.assertEqual(version_constants.last_lts, Version("5.0"))
+ self.assertEqual(version_constants.requires_fcv_tag_list,
[Version(v) for v in ["5.1", "5.2", "5.3", "6.0"]])
- self.assertEqual(fcv_constants.requires_fcv_tag_list_continuous, [Version("6.0")])
- self.assertEqual(fcv_constants.fcvs_less_than_latest, [
+ self.assertEqual(version_constants.requires_fcv_tag_list_continuous, [Version("6.0")])
+ self.assertEqual(version_constants.fcvs_less_than_latest, [
Version(v)
for v in ["4.0", "4.2", "4.4", "4.7", "4.8", "4.9", "5.0", "5.1", "5.2", "5.3"]
])
@@ -67,6 +69,8 @@ class TestCalculateFcvConstants(TestCase):
"6.1", "100.0"
],
"longTermSupportReleases": ["4.0", "4.2", "4.4", "5.0", "6.0"],
+ "eolVersions":
+ ["2.0", "2.2", "2.4", "2.6", "3.0", "3.2", "3.4", "3.6", "4.0", "5.1", "5.2"],
})
multiversion_service = under_test.MultiversionService(
@@ -74,15 +78,15 @@ class TestCalculateFcvConstants(TestCase):
mongo_releases=mongo_releases,
)
- fcv_constants = multiversion_service.calculate_fcv_constants()
+ version_constants = multiversion_service.calculate_version_constants()
- self.assertEqual(fcv_constants.latest, Version("100.0"))
- self.assertEqual(fcv_constants.last_continuous, Version("6.1"))
- self.assertEqual(fcv_constants.last_lts, Version("6.0"))
- self.assertEqual(fcv_constants.requires_fcv_tag_list,
+ self.assertEqual(version_constants.latest, Version("100.0"))
+ self.assertEqual(version_constants.last_continuous, Version("6.1"))
+ self.assertEqual(version_constants.last_lts, Version("6.0"))
+ self.assertEqual(version_constants.requires_fcv_tag_list,
[Version(v) for v in ["6.1", "100.0"]])
- self.assertEqual(fcv_constants.requires_fcv_tag_list_continuous, [Version("100.0")])
- self.assertEqual(fcv_constants.fcvs_less_than_latest, [
+ self.assertEqual(version_constants.requires_fcv_tag_list_continuous, [Version("100.0")])
+ self.assertEqual(version_constants.fcvs_less_than_latest, [
Version(v) for v in
["4.0", "4.2", "4.4", "4.7", "4.8", "4.9", "5.0", "5.1", "5.2", "5.3", "6.0", "6.1"]
])
diff --git a/buildscripts/tests/resmokelib/testing/fixtures/test_api_adherence.py b/buildscripts/tests/resmokelib/testing/fixtures/test_api_adherence.py
index 7f0e1110a32..81b717bb0fe 100644
--- a/buildscripts/tests/resmokelib/testing/fixtures/test_api_adherence.py
+++ b/buildscripts/tests/resmokelib/testing/fixtures/test_api_adherence.py
@@ -8,6 +8,7 @@ import os
DISALLOWED_ROOT = "buildscripts"
ALLOWED_IMPORTS = [
+ "buildscripts.resmokelib.testing.fixtures.external",
"buildscripts.resmokelib.testing.fixtures.interface",
"buildscripts.resmokelib.testing.fixtures.fixturelib",
"buildscripts.resmokelib.multiversionconstants",
diff --git a/buildscripts/tests/test_burn_in_tests.py b/buildscripts/tests/test_burn_in_tests.py
index c51f8c60ecf..369131eff60 100644
--- a/buildscripts/tests/test_burn_in_tests.py
+++ b/buildscripts/tests/test_burn_in_tests.py
@@ -4,12 +4,14 @@ from __future__ import absolute_import
import collections
import datetime
+from io import StringIO
import os
import sys
import subprocess
import unittest
from mock import Mock, patch, MagicMock
+import yaml
import buildscripts.burn_in_tests as under_test
from buildscripts.ciconfig.evergreen import parse_evergreen_file, VariantTask
@@ -556,3 +558,19 @@ class TestLocalFileChangeDetector(unittest.TestCase):
self.assertIn(file_list[2], found_tests)
self.assertNotIn(file_list[1], found_tests)
self.assertEqual(2, len(found_tests))
+
+
+class TestYamlBurnInExecutor(unittest.TestCase):
+ @patch('sys.stdout', new_callable=StringIO)
+ def test_found_tasks_should_be_reported_as_yaml(self, stdout):
+ n_tasks = 5
+ n_tests = 3
+ tests_by_task = create_tests_by_task_mock(n_tasks, n_tests)
+
+ yaml_executor = under_test.YamlBurnInExecutor()
+ yaml_executor.execute(tests_by_task)
+
+ yaml_raw = stdout.getvalue()
+ results = yaml.safe_load(yaml_raw)
+ self.assertEqual(n_tasks, len(results["discovered_tasks"]))
+ self.assertEqual(n_tests, len(results["discovered_tasks"][0]["test_list"]))
diff --git a/buildscripts/tests/test_errorcodes.py b/buildscripts/tests/test_errorcodes.py
index 2a9c9ce1e3f..d14aa505a92 100644
--- a/buildscripts/tests/test_errorcodes.py
+++ b/buildscripts/tests/test_errorcodes.py
@@ -26,7 +26,7 @@ class TestErrorcodes(unittest.TestCase):
captured_error_codes.append(code)
errorcodes.parse_source_files(accumulate_files, TESTDATA_DIR + 'regex_matching/')
- self.assertEqual(26, len(captured_error_codes))
+ self.assertEqual(30, len(captured_error_codes))
def test_dup_checking(self):
"""Test dup checking."""
diff --git a/docs/building.md b/docs/building.md
index 4ac6b75fcb3..278671124ce 100644
--- a/docs/building.md
+++ b/docs/building.md
@@ -95,16 +95,22 @@ tests, etc):
SCons Targets
--------------
-The following targets can be named on the scons command line to build
-only certain components:
+The following targets can be named on the scons command line to build and
+install a subset of components:
* `install-mongod`
* `install-mongos`
-* `install-servers` (includes `mongod` and `mongos`)
-* `install-core` (includes `mongod` and `mongos`)
-* `install-devcore` (includes `mongod`, `mongos`, and `jstestshell` (formerly mongo shell))
-* `install-all`
-
+* `install-core` (includes *only* `mongod` and `mongos`)
+* `install-servers` (includes all server components)
+* `install-devcore` (includes `mongod`, `mongos`, and `jstestshell` (formerly `mongo` shell))
+* `install-all` (includes a complete end-user distribution and tests)
+* `install-all-meta` (absolutely everything that can be built and installed)
+
+***NOTE***: The `install-core` and `install-servers` targets are *not*
+guaranteed to be identical. The `install-core` target will only ever include a
+minimal set of "core" server components, while `install-servers` is intended
+for a functional end-user installation. If you are building for testing, prefer
+the `install-core` or `install-devcore` targets over `install-servers`.
Where to find Binaries
----------------------
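
As a usage note for the SCons targets listed above: each target is named directly on the scons command line. A minimal sketch of such an invocation is shown below; real builds typically also need flags such as `-j`, `--ssl`, or a toolchain variables file, as covered elsewhere in this guide.

```
# Build and install mongod, mongos, and the test shell for local development.
python3 buildscripts/scons.py install-devcore
```
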
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 35b1e05ba9c..fd5b5ed587d 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -68,7 +68,7 @@ last-continuous:
test_file: jstests/replsets/sessions_collection_reaping.js
- ticket: SERVER-58203
test_file: jstests/sharding/query/pipeline_length_limit.js
- - ticket: SERVER-56887
+ - ticket: SERVER-65101
test_file: jstests/sharding/database_versioning_all_commands.js
- ticket: SERVER-57617
test_file: jstests/replsets/tenant_migration_recipient_fetches_retryable_writes_oplog_entries.js
@@ -260,6 +260,10 @@ last-continuous:
test_file: jstests/core/or_to_in.js
- ticket: SERVER-66422
test_file: jstests/sharding/resharding_metrics.js
+ - ticket: SERVER-50792
+ test_file: jstests/core/check_shard_index.js
+ - ticket: SERVER-50792
+ test_file: jstests/sharding/refine_collection_shard_key_basic.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
@@ -291,7 +295,7 @@ last-lts:
test_file: jstests/replsets/check_replication_hello_response_fields.js
- ticket: SERVER-50640
test_file: jstests/core/views/views_all_commands.js
- - ticket: SERVER-56887
+ - ticket: SERVER-65101
test_file: jstests/sharding/database_versioning_all_commands.js
- ticket: SERVER-49988
test_file: jstests/sharding/hello_response_validation.js
@@ -653,6 +657,10 @@ last-lts:
test_file: jstests/core/or_to_in.js
- ticket: SERVER-66422
test_file: jstests/sharding/resharding_metrics.js
+ - ticket: SERVER-50792
+ test_file: jstests/core/check_shard_index.js
+ - ticket: SERVER-50792
+ test_file: jstests/sharding/refine_collection_shard_key_basic.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 47b91f45840..fddf2ff64ab 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -78,7 +78,7 @@ variables:
additional_package_targets: archive-mongocryptd archive-mongocryptd-debug msi
exe: ".exe"
content_type: application/zip
- compile_flags: --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include c:/snmp/include" LIBPATH="c:/sasl/lib c:/snmp/lib" -j$(( $(grep -c ^processor /proc/cpuinfo) / 2 )) --win-version-min=win10
+ compile_flags: --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include" LIBPATH="c:/sasl/lib" -j$(( $(grep -c ^processor /proc/cpuinfo) / 2 )) --win-version-min=win10
num_scons_link_jobs_available: 0.5
python: '/cygdrive/c/python/python37/python.exe'
ext: zip
@@ -88,6 +88,7 @@ variables:
jstestfuzz_num_generated_files: 35
large_distro_name: windows-vsCurrent-large
test_flags: --excludeWithAnyTags=incompatible_with_windows_tls
+ external_auth_jobs_max: 1
tasks:
- name: compile_test_and_package_serial_TG
distros:
@@ -101,7 +102,7 @@ variables:
- name: .crypt
distros:
- windows-vsCurrent-xlarge
- - name: .encrypt !.aggregation !.gcm
+ - name: .encrypt !.aggregation !.gcm !.feature_flag_guarded
- name: external_auth
- name: external_auth_aws
- name: external_auth_windows
@@ -114,7 +115,6 @@ variables:
- name: sasl
- name: sharding_auth_gen
- name: sharding_auth_audit_gen
- - name: snmp
# TODO: SERVER-XYZ remove this duplicate task definition.
- &enterprise-rhel-70-64-bit-template
@@ -182,7 +182,6 @@ variables:
- name: search_ssl
- name: sharding_auth_audit_gen
- name: sharding_auth_gen
- - name: snmp
- name: .stitch
- name: .crypt
- name: .publish_crypt
@@ -237,7 +236,7 @@ buildvariants:
- name: compile_test_and_package_parallel_dbtest_stream_TG
distros:
- rhel80-xlarge
- - name: .aggregation !.encrypt
+ - name: .aggregation !.encrypt !.feature_flag_guarded
- name: .auth !.audit !.multiversion
- name: .causally_consistent !.wo_snapshot
- name: .change_streams !.secondary_reads
@@ -317,8 +316,8 @@ buildvariants:
distros:
- rhel80-xlarge
- name: .aggfuzzer .common
- - name: aggregation
- - name: aggregation_auth
+ - name: aggregation !.feature_flag_guarded
+ - name: aggregation_auth !.feature_flag_guarded
- name: auth_gen
- name: .misc_js !.sharded
- name: concurrency_gen
@@ -388,7 +387,7 @@ buildvariants:
- rhel80-xlarge
- name: test_api_version_compatibility
- name: .aggfuzzer !.multiversion
- - name: .aggregation !.multiversion
+ - name: .aggregation !.multiversion !.feature_flag_guarded
- name: audit
- name: .auth !.multiversion
- name: .causally_consistent !.sharding
@@ -435,7 +434,6 @@ buildvariants:
- name: sharding_api_version_jscore_passthrough_gen
- name: .sharding .txns !.multiversion
- name: .sharding .common !.multiversion
- - name: snmp
- name: .stitch
- name: secondary_reads_passthrough_gen
- name: server_discovery_and_monitoring_json_test_TG
@@ -508,7 +506,7 @@ buildvariants:
# tasks we would normally include have been disabled. See revert of
# SERVER-60832
#- name: .aggfuzzer
- #- name: .aggregation !.unwind
+ #- name: .aggregation !.unwind !.feature_flag_guarded
#- name: audit
#- name: .auth
#- name: causally_consistent_jscore_txns_passthrough
@@ -546,7 +544,6 @@ buildvariants:
#- name: session_jscore_passthrough
#- name: .sharding .jscore !.wo_snapshot
#- name: .sharding .common
- #- name: snmp
#- name: update_fuzzer_gen
- name: enterprise-rhel-80-64-bit-coverage-clang
@@ -729,7 +726,7 @@ buildvariants:
- name: compile_build_tools_next_TG
distros:
- windows-vsCurrent-xlarge
- - name: .aggregation !.auth !.encrypt
+ - name: .aggregation !.auth !.encrypt !.feature_flag_guarded
- name: aggregation_expression_multiversion_fuzzer_gen
- name: aggregation_expression_optimization_fuzzer_gen
- name: auth_gen
@@ -785,7 +782,7 @@ buildvariants:
exe: ".exe"
additional_package_targets: archive-mongocryptd archive-mongocryptd-debug msi archive-mh archive-mh-debug
content_type: application/zip
- compile_flags: --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include c:/snmp/include" LIBPATH="c:/sasl/lib c:/snmp/lib" -j$(bc <<< "$(grep -c '^processor' /proc/cpuinfo) / 1.5") --win-version-min=win10
+ compile_flags: --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include" LIBPATH="c:/sasl/lib" -j$(bc <<< "$(grep -c '^processor' /proc/cpuinfo) / 1.5") --win-version-min=win10
num_scons_link_jobs_available: 0.25
python: '/cygdrive/c/python/python37/python.exe'
ext: zip
@@ -801,6 +798,7 @@ buildvariants:
push_name: windows
push_arch: x86_64-enterprise
test_flags: *windows_common_test_excludes
+ external_auth_jobs_max: 1
tasks:
- name: compile_test_and_package_serial_TG
distros:
@@ -832,8 +830,11 @@ buildvariants:
- name: mqlrun
- name: noPassthrough_gen
- name: noPassthroughWithMongod_gen
- - name: .replica_sets .common
- - name: .replica_sets .multi_oplog
+ - name: .replica_sets .common !.ignore_non_generated_replica_sets_jscore_passthrough
+ - name: .replica_sets .multi_oplog !.ignore_non_generated_replica_sets_jscore_passthrough
+ - name: replica_sets_jscore_passthrough
+ distros:
+ - windows-vsCurrent-large
- name: replica_sets_ese_gen
- name: sasl
- name: server_discovery_and_monitoring_json_test_TG
@@ -843,7 +844,6 @@ buildvariants:
- name: sharding_auth_audit_gen
- name: sharding_ese_gen
- name: sharding_opportunistic_secondary_targeting_gen
- - name: snmp
- name: unittest_shell_hang_analyzer_gen
- <<: *enterprise-windows-required-template
@@ -855,7 +855,7 @@ buildvariants:
exe: ".exe"
additional_package_targets: archive-mongocryptd archive-mongocryptd-debug msi archive-mh archive-mh-debug
content_type: application/zip
- compile_flags: --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include c:/snmp/include" LIBPATH="c:/sasl/lib c:/snmp/lib" -j$(bc <<< "$(grep -c '^processor' /proc/cpuinfo) / 1.5") --win-version-min=win10
+ compile_flags: --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include" LIBPATH="c:/sasl/lib" -j$(bc <<< "$(grep -c '^processor' /proc/cpuinfo) / 1.5") --win-version-min=win10
num_scons_link_jobs_available: 0.25
python: '/cygdrive/c/python/python37/python.exe'
ext: zip
@@ -872,6 +872,7 @@ buildvariants:
--runAllFeatureFlagTests
--excludeWithAnyTags=incompatible_with_windows_tls
--excludeWithAnyTags=incompatible_with_shard_merge
+ external_auth_jobs_max: 1
tasks:
- name: cqf
- name: cqf_parallel
@@ -913,7 +914,6 @@ buildvariants:
- name: sharding_auth_gen
- name: sharding_auth_audit_gen
- name: sharding_ese_gen
- - name: snmp
# Disabling as the following tests are not aware of feature flags.
# - name: buildscripts_test
# - name: unittest_shell_hang_analyzer_gen
@@ -929,7 +929,7 @@ buildvariants:
modules:
- enterprise
expansions:
- compile_flags: --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include c:/snmp/include" LIBPATH="c:/sasl/lib c:/snmp/lib" -j$(bc <<< "$(grep -c '^processor' /proc/cpuinfo) / 1.5") --win-version-min=win10
+ compile_flags: --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include" LIBPATH="c:/sasl/lib" -j$(bc <<< "$(grep -c '^processor' /proc/cpuinfo) / 1.5") --win-version-min=win10
tasks:
- name: compile_ninja_next_TG
distros:
@@ -949,7 +949,7 @@ buildvariants:
exe: ".exe"
additional_package_targets: archive-mongocryptd archive-mongocryptd-debug msi archive-mh archive-mh-debug
content_type: application/zip
- compile_flags: --dbg=on --opt=on --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include c:/snmp/include" LIBPATH="c:/sasl/lib c:/snmp/lib" -j$(bc <<< "$(grep -c '^processor' /proc/cpuinfo) / 1.5") --win-version-min=win10 --cxx-std=20
+ compile_flags: --dbg=on --opt=on --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include" LIBPATH="c:/sasl/lib" -j$(bc <<< "$(grep -c '^processor' /proc/cpuinfo) / 1.5") --win-version-min=win10 --cxx-std=20
num_scons_link_jobs_available: 0.25
python: '/cygdrive/c/python/python37/python.exe'
ext: zip
@@ -967,7 +967,7 @@ buildvariants:
distros:
- windows-vsCurrent-large
- name: .aggfuzzer
- - name: .aggregation !.auth !.encrypt !.unwind
+ - name: .aggregation !.auth !.encrypt !.unwind !.feature_flag_guarded
- name: auth_gen
- name: causally_consistent_jscore_txns_passthrough
- name: .misc_js
@@ -1004,7 +1004,7 @@ buildvariants:
additional_package_targets: archive-mongocryptd archive-mongocryptd-debug
exe: ".exe"
content_type: application/zip
- compile_flags: --dbg=on --opt=off --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include c:/snmp/include" LIBPATH="c:/sasl/lib c:/snmp/lib" -j$(bc <<< "$(grep -c '^processor' /proc/cpuinfo) / 1.5") --win-version-min=win10
+ compile_flags: --dbg=on --opt=off --ssl MONGO_DISTMOD=windows CPPPATH="c:/sasl/include" LIBPATH="c:/sasl/lib" -j$(bc <<< "$(grep -c '^processor' /proc/cpuinfo) / 1.5") --win-version-min=win10
num_scons_link_jobs_available: 0.25
python: '/cygdrive/c/python/python37/python.exe'
ext: zip
@@ -1043,7 +1043,7 @@ buildvariants:
tasks:
- name: compile_test_and_package_serial_TG
- name: compile_build_tools_next_TG
- - name: aggregation
+ - name: aggregation !.feature_flag_guarded
- name: auth_gen
- name: causally_consistent_jscore_txns_passthrough
- name: disk_wiredtiger
@@ -1264,7 +1264,7 @@ buildvariants:
- name: check_feature_flag_tags
- name: check_for_todos
- name: .aggfuzzer
- - name: .aggregation
+ - name: .aggregation !.feature_flag_guarded
- name: audit
- name: .auth
- name: burn_in_tags_gen
@@ -1344,7 +1344,6 @@ buildvariants:
- name: .sharding .txns
- name: .sharding .common
- name: sharding_opportunistic_secondary_targeting_gen
- - name: snmp
- name: .stitch
- name: .crypt
distros:
@@ -1495,7 +1494,6 @@ buildvariants:
- name: .sharding .txns
- name: .sharding .common
- name: sharded_multi_stmt_txn_jscore_passthrough
- - name: snmp
- name: .stitch
- name: .crypt
distros:
@@ -1541,7 +1539,7 @@ buildvariants:
--mongodSetParameters="{internalQueryForceClassicEngine: true}"
tasks:
- name: .aggfuzzer
- - name: .aggregation
+ - name: .aggregation !.sbe_only
- name: .auth
- name: .causally_consistent !.sharding
- name: .change_stream_fuzzer
@@ -1629,7 +1627,6 @@ buildvariants:
- rhel80-xlarge
- name: session_jscore_passthrough
- name: sharding_api_version_jscore_passthrough_gen
- - name: snmp
- name: test_api_version_compatibility
- name: unittest_shell_hang_analyzer_gen
@@ -1896,7 +1893,6 @@ buildvariants:
- name: search_ssl
- name: sharding_auth_audit_gen
- name: sharding_auth_gen
- - name: snmp
- name: .stitch
- name: test_packages
distros:
@@ -1986,9 +1982,45 @@ buildvariants:
- name: search_ssl
- name: sharding_auth_audit_gen
- name: sharding_auth_gen
- - name: snmp
- name: .stitch
+- name: ubuntu1804-sbe-yielding-debug
+ display_name: "Shared Library Ubuntu 18.04 DEBUG Yielding Support for SBE"
+ cron: "0 12 * * *" # Every day starting at 12:00
+ run_on:
+ - ubuntu1804-test
+ expansions:
+ resmoke_jobs_factor: 0.5 # Avoid starting too many mongod's
+ compile_flags: --dbg=on --opt=on -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars --link-model=dynamic
+ scons_cache_scope: shared
+ scons_cache_mode: all
+ num_scons_link_jobs_available: 0.99
+ large_distro_name: ubuntu1804-large
+ test_flags: >-
+ --mongodSetParameters="{maintainValidCursorsAcrossSBEYieldandReadCommands: true, featureFlagSbeFull: true}"
+ tasks:
+ - name: compile_test_and_package_parallel_core_stream_TG
+ distros:
+ - ubuntu1804-xlarge
+ - name: compile_test_and_package_parallel_unittest_stream_TG
+ distros:
+ - ubuntu1804-xlarge
+ - name: compile_test_and_package_parallel_dbtest_stream_TG
+ distros:
+ - ubuntu1804-xlarge
+ - name: jsCore
+ - name: .aggregation !.encrypt !.feature_flag_guarded
+ - name: replica_sets_jscore_passthrough
+ - name: sharded_collections_jscore_passthrough
+ - name: sharding_gen
+ - name: noPassthrough_gen
+ - name: noPassthroughWithMongod_gen
+ # Exclude concurrency tasks which are ubsan-specific, test interaction between causal consistency
+ # and transactions, or involve stepdowns/termination. These should not have any particularly
+ # interesting interaction with the new yielding behavior and may exceed the standard testing time
+ # limits.
+ - name: .concurrency !.ubsan !.no_txns !.stepdowns !.kill_terminate
+
- name: ubuntu1804-container
display_name: "Ubuntu 18.04 Container"
cron: "0 4 * * *" # From the ${project_nightly_cron} parameter.
@@ -2139,7 +2171,7 @@ buildvariants:
compile_flags: --ssl MONGO_DISTMOD=rhel80 -j$(grep -c ^processor /proc/cpuinfo) --variables-files=etc/scons/mongodbtoolchain_v3_gcc.vars
scons_cache_scope: shared
tasks:
- - name: compile_benchmarks
+ - name: compile_upload_benchmarks_TG
- name: .benchmarks
@@ -2162,7 +2194,9 @@ buildvariants:
lang_environment: LANG=C
# If you add anything to san_options, make sure the appropriate changes are
# also made to SConstruct.
- san_options: LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1:external_symbolizer_path=/opt/mongodbtoolchain/${toolchain_version}/bin/llvm-symbolizer" ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:external_symbolizer_path=/opt/mongodbtoolchain/${toolchain_version}/bin/llvm-symbolizer"
+ san_options: >-
+ LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1:external_symbolizer_path=/opt/mongodbtoolchain/${toolchain_version}/bin/llvm-symbolizer"
+ ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:strict_string_checks=true:detect_invalid_pointer_pairs=1:external_symbolizer_path=/opt/mongodbtoolchain/${toolchain_version}/bin/llvm-symbolizer"
compile_flags: --variables-files=etc/scons/mongodbtoolchain_${toolchain_version}_clang.vars --dbg=on --opt=on --allocator=system --sanitize=address --ssl --ocsp-stapling=off --enable-free-mon=on -j$(grep -c ^processor /proc/cpuinfo)
multiversion_platform: rhel80
multiversion_edition: enterprise
@@ -2181,7 +2215,6 @@ buildvariants:
- name: cqf
- name: cqf_parallel
- name: compile_and_archive_dist_test_then_package_TG
- - name: compile_benchmarks
- name: .aggregation
# - name: .auth
- name: audit
@@ -2215,13 +2248,12 @@ buildvariants:
- name: session_jscore_passthrough
- name: .sharding .jscore !.wo_snapshot
- name: .sharding .common !.csrs
- - name: snmp
- name: .serverless
- name: .watchdog
- name: .stitch
- name: .shard_split
# Disabling the following tests as they are not aware of feature flags.
-# - name: .benchmarks
+# - name: .benchmarks - building benchmarks is also disabled
# - name: unittest_shell_hang_analyzer_gen
# - name: server_discovery_and_monitoring_json_test_TG
# - name: server_selection_json_test_TG
@@ -2245,7 +2277,9 @@ buildvariants:
lang_environment: LANG=C
# If you add anything to san_options, make sure the appropriate changes are
# also made to SConstruct.
- san_options: LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer" ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
+ san_options: >-
+ LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
+ ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:strict_string_checks=true:detect_invalid_pointer_pairs=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
compile_flags: --variables-files=etc/scons/mongodbtoolchain_v3_clang.vars --dbg=on --opt=on --allocator=system --sanitize=address --ssl --ocsp-stapling=off --enable-free-mon=on -j$(grep -c ^processor /proc/cpuinfo)
test_flags: >-
--mongodSetParameters="{internalQueryForceClassicEngine: true}"
@@ -2259,9 +2293,8 @@ buildvariants:
separate_debug: off
large_distro_name: rhel80-build
tasks:
- - name: compile_test_and_package_serial_TG
- - name: compile_benchmarks
- - name: .aggregation
+ - name: compile_test_benchmark_and_package_serial_TG
+ - name: .aggregation !.sbe_only
- name: .auth
- name: audit
- name: .benchmarks
@@ -2296,7 +2329,6 @@ buildvariants:
- name: session_jscore_passthrough
- name: .sharding .jscore !.wo_snapshot
- name: .sharding .common !.csrs
- - name: snmp
- name: .watchdog
- name: .stitch
- name: .serverless
@@ -2338,7 +2370,6 @@ buildvariants:
- name: cqf
- name: cqf_parallel
- name: compile_and_archive_dist_test_then_package_TG
- - name: compile_benchmarks
- name: .aggregation
- name: .auth
- name: audit
@@ -2370,14 +2401,13 @@ buildvariants:
- name: session_jscore_passthrough
- name: .sharding .jscore !.wo_snapshot
- name: .sharding .common !.csrs
- - name: snmp
- name: .stitch
- name: .serverless
- name: .updatefuzzer
- name: watchdog_wiredtiger
- name: .shard_split
# Disabling the following tests as they are not aware of feature flags.
-# - name: .benchmarks
+# - name: .benchmarks - building benchmarks is also disabled
# - name: server_discovery_and_monitoring_json_test_TG
# - name: server_selection_json_test_TG
@@ -2412,9 +2442,8 @@ buildvariants:
separate_debug: off
large_distro_name: rhel80-build
tasks:
- - name: compile_test_and_package_serial_TG
- - name: compile_benchmarks
- - name: .aggregation
+ - name: compile_test_benchmark_and_package_serial_TG
+ - name: .aggregation !.sbe_only
- name: .auth
- name: audit
- name: .benchmarks
@@ -2446,7 +2475,6 @@ buildvariants:
- name: session_jscore_passthrough
- name: .sharding .jscore !.wo_snapshot
- name: .sharding .common !.csrs
- - name: snmp
- name: .stitch
- name: .updatefuzzer
- name: .serverless
@@ -2468,7 +2496,10 @@ buildvariants:
lang_environment: LANG=C
# If you add anything to san_options, make sure the appropriate changes are
# also made to SConstruct.
- san_options: UBSAN_OPTIONS="print_stacktrace=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer" LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1" ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
+ san_options: >-
+ UBSAN_OPTIONS="print_stacktrace=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
+ LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1"
+ ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:strict_string_checks=true:detect_invalid_pointer_pairs=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
compile_flags: --variables-files=etc/scons/mongodbtoolchain_v3_clang.vars --dbg=on --opt=on --allocator=system --sanitize=undefined,address --ssl --ocsp-stapling=off -j$(grep -c ^processor /proc/cpuinfo) --link-model=dynamic
test_flags: --excludeWithAnyTags=requires_ocsp_stapling
resmoke_jobs_factor: 0.3 # Avoid starting too many mongod's under {A,UB}SAN build.
@@ -2503,7 +2534,10 @@ buildvariants:
lang_environment: LANG=C
# If you add anything to san_options, make sure the appropriate changes are
# also made to SConstruct.
- san_options: UBSAN_OPTIONS="print_stacktrace=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer" LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1" ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
+ san_options: >-
+ UBSAN_OPTIONS="print_stacktrace=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
+ LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1"
+ ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:strict_string_checks=true:detect_invalid_pointer_pairs=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
compile_flags: --variables-files=etc/scons/mongodbtoolchain_v3_clang.vars --dbg=on --opt=on --allocator=system --sanitize=undefined,address --ssl --ocsp-stapling=off -j$(grep -c ^processor /proc/cpuinfo) --link-model=dynamic
# To force disable feature flags even on the all feature flags variant, please use this file:
# buildscripts/resmokeconfig/fully_disabled_feature_flags.yml
@@ -2644,7 +2678,7 @@ buildvariants:
- name: compile_test_and_package_parallel_dbtest_stream_TG
distros:
- rhel80-build
- - name: .aggregation !.no_async
+ - name: .aggregation !.no_async !.feature_flag_guarded
- name: .sharding .auth
- name: .sharding .causally_consistent !.wo_snapshot
- name: .concurrency .common !.kill_terminate
@@ -2685,7 +2719,7 @@ buildvariants:
- name: compile_test_and_package_parallel_dbtest_stream_TG
distros:
- rhel80-build
- - name: .aggregation !.no_async
+ - name: .aggregation !.no_async !.feature_flag_guarded
- name: .sharding .auth
- name: .sharding .causally_consistent !.wo_snapshot
- name: .concurrency .common !.kill_terminate
@@ -2722,7 +2756,7 @@ buildvariants:
- name: compile_test_and_package_parallel_dbtest_stream_TG
distros:
- rhel80-build
- - name: .aggregation !.no_async
+ - name: .aggregation !.no_async !.feature_flag_guarded
- name: .sharding .auth
- name: .sharding .causally_consistent !.wo_snapshot
- name: .concurrency .common !.kill_terminate
@@ -2760,7 +2794,7 @@ buildvariants:
- name: compile_test_and_package_parallel_dbtest_stream_TG
distros:
- rhel80-build
- - name: .aggregation !.no_async
+ - name: .aggregation !.no_async !.feature_flag_guarded
- name: .sharding .auth
- name: .sharding .causally_consistent !.wo_snapshot
- name: .concurrency .common !.kill_terminate
@@ -2798,7 +2832,7 @@ buildvariants:
- name: compile_test_and_package_parallel_dbtest_stream_TG
distros:
- rhel80-build
- - name: .aggregation !.no_async
+ - name: .aggregation !.no_async !.feature_flag_guarded
- name: .sharding .auth
- name: .sharding .causally_consistent !.wo_snapshot
- name: .concurrency .common !.kill_terminate
@@ -2836,7 +2870,7 @@ buildvariants:
- name: compile_test_and_package_parallel_dbtest_stream_TG
distros:
- rhel80-build
- - name: .aggregation !.no_async
+ - name: .aggregation !.no_async !.feature_flag_guarded
- name: .sharding .auth
- name: .sharding .causally_consistent !.wo_snapshot
- name: .concurrency .common !.kill_terminate
@@ -2867,7 +2901,7 @@ buildvariants:
distros:
- rhel80-build
patch_only: true
- - name: .aggregation !.no_async
+ - name: .aggregation !.no_async !.feature_flag_guarded
patch_only: true
- name: .sharding .auth
patch_only: true
@@ -2935,7 +2969,7 @@ buildvariants:
expansions:
# --link-model=dynamic is set by visiblity_test's task_compile_flags
# expansion
- compile_flags: CPPPATH="c:/sasl/include c:/snmp/include" LIBPATH="c:/sasl/lib c:/snmp/lib" -j$(( $(grep -c ^processor /proc/cpuinfo) / 2 )) --win-version-min=win10
+ compile_flags: CPPPATH="c:/sasl/include" LIBPATH="c:/sasl/lib" -j$(( $(grep -c ^processor /proc/cpuinfo) / 2 )) --win-version-min=win10
python: '/cygdrive/c/python/python37/python.exe'
ext: zip
has_packages: false
@@ -2977,9 +3011,33 @@ buildvariants:
--excludeWithAnyTags=resource_intensive
--excludeWithAnyTags=incompatible_with_shard_merge
+- <<: *enterprise-rhel-80-64-bit-dynamic-all-feature-flags-required-template
+ name: enterprise-rhel-80-64-bit-dynamic-all-feature-flags-required-security-patch-only
+ display_name: "~ Shared Library Enterprise RHEL 8.0 Security Patch Only (all feature flags)"
+ cron: "0 4 * * 0" # From the ${project_weekly_cron} parameter # This is a patch-only variant but we run on mainline to pick up task history.
+ expansions:
+ <<: *enterprise-rhel-80-64-bit-dynamic-all-feature-flags-expansions
+ target_resmoke_time: 15
+ max_sub_suites: 15
+ tasks:
+ - name: .audit .patch_build
+ - name: .sasl .patch_build
+ - name: .encrypt .patch_build
+ - name: .lint
+ - name: lint_fuzzer_sanity_patch
+ - name: compile_test_and_package_parallel_core_stream_TG
+ distros:
+ - rhel80-xlarge
+# Disable unittests for now since they're not aware of feature flags.
+# - name: compile_test_and_package_parallel_unittest_stream_TG
+# distros:
+# - rhel80-xlarge
+# - name: compile_test_and_package_parallel_dbtest_stream_TG
+# distros:
+# - rhel80-xlarge
+
- name: enterprise-ubuntu1804-64-libvoidstar
display_name: ~ Enterprise Ubuntu 18.04 w/ libvoidstar
- cron: "0 4 * * FRI" # Every week at 0400 UTC Friday. This has to be a Friday since we run Antithesis on Fridays.
modules:
- enterprise
run_on:
@@ -2993,11 +3051,10 @@ buildvariants:
multiversion_edition: enterprise
repo_edition: enterprise
large_distro_name: ubuntu1804-build
- use_scons_cache: false
- scons_cache_scope: "none"
+ scons_cache_scope: shared
tasks:
- name: compile_and_archive_dist_test_TG
- - name: .antithesis
+ - name: antithesis_image_push
- name: generate_buildid_to_debug_symbols_mapping
- <<: *enterprise-windows-nopush-template
@@ -3005,7 +3062,7 @@ buildvariants:
display_name: "~ Enterprise Windows (Benchmarks)"
cron: "0 4 * * *" # From the ${project_nightly_cron} parameter.
tasks:
- - name: compile_benchmarks
+ - name: compile_upload_benchmarks_TG
distros:
- windows-vsCurrent-large
- name: .benchmarks !benchmarks_orphaned
@@ -3046,7 +3103,10 @@ buildvariants:
toolchain_version: v3
# If you add anything to san_options, make sure the appropriate changes are
# also made to SConstruct.
- san_options: UBSAN_OPTIONS="print_stacktrace=1:external_symbolizer_path=/opt/mongodbtoolchain/${toolchain_version}/bin/llvm-symbolizer" LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1" ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:external_symbolizer_path=/opt/mongodbtoolchain/${toolchain_version}/bin/llvm-symbolizer"
+ san_options: >-
+ UBSAN_OPTIONS="print_stacktrace=1:external_symbolizer_path=/opt/mongodbtoolchain/${toolchain_version}/bin/llvm-symbolizer"
+ LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1"
+ ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:strict_string_checks=true:detect_invalid_pointer_pairs=1:external_symbolizer_path=/opt/mongodbtoolchain/${toolchain_version}/bin/llvm-symbolizer"
compile_flags: LINKFLAGS=-nostdlib++ LIBS=stdc++ --variables-files=etc/scons/mongodbtoolchain_${toolchain_version}_clang.vars --dbg=on --opt=on --allocator=system --sanitize=undefined,address,fuzzer --ssl --ocsp-stapling=off -j$(grep -c ^processor /proc/cpuinfo)
test_flags: --excludeWithAnyTags=requires_ocsp_stapling
resmoke_jobs_factor: 0.3 # Avoid starting too many mongod's under {A,UB}SAN build.
diff --git a/etc/evergreen_yml_components/definitions.yml b/etc/evergreen_yml_components/definitions.yml
index 289e48cbcc9..e03094af84d 100644
--- a/etc/evergreen_yml_components/definitions.yml
+++ b/etc/evergreen_yml_components/definitions.yml
@@ -1,6 +1,4 @@
####################################################
-# Static Definitions #
-####################################################
# This file contains static Evergreen config definitions used by other YAML files.
# It is not run directly by any Evergreen project.
#
@@ -110,7 +108,7 @@ variables:
- &benchmark_template
name: benchmark_template
depends_on:
- - name: compile_benchmarks
+ - name: compile_upload_benchmarks
commands:
- func: "do benchmark setup"
- func: "run tests"
@@ -118,7 +116,7 @@ variables:
resmoke_args: --help
resmoke_jobs_max: 1
- func: "send benchmark results"
- - func: "analyze benchmark results"
+# - func: "analyze benchmark results"
vars:
suite: benchmark_suite
@@ -498,12 +496,19 @@ functions:
args:
- "src/evergreen/functions/binaries_extract.sh"
- "check binary version": &check_binary_version
- command: subprocess.exec
+ "get version expansions": &get_version_expansions
+ command: s3.get
params:
- binary: bash
- args:
- - "src/evergreen/functions/binary_version_check.sh"
+ aws_key: ${aws_key}
+ aws_secret: ${aws_secret}
+ remote_file: ${project}/${version_id}/version_expansions.yml
+ bucket: mciuploads
+ local_file: src/version_expansions.yml
+
+ "apply version expansions": &apply_version_expansions
+ command: expansions.update
+ params:
+ file: src/version_expansions.yml
"fetch benchmarks": &fetch_benchmarks
command: s3.get
@@ -783,7 +788,7 @@ functions:
remote_file: ${project}/${build_variant}/${revision}/pip-requirements-${task_id}-${execution}.txt
bucket: mciuploads
permissions: public-read
- content_type: atext-plain
+ content_type: text/plain
display_name: Pip Requirements
"send benchmark results":
@@ -834,7 +839,6 @@ functions:
- *adjust_venv
- *fetch_binaries
- *extract_binaries
- - *check_binary_version
- *get_buildnumber
- *f_expansions_write
- *set_up_credentials
@@ -1177,6 +1181,8 @@ functions:
- "src/evergreen/scons_lint.sh"
"scons compile":
+ - *get_version_expansions
+ - *apply_version_expansions
- *f_expansions_write
- command: subprocess.exec
type: test
@@ -1185,6 +1191,24 @@ functions:
args:
- "src/evergreen/scons_compile.sh"
+ "generate version expansions":
+ - *f_expansions_write
+ - command: subprocess.exec
+ params:
+ binary: bash
+ args:
+ - "src/evergreen/functions/version_expansions_generate.sh"
+ - command: s3.put
+ params:
+ aws_key: ${aws_key}
+ aws_secret: ${aws_secret}
+ local_file: src/version_expansions.yml
+ remote_file: ${project}/${version_id}/version_expansions.yml
+ bucket: mciuploads
+ permissions: public-read
+ content_type: application/x-yaml
+ display_name: version expansions
+
"generate compile expansions":
- *f_expansions_write
- command: subprocess.exec
@@ -2045,9 +2069,29 @@ functions:
remote_file: ${project}/${build_variant}/${revision}/local-resmoke-invocation-${task_id}-${execution}.txt
bucket: mciuploads
permissions: public-read
- content_type: atext-plain
+ content_type: text/plain
display_name: Resmoke.py Invocation for Local Usage
+ "antithesis image build":
+ - command: subprocess.exec
+ params:
+ binary: bash
+ args:
+ - "./src/evergreen/antithesis_image_build.sh"
+
+ "antithesis image push":
+ - command: subprocess.exec
+ params:
+ binary: bash
+ args:
+ - "./src/evergreen/antithesis_image_push.sh"
+
+ "antithesis dry run":
+ - command: subprocess.exec
+ params:
+ binary: bash
+ args:
+ - "./src/evergreen/antithesis_dry_run.sh"
# Pre task steps
pre:
@@ -2094,7 +2138,9 @@ tasks:
## compile - build all scons targets except unittests ##
- name: compile_dist_test
tags: []
- depends_on: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- func: "scons compile"
vars:
@@ -2104,6 +2150,37 @@ tasks:
task_compile_flags: >-
PREFIX=dist-test
+- name: compile_upload_benchmarks
+ tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
+ commands:
+ - func: "scons compile"
+ vars:
+ targets: install-benchmarks
+ compiling_for_test: true
+ - command: archive.targz_pack
+ params:
+ target: "benchmarks.tgz"
+ source_dir: "src"
+ include:
+ - "./build/benchmarks.txt"
+ - "./build/**_bm"
+ - "./build/**_bm.gcno"
+ - "./build/**_bm.exe"
+ - "./build/**_bm.pdb"
+ - command: s3.put
+ params:
+ aws_key: ${aws_key}
+ aws_secret: ${aws_secret}
+ local_file: benchmarks.tgz
+ remote_file: ${project}/${build_variant}/${revision}/benchmarks/${build_id}.tgz
+ bucket: mciuploads
+ permissions: public-read
+ content_type: application/tar
+ display_name: Benchmarks
+
- name: determine_patch_tests
commands:
- *f_expansions_write
@@ -2119,7 +2196,6 @@ tasks:
depends_on:
- name: compile_dist_test
commands:
- - *f_expansions_write
- func: "scons compile"
vars:
targets: >-
@@ -2175,7 +2251,6 @@ tasks:
- "./etc/repo_config.yaml"
- "./etc/scons/**"
- "buildscripts/**"
- - "compile_expansions.yml"
- "all_feature_flags.txt" # Must correspond to the definition in buildscripts/idl/lib.py.
- "jstests/**"
- "patch_files.txt"
@@ -2240,6 +2315,9 @@ tasks:
- name: compile_ninja
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- func: "scons compile"
vars:
@@ -2260,6 +2338,9 @@ tasks:
- name: compile_ninja_next
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- func: "scons compile"
vars:
@@ -2281,6 +2362,9 @@ tasks:
- name: compile_build_tools_next
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- func: "scons compile"
vars:
@@ -2291,6 +2375,9 @@ tasks:
- name: libdeps_graph_linting
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- *f_expansions_write
- command: subprocess.exec
@@ -2303,6 +2390,7 @@ tasks:
vars:
task_compile_flags: >-
--link-model=dynamic
+ --force-macos-dynamic-link
--build-tools=next
targets:
generate-libdeps-graph
@@ -2350,8 +2438,10 @@ tasks:
- name: clang_tidy
tags: []
exec_timeout_secs: 3600 # 1 hour timeout for the task overall
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- - *f_expansions_write
- func: "scons compile"
vars:
targets: generated-sources compiledb
@@ -2398,7 +2488,7 @@ tasks:
## run_unittests with UndoDB live-record ##
#- name: run_unittests_with_recording
# depends_on:
-# - name: compile_unittests_for_recorded_unittest
+# - name: compile_unittests_for_recorded_unittest
# commands:
# - *f_expansions_write
# - func: "run diskstats"
@@ -2427,6 +2517,9 @@ tasks:
##compile_and_archive_libfuzzertests - build libfuzzertests ##
- name: compile_and_archive_libfuzzertests
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- func: "scons compile"
vars:
@@ -2457,6 +2550,9 @@ tasks:
- name: server_discovery_and_monitoring_json_test
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- func: "scons compile"
vars:
@@ -2469,6 +2565,9 @@ tasks:
- name: server_selection_json_test
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- func: "scons compile"
vars:
@@ -2516,6 +2615,9 @@ tasks:
- name: compile_visibility_test
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- func: "scons compile"
vars:
@@ -2525,6 +2627,7 @@ tasks:
--dbg=on
--opt=on
--link-model=dynamic
+ --force-macos-dynamic-link
--visibility-support=on
- command: s3.put
@@ -2543,7 +2646,12 @@ tasks:
- name: embedded_sdk_build_cdriver
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
+ - *get_version_expansions
+ - *apply_version_expansions
- func: f_expansions_write
- command: subprocess.exec
params:
@@ -2578,6 +2686,8 @@ tasks:
depends_on:
- name: embedded_sdk_install_dev
commands:
+ - *get_version_expansions
+ - *apply_version_expansions
- *f_expansions_write
- command: subprocess.exec
params:
@@ -2610,6 +2720,7 @@ tasks:
task_compile_flags: *embedded_sdk_compile_flags
task_compile_flags_extra: >-
--link-model=dynamic
+ --force-macos-dynamic-link
# Unlike static builds, dynamic builds have no need to
# constrain the number of link jobs. Unfortunately, --jlink=1
# means one link job, not 100%. So this is a bit gross but set
@@ -2621,6 +2732,8 @@ tasks:
depends_on:
- name: embedded_sdk_install_tests
commands:
+ - *get_version_expansions
+ - *apply_version_expansions
- *f_expansions_write
- command: subprocess.exec
type: test
@@ -2647,6 +2760,8 @@ tasks:
depends_on:
- name: embedded_sdk_install_tests
commands:
+ - *get_version_expansions
+ - *apply_version_expansions
- *f_expansions_write
- command: subprocess.exec
type: test
@@ -2665,6 +2780,8 @@ tasks:
depends_on:
- name: embedded_sdk_run_tests
commands:
+ - *get_version_expansions
+ - *apply_version_expansions
# A second put, this time to -latest, to give devs a reasonable
# way to get the most recent build.
- command: s3.put
@@ -2684,6 +2801,8 @@ tasks:
depends_on:
- name: embedded_sdk_run_tests
commands:
+ - *get_version_expansions
+ - *apply_version_expansions
# A second put, this time to -latest, to give devs a reasonable
# way to get the most recent build.
- command: s3.put
@@ -2700,6 +2819,9 @@ tasks:
- name: stitch_support_create_lib
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- *f_expansions_write
- func: "scons compile"
@@ -2750,6 +2872,8 @@ tasks:
depends_on:
- name: stitch_support_install_tests
commands:
+ - func: "get version expansions"
+ - func: "apply version expansions"
- *f_expansions_write
- command: subprocess.exec
type: test
@@ -2760,6 +2884,9 @@ tasks:
- name: crypt_create_lib
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- *f_expansions_write
- func: "scons compile"
@@ -2793,6 +2920,9 @@ tasks:
- name: crypt_create_debug_lib
tags: []
+ depends_on:
+ - name: version_expansions_gen
+ variant: generate-tasks-for-version
commands:
- *f_expansions_write
- func: "scons compile"
@@ -2863,6 +2993,8 @@ tasks:
depends_on:
- name: crypt_install_tests
commands:
+ - func: "get version expansions"
+ - func: "apply version expansions"
- *f_expansions_write
- command: subprocess.exec
type: test
@@ -2871,52 +3003,6 @@ tasks:
args:
- "src/evergreen/crypt_run_tests.sh"
-- name: compile_benchmarks
- tags: []
- depends_on: []
- commands:
- - command: manifest.load
- - func: "git get project and add git tag"
- - *f_expansions_write
- - *kill_processes
- - *cleanup_environment
- - func: "set up venv"
- - func: "upload pip requirements"
- - func: "get buildnumber"
- - func: "f_expansions_write"
- - func: "set up credentials"
- - func: "use WiredTiger develop" # noop if ${use_wt_develop} is not "true"
- - func: "set up win mount script"
- - func: "generate compile expansions"
- # Then we load the generated version data into the agent so we can use it in task definitions
- - func: "apply compile expansions"
- - func: "scons compile"
- vars:
- targets: install-benchmarks
- compiling_for_test: true
- - func: "attach scons logs"
- - func: "send scons cedar report"
- - command: archive.targz_pack
- params:
- target: "benchmarks.tgz"
- source_dir: "src"
- include:
- - "./build/benchmarks.txt"
- - "./build/**_bm"
- - "./build/**_bm.gcno"
- - "./build/**_bm.exe"
- - "./build/**_bm.pdb"
- - command: s3.put
- params:
- aws_key: ${aws_key}
- aws_secret: ${aws_secret}
- local_file: benchmarks.tgz
- remote_file: ${project}/${build_variant}/${revision}/benchmarks/${build_id}.tgz
- bucket: mciuploads
- permissions: public-read
- content_type: application/tar
- display_name: Benchmarks
-
## lint ##
- name: lint_pylinters
tags: ["lint"]
@@ -3136,7 +3222,7 @@ tasks:
exec_timeout_secs: 14400 # 4 hour timeout.
resmoke_jobs_max: 1
- func: "send benchmark results"
- - func: "analyze benchmark results"
+# - func: "analyze benchmark results"
vars:
suite: benchmarks
@@ -3150,7 +3236,7 @@ tasks:
suite: benchmarks_sharding
resmoke_jobs_max: 1
- func: "send benchmark results"
- - func: "analyze benchmark results"
+# - func: "analyze benchmark results"
# Disabled under SERVER-64949.
# - <<: *benchmark_template
@@ -4131,8 +4217,19 @@ tasks:
- func: "run tests"
- <<: *task_template
+ name: aggregation_column_store_index_passthrough
+ # TODO SERVER-61644 Remove the 'feature_flag_guarded' tag and ensure this suite is run on all
+ # variants.
+ tags: ["aggregation", "sbe_only", "feature_flag_guarded"]
+ depends_on:
+ - name: aggregation
+ commands:
+ - func: "do setup"
+ - func: "run tests"
+
+- <<: *task_template
name: audit
- tags: []
+ tags: ["audit", "patch_build"]
commands:
- func: "do setup"
- func: "run tests"
@@ -4175,9 +4272,21 @@ tasks:
- func: "upload pip requirements"
- func: "generate version"
+- name: version_expansions_gen
+ commands:
+ - command: manifest.load
+ - *git_get_project
+ - *f_expansions_write
+ - *add_git_tag
+ - *kill_processes
+ - *cleanup_environment
+ - func: "set up venv"
+ - func: "upload pip requirements"
+ - func: "generate version expansions"
+
- <<: *gen_task_template
name: auth_audit_gen
- tags: ["auth", "audit"]
+ tags: ["auth", "audit", "patch_build"]
commands:
- func: "generate resmoke tasks"
@@ -4401,7 +4510,7 @@ tasks:
- <<: *task_template
name: ese
- tags: ["encrypt"]
+ tags: ["encrypt", "patch_build"]
commands:
- func: "do setup"
- func: "run tests"
@@ -4525,9 +4634,7 @@ tasks:
- "src/evergreen/external_auth_pip.sh"
- func: "run tests"
vars:
- # TODO SERVER-64323
- # restore concurrency for this suite when the issue on windows is resolved.
- resmoke_jobs_max: 1
+ resmoke_jobs_max: ${external_auth_jobs_max|4}
resmoke_args: --excludeWithAnyTags=requires_domain_controller
- <<: *task_template
@@ -5622,7 +5729,7 @@ tasks:
- <<: *task_template
name: sasl
- tags: []
+ tags: ["sasl", "patch_build"]
commands:
- func: "do setup"
- func: "run tests"
@@ -5746,7 +5853,7 @@ tasks:
- <<: *gen_task_template
name: ssl_gen
- tags: ["encrypt", "ssl"]
+ tags: ["encrypt", "ssl", "patch_build"]
commands:
- func: "generate resmoke tasks"
vars:
@@ -5754,7 +5861,7 @@ tasks:
- <<: *gen_task_template
name: sslSpecial_gen
- tags: ["encrypt", "ssl"]
+ tags: ["encrypt", "ssl", "patch_build"]
commands:
- func: "generate resmoke tasks"
vars:
@@ -5762,7 +5869,7 @@ tasks:
- <<: *gen_task_template
name: ssl_x509_gen
- tags: ["encrypt", "ssl"]
+ tags: ["encrypt", "ssl", "patch_build"]
commands:
- func: "generate resmoke tasks"
vars:
@@ -5843,12 +5950,16 @@ tasks:
tags: ["multiversion_passthrough", "multiversion", "no_version_combination"]
commands:
- func: "generate resmoke tasks"
+ vars:
+ multiversion_exclude_tags_version: last_continuous
- <<: *gen_task_template
name: retryable_writes_downgrade_last_lts_gen
tags: ["multiversion_passthrough", "multiversion", "no_version_combination"]
commands:
- func: "generate resmoke tasks"
+ vars:
+ multiversion_exclude_tags_version: last_lts
- <<: *gen_task_template
name: sharded_retryable_writes_downgrade_gen
@@ -5977,7 +6088,7 @@ tasks:
- <<: *task_template
name: client_encrypt
- tags: ["ssl", "encrypt"]
+ tags: ["ssl", "encrypt", "patch_build"]
commands:
- func: "do setup"
- func: "run tests"
@@ -5986,7 +6097,7 @@ tasks:
- <<: *task_template
name: fle
- tags: ["encrypt"]
+ tags: ["encrypt", "patch_build"]
commands:
- func: "do setup"
- func: "load aws test credentials"
@@ -5996,7 +6107,7 @@ tasks:
- <<: *task_template
name: fle2_query_analysis
- tags: ["encrypt"]
+ tags: ["encrypt", "patch_build"]
commands:
- func: "do setup"
- func: "load aws test credentials"
@@ -6004,13 +6115,27 @@ tasks:
- <<: *task_template
name: fle2
- tags: ["encrypt"]
+ tags: ["encrypt", "patch_build"]
commands:
- func: "do setup"
- func: "run tests"
- <<: *task_template
name: fle2_sharding
+ tags: ["encrypt", "patch_build"]
+ commands:
+ - func: "do setup"
+ - func: "run tests"
+
+- <<: *task_template
+ name: fle2_high_cardinality
+ tags: ["encrypt"]
+ commands:
+ - func: "do setup"
+ - func: "run tests"
+
+- <<: *task_template
+ name: fle2_sharding_high_cardinality
tags: ["encrypt"]
commands:
- func: "do setup"
@@ -6018,7 +6143,7 @@ tasks:
- <<: *task_template
name: ocsp
- tags: ["ssl", "encrypt", "ocsp"]
+ tags: ["ssl", "encrypt", "ocsp", "patch_build"]
commands:
- func: "do setup"
- func: "run tests"
@@ -6334,7 +6459,6 @@ tasks:
- *kill_processes
- *cleanup_environment
- *set_up_venv
- - func: "fetch packages"
- func: "set up remote credentials"
vars:
private_key_file: ~/.ssh/kitchen.pem
@@ -7242,16 +7366,16 @@ tasks:
args:
- "./src/evergreen/feature_flag_tags_check.sh"
-- name: antithesis_image_build
+- name: antithesis_image_push
tags: ["antithesis"]
# this is not patchable to avoid hitting the docker registry excessively.
# When iterating on this task, feel free to make this patchable for
# testing purposes. Your image changes will be pushed with the
# evergreen-patch tag, so as to not clobber the waterfall. Use the
# antithesis_image_tag build parameter to override this if required.
- patchable: false
depends_on:
- name: archive_dist_test_debug
+ exec_timeout_secs: 7200
commands:
- *f_expansions_write
- func: "git get project no modules"
@@ -7259,13 +7383,6 @@ tasks:
- func: "kill processes"
- func: "cleanup environment"
- func: "set up venv"
- - command: s3.get
- params:
- aws_key: ${aws_key}
- aws_secret: ${aws_secret}
- remote_file: ${project}/${build_variant}/antithesis_last_push.txt
- local_file: antithesis_last_push.txt
- bucket: mciuploads
- func: "do setup"
- command: s3.get
params:
@@ -7274,22 +7391,9 @@ tasks:
remote_file: ${mongo_debugsymbols}
bucket: mciuploads
local_file: src/mongo-debugsymbols.tgz
- - command: subprocess.exec
- params:
- binary: bash
- args:
- - "./src/evergreen/antithesis_image_build.sh"
- - command: s3.put
- params:
- optional: true
- aws_key: ${aws_key}
- aws_secret: ${aws_secret}
- local_file: antithesis_next_push.txt
- remote_file: ${project}/${build_variant}/antithesis_last_push.txt
- bucket: mciuploads
- permissions: private
- content_type: text/plain
- display_name: Last Push Date (seconds since epoch)
+ - func: "antithesis image build"
+ - func: "antithesis dry run"
+ - func: "antithesis image push"
- name: generate_buildid_to_debug_symbols_mapping
tags: ["symbolizer"]
@@ -7317,6 +7421,11 @@ task_groups:
- compile_dist_test
- <<: *compile_task_group_template
+ name: compile_upload_benchmarks_TG
+ tasks:
+ - compile_upload_benchmarks
+
+- <<: *compile_task_group_template
name: compile_and_archive_dist_test_TG
tasks:
- compile_dist_test
@@ -7401,6 +7510,21 @@ task_groups:
- package
- <<: *compile_task_group_template
+ name: compile_test_benchmark_and_package_serial_TG
+ tasks:
+ - compile_dist_test
+ - archive_dist_test
+ - archive_dist_test_debug
+ - compile_unittests
+ - run_unittests
+ - compile_dbtest
+ - run_dbtest
+ - archive_dbtest
+ - compile_upload_benchmarks
+ - compile_all
+ - package
+
+- <<: *compile_task_group_template
name: compile_and_test_TG
tasks:
- compile_dist_test
@@ -7545,6 +7669,7 @@ task_groups:
tags: ["stitch"]
tasks:
- "stitch_support_create_lib"
+
- <<: *stitch_support_task_group_template
name: stitch_support_lib_build_and_test
tags: ["stitch"]
diff --git a/etc/evergreen_yml_components/variants/in_memory.yml b/etc/evergreen_yml_components/variants/in_memory.yml
index 9cbd9a9065f..552fb5b2df0 100644
--- a/etc/evergreen_yml_components/variants/in_memory.yml
+++ b/etc/evergreen_yml_components/variants/in_memory.yml
@@ -21,7 +21,7 @@ buildvariants:
distros:
- rhel80-xlarge
- name: .aggfuzzer .common
- - name: .aggregation !.unwind !.encrypt
+ - name: .aggregation !.unwind !.encrypt !.feature_flag_guarded
- name: audit
- name: .auth !.multiversion
- name: .causally_consistent !.wo_snapshot !.durable_history
diff --git a/etc/evergreen_yml_components/variants/misc_release.yml b/etc/evergreen_yml_components/variants/misc_release.yml
index 35fb980d8c0..0657ba155a1 100644
--- a/etc/evergreen_yml_components/variants/misc_release.yml
+++ b/etc/evergreen_yml_components/variants/misc_release.yml
@@ -1266,6 +1266,8 @@ buildvariants:
repo_edition: enterprise
scons_cache_scope: shared
large_distro_name: ubuntu1804-build
+ # TODO SERVER-64479 remove external_auth_jobs_max once resolved
+ external_auth_jobs_max: 1
tasks:
- name: compile_test_and_package_serial_TG
distros:
@@ -1482,6 +1484,8 @@ buildvariants:
repo_edition: enterprise
scons_cache_scope: shared
large_distro_name: ubuntu2004-large
+ # TODO SERVER-64479 remove external_auth_jobs_max once resolved
+ external_auth_jobs_max: 1
tasks:
- name: compile_test_and_package_serial_TG
distros:
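
The resmoke_jobs_max change earlier in this patch relies on Evergreen's inline-default expansion syntax, ${name|default}. A comment-only sketch of how the two pieces interact (a reading of the hunks above, not new behavior):

# ${external_auth_jobs_max|4} in etc/evergreen.yml resolves to the expansion value when a
# variant defines it, and to 4 otherwise. The two misc_release.yml variants patched above set
#   external_auth_jobs_max: 1   # SERVER-64479 workaround
# so the external_auth suite runs single-job on those variants and with 4 jobs everywhere else.
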
diff --git a/etc/evergreen_yml_components/variants/sanitizer.yml b/etc/evergreen_yml_components/variants/sanitizer.yml
index ec0da723723..55999f3f47d 100644
--- a/etc/evergreen_yml_components/variants/sanitizer.yml
+++ b/etc/evergreen_yml_components/variants/sanitizer.yml
@@ -11,7 +11,9 @@ buildvariants:
stepback: true
expansions:
lang_environment: LANG=C
- san_options: LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer" ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
+ san_options: >-
+ LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
+ ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:strict_string_checks=true:detect_invalid_pointer_pairs=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
compile_flags: --variables-files=etc/scons/mongodbtoolchain_v3_clang.vars --opt=on --allocator=system --sanitize=address --ssl --ocsp-stapling=off -j$(grep -c ^processor /proc/cpuinfo)
multiversion_platform: rhel80
multiversion_edition: enterprise
@@ -22,7 +24,7 @@ buildvariants:
separate_debug: off
tasks:
- name: compile_test_and_package_serial_TG
- - name: .aggfuzzer .common
+ - name: .aggfuzzer .common !.feature_flag_guarded
- name: free_monitoring
- name: .jstestfuzz !.initsync
@@ -39,7 +41,9 @@ buildvariants:
lang_environment: LANG=C
# If you add anything to san_options, make sure the appropriate changes are
# also made to SConstruct.
- san_options: LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer" ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
+ san_options: >-
+ LSAN_OPTIONS="suppressions=etc/lsan.suppressions:report_objects=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
+ ASAN_OPTIONS="detect_leaks=1:check_initialization_order=true:strict_init_order=true:abort_on_error=1:disable_coredump=0:handle_abort=1:strict_string_checks=true:detect_invalid_pointer_pairs=1:external_symbolizer_path=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer"
compile_flags: --variables-files=etc/scons/mongodbtoolchain_v3_clang.vars --dbg=on --opt=on --allocator=system --sanitize=address --ssl --ocsp-stapling=off --enable-free-mon=on -j$(grep -c ^processor /proc/cpuinfo)
test_flags: --excludeWithAnyTags=requires_fast_memory,requires_ocsp_stapling
multiversion_platform: rhel80
@@ -51,9 +55,8 @@ buildvariants:
separate_debug: off
large_distro_name: rhel80-build
tasks:
- - name: compile_test_and_package_serial_TG
- - name: compile_benchmarks
- - name: .aggregation
+ - name: compile_test_benchmark_and_package_serial_TG
+ - name: .aggregation !.feature_flag_guarded
- name: .auth
- name: audit
- name: .benchmarks
@@ -124,9 +127,8 @@ buildvariants:
separate_debug: off
large_distro_name: rhel80-build
tasks:
- - name: compile_test_and_package_serial_TG
- - name: compile_benchmarks
- - name: .aggregation
+ - name: compile_test_benchmark_and_package_serial_TG
+ - name: .aggregation !.feature_flag_guarded
- name: .auth
- name: audit
- name: .benchmarks
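
The san_options value in sanitizer.yml is now a folded block scalar (">-"), which joins the wrapped lines back into a single space-separated string, so the harness still receives one line; the only content change is the two extra ASAN checks. A comment-only sketch of what those flags are generally understood to do (see the ASan flag documentation for the authoritative definitions):

# strict_string_checks=true       - validate the full extent of string arguments passed to
#                                   libc string functions, not only the bytes actually touched
# detect_invalid_pointer_pairs=1  - report comparison/subtraction of pointers that do not point
#                                   into the same allocation (needs the matching pointer-compare/
#                                   pointer-subtract instrumentation to catch anything)
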
diff --git a/etc/evergreen_yml_components/variants/task_generation.yml b/etc/evergreen_yml_components/variants/task_generation.yml
index da222186cb6..6a345919a67 100644
--- a/etc/evergreen_yml_components/variants/task_generation.yml
+++ b/etc/evergreen_yml_components/variants/task_generation.yml
@@ -10,3 +10,4 @@ buildvariants:
- rhel80-medium
tasks:
- name: version_gen
+ - name: version_expansions_gen
diff --git a/etc/perf.yml b/etc/perf.yml
index 4b3af98b28e..05cbcbc3c54 100644
--- a/etc/perf.yml
+++ b/etc/perf.yml
@@ -244,11 +244,16 @@ functions:
set -o errexit
set -o verbose
+ mkdir -p mongodb/bin
+
/opt/mongodbtoolchain/v3/bin/virtualenv --python /opt/mongodbtoolchain/v3/bin/python3 "${workdir}/compile_venv"
source "${workdir}/compile_venv/bin/activate"
python -m pip install -r etc/pip/compile-requirements.txt
-
+ - command: expansions.write
+ params:
+ file: expansions.yml
+ redacted: true
- command: shell.exec
params:
working_dir: src
@@ -256,10 +261,10 @@ functions:
set -o errexit
set -o verbose
- mkdir -p mongodb/bin
+ source "${workdir}/compile_venv/bin/activate"
# We get the raw version string (r1.2.3-45-gabcdef) from git
- MONGO_VERSION=$(git describe --abbrev=7)
+ export MONGO_VERSION=$(git describe --abbrev=7)
# If this is a patch build, we add the patch version id to the version string so we know
# this build was a patch, and which evergreen task it came from
@@ -267,10 +272,25 @@ functions:
MONGO_VERSION="$MONGO_VERSION-patch-${version_id}"
fi
- # This script converts the generated version string into a sanitized version string for
- # use by scons and uploading artifacts as well as information about for the scons cache.
+ # This script handles sanitizing the version string for use during SCons build
+ # and when pushing artifacts up to S3.
+ IS_PATCH=${is_patch|false} IS_COMMIT_QUEUE=${is_commit_queue|false} \
+ buildscripts/generate_version_expansions.py --out version_expansions.yml
+ - command: expansions.update
+ params:
+ file: src/version_expansions.yml
+ - command: shell.exec
+ params:
+ working_dir: src
+ script: |
+ set -o errexit
+ set -o verbose
+
+ # This script handles whether the SCons cache should be used
source "${workdir}/compile_venv/bin/activate"
- MONGO_VERSION=$MONGO_VERSION USE_SCONS_CACHE=${use_scons_cache|false} python buildscripts/generate_compile_expansions.py --out compile_expansions.yml
+ SCONS_CACHE_MODE=${scons_cache_mode|} USE_SCONS_CACHE=${use_scons_cache|false} \
+ IS_PATCH=${is_patch|false} IS_COMMIT_QUEUE=${is_commit_queue|false} \
+ python buildscripts/generate_compile_expansions.py --out compile_expansions.yml
- command: expansions.update
params:
file: src/compile_expansions.yml
@@ -755,7 +775,7 @@ microbenchmark-buildvariants:
_linux-wt-standalone: &linux-wt-standalone
name: linux-wt-standalone
display_name: Standalone Linux inMemory
- batchtime: 240 # 4 hours
+ cron: "0 */4 * * *" # Every 4 hours starting at midnight
modules: *modules
expansions:
# We are explicitly tracking the rhel62 variant compile options from evergreen.yml for
@@ -806,7 +826,7 @@ microbenchmark-buildvariants:
_linux-wt-repl: &linux-wt-repl
name: linux-wt-repl
display_name: 1-Node ReplSet Linux inMemory
- batchtime: 240 # 4 hours
+ cron: "0 */4 * * *" # Every 4 hours starting at midnight
modules: *modules
expansions:
mongod_exec_wrapper: *exec_wrapper
@@ -836,13 +856,20 @@ buildvariants:
- <<: *linux-wt-standalone
name: linux-wt-standalone-all-feature-flags
display_name: Standalone Linux inMemory (all feature flags)
- batchtime: 1440 # 24 hours
+ cron: "0 0 * * *" # Every day starting at 00:00
expansions:
mongodb_setup: microbenchmarks_standalone-all-feature-flags
+ - <<: *linux-wt-standalone
+ name: linux-wt-standalone-classic-query-engine
+ display_name: Standalone Linux inMemory (Classic Query Engine)
+ cron: "0 0 * * 4" # 00:00 on Thursday
+ expansions:
+ mongodb_setup: microbenchmarks_standalone-classic-query-engine
+
- <<: *linux-wt-repl
name: linux-wt-repl-all-feature-flags
display_name: 1-Node ReplSet Linux inMemory (all feature flags)
- batchtime: 1440 # 24 hours
+ cron: "0 0 * * *" # Every day starting at 00:00
expansions:
mongodb_setup: microbenchmarks_replica-all-feature-flags
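
The microbenchmark variants above move from batchtime intervals to explicit cron schedules. Evergreen cron expressions use the standard five fields (minute, hour, day-of-month, month, day-of-week), so the new values read as follows:

# "0 */4 * * *"  -> on the hour, every 4 hours   (replaces batchtime: 240)
# "0 0 * * *"    -> 00:00 every day              (replaces batchtime: 1440)
# "0 0 * * 4"    -> 00:00 every Thursday         (new classic-query-engine variant)
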
diff --git a/etc/pip/components/build_metrics.req b/etc/pip/components/build_metrics.req
new file mode 100644
index 00000000000..17a95891ed2
--- /dev/null
+++ b/etc/pip/components/build_metrics.req
@@ -0,0 +1,2 @@
+psutil
+jsonschema
diff --git a/etc/pip/toolchain-requirements.txt b/etc/pip/toolchain-requirements.txt
index caa44fd3f2f..fa21acf83a9 100644
--- a/etc/pip/toolchain-requirements.txt
+++ b/etc/pip/toolchain-requirements.txt
@@ -14,3 +14,5 @@
-r components/jiraclient.req
-r components/platform.req
+
+-r components/build_metrics.req
diff --git a/etc/system_perf.yml b/etc/system_perf.yml
index 8098813985d..55933aba2f1 100755
--- a/etc/system_perf.yml
+++ b/etc/system_perf.yml
@@ -21,6 +21,11 @@ variables:
variant: compile-rhel70
- name: schedule_global_auto_tasks
variant: task_generation
+ _real_compile_amazon_linux2_arm64: &_real_compile_amazon_linux2_arm64
+ - name: compile
+ variant: compile-amazon-linux2-arm64
+ - name: schedule_global_auto_tasks
+ variant: task_generation
_real_expansions: &_expansion_updates
[]
###
@@ -33,6 +38,9 @@ variables:
# _skip_compile_rhel70: &_compile_rhel70
# - name: schedule_global_auto_tasks
# variant: task_generation
+# _skip_compile_amazon_linux2_arm64: &_real_compile_amazon_linux2_arm64
+# - name: schedule_global_auto_tasks
+# variant: task_generation
# _skip_expansions: &_expansion_updates
# # This is the normal (amazon2) "compile" artifact from https://evergreen.mongodb.com/version/sys_perf_97c6a9e443ff7e171b7310a1fa5c05d0768faff9
# - key: mdb_binary_for_client
@@ -266,11 +274,16 @@ functions:
set -o errexit
set -o verbose
+ mkdir -p mongodb/bin
+
/opt/mongodbtoolchain/v3/bin/virtualenv --python /opt/mongodbtoolchain/v3/bin/python3 "${workdir}/compile_venv"
source "${workdir}/compile_venv/bin/activate"
python -m pip install -r etc/pip/compile-requirements.txt
-
+ - command: expansions.write
+ params:
+ file: expansions.yml
+ redacted: true
- command: shell.exec
params:
working_dir: src
@@ -278,10 +291,10 @@ functions:
set -o errexit
set -o verbose
- mkdir -p mongodb/bin
+ source "${workdir}/compile_venv/bin/activate"
# We get the raw version string (r1.2.3-45-gabcdef) from git
- MONGO_VERSION=$(git describe --abbrev=7)
+ export MONGO_VERSION=$(git describe --abbrev=7)
# If this is a patch build, we add the patch version id to the version string so we know
# this build was a patch, and which evergreen task it came from
@@ -289,10 +302,25 @@ functions:
MONGO_VERSION="$MONGO_VERSION-patch-${version_id}"
fi
- # This script converts the generated version string into a sanitized version string for
- # use by scons and uploading artifacts as well as information about for the scons cache.
+ # This script handles sanitizing the version string for use during SCons build
+ # and when pushing artifacts up to S3.
+ IS_PATCH=${is_patch|false} IS_COMMIT_QUEUE=${is_commit_queue|false} \
+ buildscripts/generate_version_expansions.py --out version_expansions.yml
+ - command: expansions.update
+ params:
+ file: src/version_expansions.yml
+ - command: shell.exec
+ params:
+ working_dir: src
+ script: |
+ set -o errexit
+ set -o verbose
+
+ # This script handles whether the SCons cache should be used
source "${workdir}/compile_venv/bin/activate"
- MONGO_VERSION=$MONGO_VERSION USE_SCONS_CACHE=${use_scons_cache|false} python buildscripts/generate_compile_expansions.py --out compile_expansions.yml
+ SCONS_CACHE_MODE=${scons_cache_mode|} USE_SCONS_CACHE=${use_scons_cache|false} \
+ IS_PATCH=${is_patch|false} IS_COMMIT_QUEUE=${is_commit_queue|false} \
+ python buildscripts/generate_compile_expansions.py --out compile_expansions.yml
- command: expansions.update
params:
file: src/compile_expansions.yml
@@ -830,12 +858,13 @@ tasks:
{scale: 10,
schema: denormalized}
- - name: queries_on_columnstore_indexes
+# TODO PERF-3094: Remove this task.
+ - name: column_store_index_charts_events_1G
priority: 5
commands:
- func: f_run_dsi_workload
vars:
- test_control: "columnstore"
+ test_control: "charts_events_1G"
- name: non_sharded_workloads
priority: 5
@@ -1080,6 +1109,19 @@ buildvariants:
tasks:
- name: compile
+ - &compile-amazon-linux2-arm64
+ name: compile-amazon-linux2-arm64
+ display_name: Compile for Amazon Linux 2 arm64
+ modules: *modules
+ cron: "0 0 * * *" # Everyday at 00:00
+ expansions:
+ <<: *compile-expansions
+ compile-variant: -arm64
+ run_on:
+ - "amazon2-arm64"
+ tasks:
+ - name: compile
+
- name: linux-standalone
display_name: Linux Standalone
cron: "0 0 * * *" # Everyday at 00:00
@@ -1139,7 +1181,58 @@ buildvariants:
- name: tpch_1_denormalized
- name: tpch_10_normalized
- name: tpch_10_denormalized
- - name: queries_on_columnstore_indexes
+ - name: column_store_index_charts_events_1G
+
+ - name: linux-standalone-classic-query-engine
+ display_name: Linux Standalone (Classic Query Engine)
+ cron: "0 0 * * 4" # 00:00 on Thursday
+ modules: *modules
+ expansions:
+ mongodb_setup: standalone-classic-query-engine
+ infrastructure_provisioning: single
+ platform: linux
+ project_dir: *project_dir
+ authentication: enabled
+ storageEngine: wiredTiger
+ run_on:
+ - "rhel70-perf-single"
+ depends_on: *_compile_amazon2
+ tasks: &classic_engine_tasks
+ - name: schedule_patch_auto_tasks
+ - name: schedule_variant_auto_tasks
+ - name: industry_benchmarks
+ - name: ycsb_60GB
+ - name: crud_workloads
+ - name: bestbuy_agg
+ - name: bestbuy_agg_merge_different_db
+ - name: bestbuy_agg_merge_same_db
+ - name: bestbuy_agg_merge_wordcount
+ - name: bestbuy_query
+ - name: cursor_manager
+ - name: map_reduce_workloads
+ - name: tpcc
+ - name: tpch_1_normalized
+ - name: tpch_1_denormalized
+ - name: tpch_10_normalized
+ - name: tpch_10_denormalized
+
+ - name: linux-1-node-replSet-classic-query-engine
+ display_name: Linux 1-Node ReplSet (Classic Query Engine)
+ cron: "0 0 * * 4" # 00:00 on Thursday
+ modules: *modules
+ expansions:
+ mongodb_setup: single-replica-classic-query-engine
+ infrastructure_provisioning: single
+ platform: linux
+ project_dir: *project_dir
+ authentication: enabled
+ storageEngine: wiredTiger
+ run_on:
+ - "rhel70-perf-single"
+ depends_on: *_compile_amazon2
+ tasks:
+ - name: linkbench
+ - name: linkbench2
- name: compile-rhel70
display_name: Compile for Atlas-like
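
Both etc/perf.yml and etc/system_perf.yml split expansion generation out of the compile script in the same way. A condensed, comment-only sketch of the resulting order of operations (command and file names as they appear in the hunks above):

# 1. shell.exec         - create the compile virtualenv (and mongodb/bin) and pip-install
#                         etc/pip/compile-requirements.txt
# 2. expansions.write   - dump the task's current expansions to expansions.yml (redacted)
# 3. shell.exec         - derive MONGO_VERSION via `git describe --abbrev=7` (plus the
#                         -patch-<version_id> suffix on patch builds) and run
#                         buildscripts/generate_version_expansions.py --out version_expansions.yml
# 4. expansions.update  - merge src/version_expansions.yml back into the task
# 5. shell.exec         - run buildscripts/generate_compile_expansions.py with the SCons cache
#                         settings (SCONS_CACHE_MODE, USE_SCONS_CACHE, IS_PATCH, IS_COMMIT_QUEUE)
# 6. expansions.update  - merge src/compile_expansions.yml back into the task
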
diff --git a/evergreen/antithesis_dry_run.sh b/evergreen/antithesis_dry_run.sh
new file mode 100644
index 00000000000..740e21bd2d7
--- /dev/null
+++ b/evergreen/antithesis_dry_run.sh
@@ -0,0 +1,6 @@
+set -o errexit
+set -o verbose
+
+cd antithesis/topologies/sharded_cluster
+sudo docker-compose up -d
+sudo docker exec workload /bin/bash -c 'cd resmoke && . python3-venv/bin/activate && python3 run_suite.py'
diff --git a/evergreen/antithesis_image_build.sh b/evergreen/antithesis_image_build.sh
index 0b986770102..2be5630740d 100644
--- a/evergreen/antithesis_image_build.sh
+++ b/evergreen/antithesis_image_build.sh
@@ -3,15 +3,6 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
set -euo pipefail
-cd src
-commit_date=$(date -d "$(git log -1 -s --format=%ci)" "+%s")
-last_run_date=$(cat ../antithesis_last_push.txt || echo 0)
-if [ "${is_patch}" != "true" ] && [ "${last_run_date}" -gt "${commit_date}" ]; then
- echo -e "Refusing to push new antithesis images because this commit is older\nthan the last pushed commit"
- exit 0
-fi
-cd ..
-
# check that the binaries in dist-test are linked to libvoidstar
ldd src/dist-test/bin/mongod | grep libvoidstar
ldd src/dist-test/bin/mongos | grep libvoidstar
@@ -75,27 +66,3 @@ sudo docker build . -t repl-set-config:$tag
cd ../sharded_cluster
sed -i s/evergreen-latest-master/$tag/ docker-compose.yml
sudo docker build . -t sharded-cluster-config:$tag
-
-# login, push, and logout
-echo "${antithesis_repo_key}" > mongodb.key.json
-cat mongodb.key.json | sudo docker login -u _json_key https://us-central1-docker.pkg.dev --password-stdin
-rm mongodb.key.json
-
-# tag and push to the registry
-sudo docker tag "mongo-binaries:$tag" "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/mongo-binaries:$tag"
-sudo docker push "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/mongo-binaries:$tag"
-
-sudo docker tag "workload:$tag" "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/workload:$tag"
-sudo docker push "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/workload:$tag"
-
-sudo docker tag "repl-set-config:$tag" "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/repl-set-config:$tag"
-sudo docker push "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/repl-set-config:$tag"
-
-sudo docker tag "sharded-cluster-config:$tag" "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/sharded-cluster-config:$tag"
-sudo docker push "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/sharded-cluster-config:$tag"
-
-sudo docker logout https://us-central1-docker.pkg.dev
-
-if [ "${is_patch}" != "true" ]; then
- echo "$commit_date" > antithesis_next_push.txt
-fi
diff --git a/evergreen/antithesis_image_push.sh b/evergreen/antithesis_image_push.sh
new file mode 100644
index 00000000000..94e1a2bf0b7
--- /dev/null
+++ b/evergreen/antithesis_image_push.sh
@@ -0,0 +1,35 @@
+DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
+. "$DIR/prelude.sh"
+
+set -euo pipefail
+
+# push images as evergreen-latest-${branch_name}, unless it's a patch
+tag="evergreen-latest-${branch_name}"
+if [ "${is_patch}" = "true" ]; then
+ tag="evergreen-patch"
+fi
+
+if [ -n "${antithesis_image_tag:-}" ]; then
+ echo "Using provided tag: '$antithesis_image_tag' for docker pushes"
+ tag=$antithesis_image_tag
+fi
+
+# login, push, and logout
+echo "${antithesis_repo_key}" > mongodb.key.json
+cat mongodb.key.json | sudo docker login -u _json_key https://us-central1-docker.pkg.dev --password-stdin
+rm mongodb.key.json
+
+# tag and push to the registry
+sudo docker tag "mongo-binaries:$tag" "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/mongo-binaries:$tag"
+sudo docker push "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/mongo-binaries:$tag"
+
+sudo docker tag "workload:$tag" "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/workload:$tag"
+sudo docker push "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/workload:$tag"
+
+sudo docker tag "repl-set-config:$tag" "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/repl-set-config:$tag"
+sudo docker push "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/repl-set-config:$tag"
+
+sudo docker tag "sharded-cluster-config:$tag" "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/sharded-cluster-config:$tag"
+sudo docker push "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/sharded-cluster-config:$tag"
+
+sudo docker logout https://us-central1-docker.pkg.dev
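
The tag-and-push sequence in the new evergreen/antithesis_image_push.sh is identical for each of the four images. For reference, an equivalent loop form of the same commands (same registry path and $tag variable as above; the script itself keeps the explicit per-image lines):

for image in mongo-binaries workload repl-set-config sharded-cluster-config; do
  sudo docker tag "$image:$tag" "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/$image:$tag"
  sudo docker push "us-central1-docker.pkg.dev/molten-verve-216720/mongodb-repository/$image:$tag"
done
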
diff --git a/evergreen/functions/binary_version_check.sh b/evergreen/functions/binary_version_check.sh
deleted file mode 100755
index ac01374a07e..00000000000
--- a/evergreen/functions/binary_version_check.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
-. "$DIR/../prelude.sh"
-
-cd src
-
-set -o errexit
-mongo_binary=dist-test/bin/mongo${exe}
-activate_venv
-bin_ver=$($python -c "import yaml; print(yaml.safe_load(open('compile_expansions.yml'))['version']);" | tr -d '[ \r\n]')
-# Due to SERVER-23810, we cannot use $mongo_binary --quiet --nodb --eval "version();"
-mongo_ver=$($mongo_binary --version | perl -pe '/version v([^\"]*)/; $_ = $1;' | tr -d '[ \r\n]')
-# The versions must match
-if [ "$bin_ver" != "$mongo_ver" ]; then
- echo "The mongo version is $mongo_ver, expected version is $bin_ver"
- exit 1
-fi
diff --git a/evergreen/functions/compile_expansions_generate.sh b/evergreen/functions/compile_expansions_generate.sh
index dc9642f5cd4..fe06d0fdf7d 100644
--- a/evergreen/functions/compile_expansions_generate.sh
+++ b/evergreen/functions/compile_expansions_generate.sh
@@ -5,14 +5,6 @@ cd src
set -o errexit
set -o verbose
-# We get the raw version string (r1.2.3-45-gabcdef) from git
-MONGO_VERSION=$(git describe --abbrev=7)
-# If this is a patch build, we add the patch version id to the version string so we know
-# this build was a patch, and which evergreen task it came from
-if [ "${is_patch}" = "true" ]; then
- MONGO_VERSION="$MONGO_VERSION-patch-${version_id}"
-fi
-echo "MONGO_VERSION = ${MONGO_VERSION}"
activate_venv
# shared scons cache testing
# if 'scons_cache_scope' enabled and project level 'disable_shared_scons_cache' is not true
@@ -43,12 +35,12 @@ if [ ! -z ${scons_cache_scope} ]; then
set -o errexit
fi
echo "Shared Cache with setting: ${scons_cache_scope}"
- MONGO_VERSION=$MONGO_VERSION SCONS_CACHE_MODE=${scons_cache_mode} SCONS_CACHE_SCOPE=$scons_cache_scope IS_PATCH=${is_patch} IS_COMMIT_QUEUE=${is_commit_queue} $python buildscripts/generate_compile_expansions_shared_cache.py --out compile_expansions.yml
+ SCONS_CACHE_MODE=${scons_cache_mode} SCONS_CACHE_SCOPE=$scons_cache_scope IS_PATCH=${is_patch} IS_COMMIT_QUEUE=${is_commit_queue} $python buildscripts/generate_compile_expansions_shared_cache.py --out compile_expansions.yml
# Legacy Expansion generation
else
echo "Using legacy expansion generation"
# Proceed with regular expansions generated
# This script converts the generated version string into a sanitized version string for
# use by scons and uploading artifacts, as well as information about the scons cache.
- MONGO_VERSION=$MONGO_VERSION SCONS_CACHE_MODE=${scons_cache_mode} USE_SCONS_CACHE=${use_scons_cache} $python buildscripts/generate_compile_expansions.py --out compile_expansions.yml
+ SCONS_CACHE_MODE=${scons_cache_mode} USE_SCONS_CACHE=${use_scons_cache} IS_PATCH=${is_patch} IS_COMMIT_QUEUE=${is_commit_queue} $python buildscripts/generate_compile_expansions.py --out compile_expansions.yml
fi
diff --git a/evergreen/functions/version_expansions_generate.sh b/evergreen/functions/version_expansions_generate.sh
new file mode 100755
index 00000000000..c0e577a59e6
--- /dev/null
+++ b/evergreen/functions/version_expansions_generate.sh
@@ -0,0 +1,18 @@
+DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
+. "$DIR/../prelude.sh"
+
+cd src
+
+set -o errexit
+set -o verbose
+# We get the raw version string (r1.2.3-45-gabcdef) from git
+MONGO_VERSION=$(git describe --abbrev=7)
+# If this is a patch build, we add the patch version id to the version string so we know
+# this build was a patch, and which evergreen task it came from
+if [ "${is_patch}" = "true" ]; then
+ MONGO_VERSION="$MONGO_VERSION-patch-${version_id}"
+fi
+echo "MONGO_VERSION = ${MONGO_VERSION}"
+
+activate_venv
+MONGO_VERSION=${MONGO_VERSION} IS_PATCH=${is_patch} IS_COMMIT_QUEUE=${is_commit_queue} $python buildscripts/generate_version_expansions.py --out version_expansions.yml
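
For concreteness, the version string that evergreen/functions/version_expansions_generate.sh feeds into generate_version_expansions.py follows the raw `git describe` format noted in its comments (illustrative values, not taken from a real build):

# waterfall build:  MONGO_VERSION=r1.2.3-45-gabcdef
# patch build:      MONGO_VERSION=r1.2.3-45-gabcdef-patch-<version_id>
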
diff --git a/evergreen/generate_version.sh b/evergreen/generate_version.sh
index db38741fbbd..b87c583b80a 100644
--- a/evergreen/generate_version.sh
+++ b/evergreen/generate_version.sh
@@ -6,7 +6,7 @@ cd src
set -o errexit
set -o verbose
-curl -L https://github.com/mongodb/mongo-task-generator/releases/download/v0.3.6/mongo-task-generator --output mongo-task-generator
+curl -L https://github.com/mongodb/mongo-task-generator/releases/download/v0.4.0/mongo-task-generator --output mongo-task-generator
chmod +x mongo-task-generator
activate_venv
diff --git a/jstests/aggregation/accumulators/accumulator_js.js b/jstests/aggregation/accumulators/accumulator_js.js
index 3a389851e24..90460e2ce72 100644
--- a/jstests/aggregation/accumulators/accumulator_js.js
+++ b/jstests/aggregation/accumulators/accumulator_js.js
@@ -192,8 +192,9 @@ command.pipeline = [{
}
}
}];
-// 16554 means "$add only supports numeric or date types"
-assert.commandFailedWithCode(db.runCommand(command), 16554);
+// ErrorCodes.TypeMismatch means "$add only supports numeric or date types". Code 16554 represented
+// a type mismatch before 6.1 for this specific check.
+assert.commandFailedWithCode(db.runCommand(command), [16554, ErrorCodes.TypeMismatch]);
// Test that initArgs can have a different length per group.
assert(db.accumulator_js.drop());
diff --git a/jstests/aggregation/bugs/server6184.js b/jstests/aggregation/bugs/server6184.js
index ae0f5ae947c..bc2ce8c0f67 100644
--- a/jstests/aggregation/bugs/server6184.js
+++ b/jstests/aggregation/bugs/server6184.js
@@ -1,4 +1,5 @@
// SERVER-6184 Support mixing nested and dotted fields with common prefixes
+// @tags: [tests_projection_field_order]
c = db.c;
c.drop();
diff --git a/jstests/aggregation/bugs/server6570.js b/jstests/aggregation/bugs/server6570.js
index 112feb49406..b12a83967ba 100644
--- a/jstests/aggregation/bugs/server6570.js
+++ b/jstests/aggregation/bugs/server6570.js
@@ -6,10 +6,19 @@ c = db.s6570;
c.drop();
c.save({x: 17, y: "foo"});
-assertErrorCode(c, {$project: {string_fields: {$add: [3, "$y", 4, "$y"]}}}, 16554);
-assertErrorCode(c, {$project: {number_fields: {$add: ["a", "$x", "b", "$x"]}}}, 16554);
-assertErrorCode(c, {$project: {all_strings: {$add: ["c", "$y", "d", "$y"]}}}, 16554);
-assertErrorCode(c, {$project: {potpourri_1: {$add: [5, "$y", "e", "$x"]}}}, 16554);
-assertErrorCode(c, {$project: {potpourri_2: {$add: [6, "$x", "f", "$y"]}}}, 16554);
-assertErrorCode(c, {$project: {potpourri_3: {$add: ["g", "$y", 7, "$x"]}}}, 16554);
-assertErrorCode(c, {$project: {potpourri_4: {$add: ["h", "$x", 8, "$y"]}}}, 16554);
+// 16554 was the code used instead of TypeMismatch before 6.1.
+assertErrorCode(
+ c, {$project: {string_fields: {$add: [3, "$y", 4, "$y"]}}}, [16554, ErrorCodes.TypeMismatch]);
+assertErrorCode(c,
+ {$project: {number_fields: {$add: ["a", "$x", "b", "$x"]}}},
+ [16554, ErrorCodes.TypeMismatch]);
+assertErrorCode(
+ c, {$project: {all_strings: {$add: ["c", "$y", "d", "$y"]}}}, [16554, ErrorCodes.TypeMismatch]);
+assertErrorCode(
+ c, {$project: {potpourri_1: {$add: [5, "$y", "e", "$x"]}}}, [16554, ErrorCodes.TypeMismatch]);
+assertErrorCode(
+ c, {$project: {potpourri_2: {$add: [6, "$x", "f", "$y"]}}}, [16554, ErrorCodes.TypeMismatch]);
+assertErrorCode(
+ c, {$project: {potpourri_3: {$add: ["g", "$y", 7, "$x"]}}}, [16554, ErrorCodes.TypeMismatch]);
+assertErrorCode(
+ c, {$project: {potpourri_4: {$add: ["h", "$x", 8, "$y"]}}}, [16554, ErrorCodes.TypeMismatch]);
diff --git a/jstests/aggregation/bugs/server66418.js b/jstests/aggregation/bugs/server66418.js
new file mode 100644
index 00000000000..9b8c960282a
--- /dev/null
+++ b/jstests/aggregation/bugs/server66418.js
@@ -0,0 +1,38 @@
+// SERVER-66418
+// Bad projection created during dependency analysis due to string order assumption
+(function() {
+"use strict";
+
+const coll = db[jsTest.name()];
+coll.drop();
+
+coll.save({
+ _id: 1,
+ type: 'PRODUCT',
+ status: 'VALID',
+ locale: {
+ en: 'INSTRUMENT PANEL',
+ es: 'INSTRUMENTOS DEL CUADRO',
+ fr: 'INSTRUMENT TABLEAU DE BORD',
+ }
+});
+
+// before SERVER-66418, this incorrectly threw a PathCollision error
+coll.aggregate([
+ {"$match": {"_id": 1}},
+ {"$sort": {"_id": 1}},
+ {
+ "$project": {
+ "designation": {
+ "$switch": {
+ "branches": [{
+ "case": {"$eq": ["$type", "PRODUCT"]},
+ "then": {"$ifNull": ["$locale.en-GB.name", "$locale.en.name"]}
+ }],
+ "default": {"$ifNull": ["$locale.en-GB", "$locale.en"]}
+ }
+ }
+ }
+ }
+]);
+})();
diff --git a/jstests/aggregation/collection_uuid_coll_stats_index_stats.js b/jstests/aggregation/collection_uuid_coll_stats_index_stats.js
index 5aa92524652..b0779a310f4 100644
--- a/jstests/aggregation/collection_uuid_coll_stats_index_stats.js
+++ b/jstests/aggregation/collection_uuid_coll_stats_index_stats.js
@@ -49,10 +49,11 @@ const testCommand = function(cmd, cmdObj) {
jsTestLog("The command '" + cmd +
"' fails when the provided UUID corresponds to a different collection, even if the " +
"provided namespace does not exist.");
- coll2.drop();
+ assert.commandWorked(testDB.runCommand({drop: coll2.getName()}));
res =
assert.commandFailedWithCode(testDB.runCommand(cmdObj), ErrorCodes.CollectionUUIDMismatch);
validateErrorResponse(res, testDB.getName(), uuid, coll2.getName(), coll.getName());
+ assert(!testDB.getCollectionNames().includes(coll2.getName()));
jsTestLog("The command '" + cmd + "' succeeds on view when no UUID is provided.");
const viewName = "view";
diff --git a/jstests/aggregation/expressions/date_from_string.js b/jstests/aggregation/expressions/date_from_string.js
index 9b4d0702e5e..a1257781b27 100644
--- a/jstests/aggregation/expressions/date_from_string.js
+++ b/jstests/aggregation/expressions/date_from_string.js
@@ -569,6 +569,27 @@ testCases.forEach(function(testCase) {
});
/* --------------------------------------------------------------------------------------- */
+/* Tests for textual month. */
+
+testCases = [
+ {inputString: "2017, July 4", format: "%Y, %B %d", expect: "2017-07-04T00:00:00Z"},
+ {inputString: "oct 20 2020", format: "%b %d %Y", expect: "2020-10-20T00:00:00Z"},
+];
+testCases.forEach(function(testCase) {
+ assert.eq(
+ [{_id: 0, date: ISODate(testCase.expect)}],
+ coll.aggregate({
+ $project: {
+ date: {
+ $dateFromString: {dateString: testCase.inputString, format: testCase.format}
+ }
+ }
+ })
+ .toArray(),
+ tojson(testCase));
+});
+
+/* --------------------------------------------------------------------------------------- */
/* Testing whether it throws the right assert for missing elements of a date/time string. */
coll.drop();
@@ -763,6 +784,11 @@ assertErrCodeAndErrMsgContains(coll,
ErrorCodes.ConversionFailure,
"Mixing of ISO dates with natural dates is not allowed");
+pipeline =
+ [{$project: {date: {$dateFromString: {dateString: "Dece 31 2018", format: "%b %d %Y"}}}}];
+assertErrCodeAndErrMsgContains(
+ coll, pipeline, ErrorCodes.ConversionFailure, "Error parsing date string");
+
// Test embedded null bytes in the 'dateString' and 'format' fields.
pipeline =
[{$project: {date: {$dateFromString: {dateString: "12/31\0/2018", format: "%m/%d/%Y"}}}}];
diff --git a/jstests/aggregation/optimize_away_pipeline.js b/jstests/aggregation/optimize_away_pipeline.js
index cd5ba0a5258..a4235abc185 100644
--- a/jstests/aggregation/optimize_away_pipeline.js
+++ b/jstests/aggregation/optimize_away_pipeline.js
@@ -137,7 +137,7 @@ function testGetMore({command = null, expectedResult = null} = {}) {
assert.sameMembers(documents, expectedResult);
}
-const groupPushdownEnabled = checkSBEEnabled(db, ["featureFlagSBEGroupPushdown"]);
+const groupPushdownEnabled = checkSBEEnabled(db);
// Calls 'assertPushdownEnabled' if groupPushdownEnabled is 'true'. Otherwise, it calls
// 'assertPushdownDisabled'.
diff --git a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_hj.js b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_hj.js
index 3d5b0c73248..df26f4b6ffa 100644
--- a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_hj.js
+++ b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_hj.js
@@ -8,7 +8,7 @@ load("jstests/libs/fixture_helpers.js"); // For isSharded.
load("jstests/libs/sbe_util.js"); // For checkSBEEnabled.
load("jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js"); // For runTests.
-if (!checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
+if (!checkSBEEnabled(db)) {
jsTestLog("Skipping the test because it only applies to $lookup in SBE");
return;
}
diff --git a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js
index 4710a162cdb..d6923cf5e79 100644
--- a/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js
+++ b/jstests/aggregation/sources/lookup/lookup_equijoin_semantics_lib.js
@@ -49,7 +49,7 @@ function setupCollections(localRecords, foreignRecords, foreignField) {
*/
function checkJoinConfiguration(explain) {
const eqLookupNodes = getAggPlanStages(explain, "EQ_LOOKUP");
- if (checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
+ if (checkSBEEnabled(db)) {
if (eqLookupNodes.length > 0) {
// The $lookup stage has been lowered. Check that it's using the expected join strategy.
assert.eq(currentJoinAlgorithm.strategy, eqLookupNodes[0].strategy, "Join strategy");
@@ -258,7 +258,7 @@ function runTests() {
{_id: 11, a: [[null, 1], 2]},
];
- if (checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
+ if (checkSBEEnabled(db)) {
// When lowered to SBE, "undefined" should only match "undefined".
runTest_SingleForeignRecord({
testDescription: "Undefined in foreign, top-level field in local",
diff --git a/jstests/aggregation/sources/lookup/lookup_query_stats.js b/jstests/aggregation/sources/lookup/lookup_query_stats.js
index 77731d1df84..74e14964e5d 100644
--- a/jstests/aggregation/sources/lookup/lookup_query_stats.js
+++ b/jstests/aggregation/sources/lookup/lookup_query_stats.js
@@ -21,9 +21,8 @@ load("jstests/libs/sbe_util.js"); // For checkSBEEnabled.
load("jstests/libs/sbe_explain_helpers.js"); // For getSbePlanStages and
// getQueryInfoAtTopLevelOrFirstStage.
-const isSBELookupEnabled = checkSBEEnabled(db, ["featureFlagSBELookupPushdown"]);
-const isSBELookupNLJEnabled =
- checkSBEEnabled(db, ["featureFlagSBELookupPushdown", "featureFlagSbeFull"]);
+const isSBELookupEnabled = checkSBEEnabled(db);
+const isSBELookupNLJEnabled = checkSBEEnabled(db, ["featureFlagSbeFull"]);
const testDB = db.getSiblingDB("lookup_query_stats");
testDB.dropDatabase();
diff --git a/jstests/aggregation/sources/lookup/profile_lookup.js b/jstests/aggregation/sources/lookup/profile_lookup.js
index fdd201d5521..fcfe48fa5cb 100644
--- a/jstests/aggregation/sources/lookup/profile_lookup.js
+++ b/jstests/aggregation/sources/lookup/profile_lookup.js
@@ -41,21 +41,13 @@ const actualCount = newTop.totals[foreignColl.getFullName()].commands.count -
oldTop.totals[foreignColl.getFullName()].commands.count;
// Compute the expected count as follows:
-// 1) If the feature flag is enabled, add one to the count. This is because we will take a lock
-// over 'foreignColl' and, even if we don't push down $lookup into SBE, this will still
-// increment the top counter for 'foreignColl' by one.
-// 2) If $lookup is NOT pushed down into SBE, then we increment the count by three. This is
-// because when executing $lookup in the classic engine, we will add one entry to top for the
-// foreign collection for each document in the local collection (of which there are three).
-let expectedCount = 0;
-const getFeatureFlagSBELookupPushdown =
- db.adminCommand({getParameter: 1, featureFlagSBELookupPushdown: 1});
-const isSBELookupPushdownEnabled =
- getFeatureFlagSBELookupPushdown.hasOwnProperty("featureFlagSBELookupPushdown") &&
- getFeatureFlagSBELookupPushdown["featureFlagSBELookupPushdown"]["value"];
-if (isSBELookupPushdownEnabled) {
- expectedCount++;
-}
+// 1) We expect the count to be at least one. This is because we will take a lock over 'foreignColl'
+// and, even if we don't push down $lookup into SBE, this will still increment the top counter for
+// 'foreignColl' by one.
+// 2) If $lookup is NOT pushed down into SBE, then we increment the count by three. This is because
+// when executing $lookup in the classic engine, we will add one entry to top for the foreign
+// collection for each document in the local collection (of which there are three).
+let expectedCount = 1;
const eqLookupNodes = getAggPlanStages(localColl.explain().aggregate(pipeline), "EQ_LOOKUP");
if (eqLookupNodes.length === 0) {
expectedCount += 3;
diff --git a/jstests/aggregation/sources/out/out_read_write_to_same_collection.js b/jstests/aggregation/sources/out/out_read_write_to_same_collection.js
index b77aca92537..5ceea5c4e9b 100644
--- a/jstests/aggregation/sources/out/out_read_write_to_same_collection.js
+++ b/jstests/aggregation/sources/out/out_read_write_to_same_collection.js
@@ -2,7 +2,11 @@
//
// This test assumes that collections are not implicitly sharded, since $out is prohibited if the
// output collection is sharded.
-// @tags: [assumes_unsharded_collection]
+// @tags: [
+// assumes_unsharded_collection,
+// # Asserts on the number of indexes.
+// assumes_no_implicit_index_creation,
+// ]
(function() {
"use strict";
diff --git a/jstests/aggregation/sources/project/remove_redundant_projects.js b/jstests/aggregation/sources/project/remove_redundant_projects.js
index fe31fade134..512efdd2546 100644
--- a/jstests/aggregation/sources/project/remove_redundant_projects.js
+++ b/jstests/aggregation/sources/project/remove_redundant_projects.js
@@ -18,7 +18,7 @@ assert.commandWorked(coll.insert({_id: {a: 1, b: 1}, a: 1, c: {d: 1}, e: ['elem1
let indexSpec = {a: 1, 'c.d': 1, 'e.0': 1};
-const groupPushdownEnabled = checkSBEEnabled(db, ["featureFlagSBEGroupPushdown"]);
+const groupPushdownEnabled = checkSBEEnabled(db);
/**
* Helper to test that for a given pipeline, the same results are returned whether or not an
diff --git a/jstests/aggregation/sources/unionWith/unionWith_allows_stages.js b/jstests/aggregation/sources/unionWith/unionWith_allows_stages.js
index a7c8b6b372b..3cf8394d1ca 100644
--- a/jstests/aggregation/sources/unionWith/unionWith_allows_stages.js
+++ b/jstests/aggregation/sources/unionWith/unionWith_allows_stages.js
@@ -2,6 +2,8 @@
* Test that $unionWith works with $geoNear, $text, and $indexStats
* Some of these stages cannot be used in facets.
* @tags: [
+ * # Asserts on the output of $indexStats.
+ * assumes_no_implicit_index_creation,
* do_not_wrap_aggregations_in_facets,
* ]
*/
diff --git a/jstests/aggregation/sources/unionWith/unionWith_explain.js b/jstests/aggregation/sources/unionWith/unionWith_explain.js
index 60d6a7ae4a4..a1db4d975e7 100644
--- a/jstests/aggregation/sources/unionWith/unionWith_explain.js
+++ b/jstests/aggregation/sources/unionWith/unionWith_explain.js
@@ -13,8 +13,6 @@ load("jstests/libs/fixture_helpers.js"); // For FixtureHelpers.
load("jstests/libs/analyze_plan.js"); // For getAggPlanStage.
load("jstests/libs/sbe_util.js"); // For checkSBEEnabled.
-const groupPushdownEnabled = checkSBEEnabled(db, ["featureFlagSBEGroupPushdown"]);
-
const testDB = db.getSiblingDB(jsTestName());
const collA = testDB.A;
collA.drop();
diff --git a/jstests/aggregation/spill_to_disk.js b/jstests/aggregation/spill_to_disk.js
index e89a1a8ef31..df33065ea1f 100644
--- a/jstests/aggregation/spill_to_disk.js
+++ b/jstests/aggregation/spill_to_disk.js
@@ -27,7 +27,7 @@ const sharded = FixtureHelpers.isSharded(coll);
const memoryLimitMB = sharded ? 200 : 100;
-const isSBELookupEnabled = checkSBEEnabled(db, ["featureFlagSBELookupPushdown"]);
+const isSBELookupEnabled = checkSBEEnabled(db);
const bigStr = Array(1024 * 1024 + 1).toString(); // 1MB of ','
for (let i = 0; i < memoryLimitMB + 1; i++)
diff --git a/jstests/auth/lib/commands_lib.js b/jstests/auth/lib/commands_lib.js
index a849f441a44..f67108737f0 100644
--- a/jstests/auth/lib/commands_lib.js
+++ b/jstests/auth/lib/commands_lib.js
@@ -3028,8 +3028,27 @@ var authCommandsLib = {
]
},
{
- testname: "_configsvrCommitChunkMigration",
- command: {_configsvrCommitChunkMigration: "x.y"},
+ testname: "_configsvrCommitChunkMigration",
+ command: {
+ _configsvrCommitChunkMigration: "db.fooHashed",
+ fromShard: "move_chunk_basic-rs0",
+ toShard: "move_chunk_basic-rs1",
+ migratedChunk: {
+ lastmod: {
+ e: new ObjectId('62b052ac7f5653479a67a54f'),
+ t: new Timestamp(1655722668, 22),
+ v: new Timestamp(1, 0)
+ },
+ min: {_id: MinKey},
+ max: {_id: -4611686018427387902}
+ },
+ fromShardCollectionVersion: {
+ e: new ObjectId('62b052ac7f5653479a67a54f'),
+ t: new Timestamp(1655722668, 22),
+ v: new Timestamp(1, 3)
+ },
+ validAfter: new Timestamp(1655722670, 6)
+ },
skipSharded: true,
expectFail: true,
testcases: [
@@ -4179,7 +4198,7 @@ var authCommandsLib = {
skipTest: (conn) => {
const hello = assert.commandWorked(conn.getDB("admin").runCommand({hello: 1}));
const isStandalone = hello.msg !== "isdbgrid" && !hello.hasOwnProperty('setName');
- return !TestData.setParameters.featureFlagClusterWideConfig || isStandalone;
+ return isStandalone;
},
testcases: [
{
@@ -5010,7 +5029,7 @@ var authCommandsLib = {
},
{
testname: "s_moveChunk",
- command: {moveChunk: "test.x"},
+ command: {moveChunk: "test.x", find:{}, to:"a"},
skipUnlessSharded: true,
testcases: [
{
@@ -5672,7 +5691,7 @@ var authCommandsLib = {
skipTest: (conn) => {
const hello = assert.commandWorked(conn.getDB("admin").runCommand({hello: 1}));
const isStandalone = hello.msg !== "isdbgrid" && !hello.hasOwnProperty('setName');
- return !TestData.setParameters.featureFlagClusterWideConfig || isStandalone;
+ return isStandalone;
},
testcases: [
{
diff --git a/jstests/auth/security_token.js b/jstests/auth/security_token.js
index 8ff91f8b6c2..82825dc6579 100644
--- a/jstests/auth/security_token.js
+++ b/jstests/auth/security_token.js
@@ -85,9 +85,11 @@ function runTest(conn, enabled, rst = undefined) {
// Test that no token equates to unauthenticated.
assert.commandFailed(tokenDB.runCommand({features: 1}));
- // Passing a security token with unknown fields will always fail.
+ // Passing a security token with unknown fields will fail at the client
+ // while trying to construct a signed security token.
+ const kIDLParserUnknownField = 40415;
tokenConn._setSecurityToken({invalid: 1});
- assert.commandFailed(tokenDB.runCommand({ping: 1}));
+ assert.throwsWithCode(() => tokenDB.runCommand({ping: 1}), kIDLParserUnknownField);
assertNoTokensProcessedYet(conn);
const [token, expect] = makeTokenAndExpect('user1', 'admin');
@@ -153,13 +155,17 @@ function runTests(enabled) {
runTest(standalone, enabled);
MongoRunner.stopMongod(standalone);
}
- {
+
+ // TODO SERVER-66708 Run on replica sets as well. Currently the namespace from oplog entries
+ // won't be deserialized including the tenantId.
+ /*{
const rst = new ReplSetTest({nodes: 2, nodeOptions: opts});
rst.startSet({keyFile: 'jstests/libs/key1'});
rst.initiate();
runTest(rst.getPrimary(), enabled, rst);
rst.stopSet();
- }
+ }*/
+
// Do not test sharding since mongos must have an authenticated connection to
// all mongod nodes, and this conflicts with proxying tokens which we'll be
// performing in mongoq.
diff --git a/jstests/auth/token_privileges.js b/jstests/auth/token_privileges.js
index f7d5c567b88..2c10e9caf44 100644
--- a/jstests/auth/token_privileges.js
+++ b/jstests/auth/token_privileges.js
@@ -72,11 +72,14 @@ const opts = {
runTest(standalone);
MongoRunner.stopMongod(standalone);
}
-{
+
+// TODO SERVER-66708 Run on replica sets as well. Currently the namespace from oplog entries
+// won't be deserialized including the tenantId.
+/*{
const rst = new ReplSetTest({nodes: 2, nodeOptions: opts});
rst.startSet({keyFile: 'jstests/libs/key1'});
rst.initiate();
runTest(rst.getPrimary(), rst);
rst.stopSet();
-}
+}*/
})();
diff --git a/jstests/change_streams/apply_ops.js b/jstests/change_streams/apply_ops.js
index f123902b65a..89f605ceba4 100644
--- a/jstests/change_streams/apply_ops.js
+++ b/jstests/change_streams/apply_ops.js
@@ -1,5 +1,12 @@
-// Tests that a change stream will correctly unwind applyOps entries generated by a transaction.
-// @tags: [uses_transactions, requires_snapshot_read, requires_majority_read_concern]
+/**
+ * Tests that a change stream will correctly unwind applyOps entries generated by a transaction.
+ * @tags: [
+ * uses_transactions,
+ * requires_fcv_61, // Pre-6.1 builds do not emit change stream events for atomic applyOps.
+ * requires_majority_read_concern,
+ * requires_snapshot_read,
+ * ]
+ */
(function() {
"use strict";
diff --git a/jstests/change_streams/expanded_update_description.js b/jstests/change_streams/expanded_update_description.js
index 758257a51e1..51ee93381b9 100644
--- a/jstests/change_streams/expanded_update_description.js
+++ b/jstests/change_streams/expanded_update_description.js
@@ -23,19 +23,21 @@ assert.commandWorked(db.coll.insert({
_id: 100,
"topLevelArray": [{subArray: [0, [0, [{bottomArray: [1, 2, kLargeStr]}]], 2, 3, kLargeStr]}],
"arrayForReplacement": [0, 1, 2, 3],
- "array.For.Resize": [kLargeStr, 1],
+ "arrayForResize": [kLargeStr, 1],
obj: {
'sub.obj': {'d.o.t.t.e.d.a.r.r.a.y..': [[{a: {'b.c': 1, field: kLargeStr}}, "truncated"]]}
},
'd.o.t.t.e.d.o.b.j.': {'sub.obj': {'b.c': 2}},
+ 'objectWithNumericField': {'0': {'1': 'numeric', field: kLargeStr}},
"arrayWithNumericField": [[{'0': "numeric", a: {'b.c': 1}, field: kLargeStr}]],
+ "arrayWithDotted.AndNumericFields": [[{'0': [{'1.2': {'a.b': null, c: kLargeStr}}]}]],
}));
const changeStreamCursor = cst.startWatchingChanges(
{pipeline: [{$changeStream: {showExpandedEvents: true}}], collection: db.coll});
-// Test to verify that 'specialFields.arrayIndices' reports all the arrayIndices along a path in the
-// presence of nested arrays.
+// Test that a path which only contains non-dotted fields and array indices is not reported under
+// 'disambiguatedPaths'.
assert.commandWorked(db.coll.update({_id: 100}, {
$set: {"a": 2, "topLevelArray.0.subArray.1.1.0.bottomArray.2": 3, "arrayForReplacement": [0]}
}));
@@ -49,62 +51,45 @@ let expected = {
{"arrayForReplacement": [0], "a": 2, "topLevelArray.0.subArray.1.1.0.bottomArray.2": 3},
removedFields: [],
truncatedArrays: [],
- specialFields: {
- arrayIndices: {
- "topLevelArray": [0],
- "topLevelArray.0.subArray": [1],
- "topLevelArray.0.subArray.1": [1],
- "topLevelArray.0.subArray.1.1": [0],
- "topLevelArray.0.subArray.1.1.0.bottomArray": [2],
- },
- dottedFields: {}
- }
+ disambiguatedPaths: {}
},
};
cst.assertNextChangesEqual({cursor: changeStreamCursor, expectedChanges: [expected]});
-// Tests that an update modifying multiple array elements are all reported under
-// 'specialFields.arrayIndices'.
-assert.commandWorked(db.coll.update(
- {_id: 100}, {$set: {"topLevelArray.0.subArray.2": 4, "topLevelArray.0.subArray.3": 5}}));
+// Tests that an update modifying a non-array numeric field name is reported as a string rather than
+// as an integer under 'disambiguatedPaths'. Array indexes are reported as integers.
+assert.commandWorked(
+ db.coll.update({_id: 100}, {$set: {"arrayWithNumericField.0.0.1": {"b.c": 1}}}));
expected = {
documentKey: {_id: 100},
ns: {db: "test", coll: "coll"},
operationType: "update",
updateDescription: {
- updatedFields: {"topLevelArray.0.subArray.2": 4, "topLevelArray.0.subArray.3": 5},
+ updatedFields: {"arrayWithNumericField.0.0.1": {"b.c": 1}},
removedFields: [],
truncatedArrays: [],
- specialFields: {
- arrayIndices: {topLevelArray: [0], "topLevelArray.0.subArray": [2, 3]},
- dottedFields: {}
- }
+ disambiguatedPaths: {"arrayWithNumericField.0.0.1": ["arrayWithNumericField", 0, 0, "1"]}
},
};
cst.assertNextChangesEqual({cursor: changeStreamCursor, expectedChanges: [expected]});
-// Tests that an update modifying a non-array numeric field name is NOT reported under
-// 'specialFields.arrayIndices'.
-assert.commandWorked(
- db.coll.update({_id: 100}, {$set: {"arrayWithNumericField.0.0.1": {"b.c": 1}}}));
+// Tests that an update modifying a non-array numeric field name is reported when no array indices
+// or dotted fields are present.
+assert.commandWorked(db.coll.update({_id: 100}, {$set: {"objectWithNumericField.0.1": "updated"}}));
expected = {
documentKey: {_id: 100},
ns: {db: "test", coll: "coll"},
operationType: "update",
updateDescription: {
- updatedFields: {"arrayWithNumericField.0.0.1": {"b.c": 1}},
+ updatedFields: {"objectWithNumericField.0.1": "updated"},
removedFields: [],
truncatedArrays: [],
- specialFields: {
- arrayIndices: {arrayWithNumericField: [0], "arrayWithNumericField.0": [0]},
- dottedFields: {}
- }
+ disambiguatedPaths: {"objectWithNumericField.0.1": ["objectWithNumericField", "0", "1"]}
},
};
cst.assertNextChangesEqual({cursor: changeStreamCursor, expectedChanges: [expected]});
-// Tests that an update with $unset array does not report the array under
-// 'specialFields.arrayIndices'.
+// Tests that an update with $unset array does not report the array under 'disambiguatedPaths'.
assert.commandWorked(db.coll.update({_id: 100}, [{$unset: ["arrayForReplacement"]}]));
expected = {
documentKey: {_id: 100},
@@ -114,15 +99,14 @@ expected = {
updatedFields: {},
removedFields: ["arrayForReplacement"],
truncatedArrays: [],
- specialFields: {arrayIndices: {}, dottedFields: {}}
+ disambiguatedPaths: {}
},
};
cst.assertNextChangesEqual({cursor: changeStreamCursor, expectedChanges: [expected]});
-// Tests that an update with 'truncatedArrays' does not report the array under
-// 'specialFields.arrayIndices'.
+// Tests that an update with 'truncatedArrays' does not report the array under 'disambiguatedPaths'.
assert.commandWorked(db.coll.update({_id: 100}, [
- {$replaceWith: {$setField: {field: "array.For.Resize", input: '$$ROOT', value: [kLargeStr]}}},
+ {$replaceWith: {$setField: {field: "arrayForResize", input: '$$ROOT', value: [kLargeStr]}}},
]));
expected = {
documentKey: {_id: 100},
@@ -131,14 +115,13 @@ expected = {
updateDescription: {
updatedFields: {},
removedFields: [],
- truncatedArrays: [{field: "array.For.Resize", newSize: 1}],
- specialFields: {arrayIndices: {}, dottedFields: {"": ["array.For.Resize"]}}
+ truncatedArrays: [{field: "arrayForResize", newSize: 1}],
+ disambiguatedPaths: {}
},
};
cst.assertNextChangesEqual({cursor: changeStreamCursor, expectedChanges: [expected]});
-// Verify that the top-level dotted fields are reported with empty path-prefix under
-// 'specialFields.dottedFields'.
+// Verify that top-level dotted fields are reported under 'disambiguatedPaths'.
assert.commandWorked(db.coll.update({_id: 100}, [
{
$replaceWith:
@@ -154,13 +137,13 @@ expected = {
updatedFields: {"d.o.t.t.e.d.o.b.j.": {subObj: 1}, "new.Field.": 1},
removedFields: [],
truncatedArrays: [],
- specialFields: {arrayIndices: {}, dottedFields: {"": ["d.o.t.t.e.d.o.b.j.", "new.Field."]}}
+ disambiguatedPaths:
+ {"d.o.t.t.e.d.o.b.j.": ["d.o.t.t.e.d.o.b.j."], "new.Field.": ["new.Field."]}
},
};
cst.assertNextChangesEqual({cursor: changeStreamCursor, expectedChanges: [expected]});
-// Verify that a dotted fields can be reported under both 'specialFields.arrayIndices' and
-// 'specialFields.dottedFields'.
+// Test that a combination of dotted fields and array indices are reported in 'disambiguatedPaths'.
assert.commandWorked(db.coll.update(
{_id: 100}, [{
$set: {
@@ -183,16 +166,39 @@ expected = {
updatedFields: {"obj.sub.obj.d.o.t.t.e.d.a.r.r.a.y...0.0.a.b.c": 2},
removedFields: [],
truncatedArrays: [{field: "obj.sub.obj.d.o.t.t.e.d.a.r.r.a.y...0", newSize: 1}],
- specialFields: {
- arrayIndices: {
- "obj.sub.obj.d.o.t.t.e.d.a.r.r.a.y..": [0],
- "obj.sub.obj.d.o.t.t.e.d.a.r.r.a.y...0": [0]
- },
- dottedFields: {
- "obj.sub.obj.d.o.t.t.e.d.a.r.r.a.y...0.0.a": ["b.c"],
- "obj.sub.obj": ["d.o.t.t.e.d.a.r.r.a.y.."],
- obj: ["sub.obj"]
- }
+ disambiguatedPaths: {
+ "obj.sub.obj.d.o.t.t.e.d.a.r.r.a.y...0":
+ ["obj", "sub.obj", "d.o.t.t.e.d.a.r.r.a.y..", 0],
+ "obj.sub.obj.d.o.t.t.e.d.a.r.r.a.y...0.0.a.b.c":
+ ["obj", "sub.obj", "d.o.t.t.e.d.a.r.r.a.y..", 0, 0, "a", "b.c"],
+ }
+ },
+};
+cst.assertNextChangesEqual({cursor: changeStreamCursor, expectedChanges: [expected]});
+
+// Test that an update which modifies a path containing dotted, numeric and array index fields
+// distinguishes all three in 'disambiguatedPaths'.
+assert.commandWorked(
+ db.coll.update({_id: 100}, [{
+ $replaceWith: {
+ $setField: {
+ field: "arrayWithDotted.AndNumericFields",
+ input: '$$ROOT',
+ value: {$literal: [[{'0': [{'1.2': {'a.b': true, c: kLargeStr}}]}]]}
+ }
+ }
+ }]));
+expected = {
+ documentKey: {_id: 100},
+ ns: {db: "test", coll: "coll"},
+ operationType: "update",
+ updateDescription: {
+ updatedFields: {"arrayWithDotted.AndNumericFields.0.0.0.0.1.2.a.b": true},
+ removedFields: [],
+ truncatedArrays: [],
+ disambiguatedPaths: {
+ "arrayWithDotted.AndNumericFields.0.0.0.0.1.2.a.b":
+ ["arrayWithDotted.AndNumericFields", 0, 0, "0", 0, "1.2", "a.b"]
}
},
};
diff --git a/jstests/change_streams/serverless/basic_read_from_change_collection.js b/jstests/change_streams/serverless/basic_read_from_change_collection.js
new file mode 100644
index 00000000000..26b0f5eee9c
--- /dev/null
+++ b/jstests/change_streams/serverless/basic_read_from_change_collection.js
@@ -0,0 +1,50 @@
+// Tests that a change stream can be opened on a change collection when one exists, and that an
+// exception is thrown if we attempt to open a stream while change streams are disabled.
+// @tags: [
+// featureFlagServerlessChangeStreams,
+// multiversion_incompatible,
+// featureFlagMongoStore,
+// ]
+
+(function() {
+"use strict";
+
+// TODO SERVER-66632 replace this with change stream disablement command. Extend the test cases for
+// enablement/disablement combinations.
+function disableChangeStream(connection) {
+ const configDB = connection.getDB("config");
+ assert(configDB.system.change_collection.drop());
+}
+
+(function runInReplicaSet() {
+ // TODO SERVER-66892 remove test-fixtures and let change stream passthrough create the test
+ // environment.
+ const replSetTest = new ReplSetTest({nodes: 1});
+ replSetTest.startSet({setParameter: "multitenancySupport=true"});
+ replSetTest.initiate();
+ const connection = replSetTest.getPrimary();
+
+    // Insert a document into the 'stockPrice' collection.

+ const testDb = connection.getDB("test");
+ const csCursor1 = connection.getDB("test").stockPrice.watch([]);
+ testDb.stockPrice.insert({_id: "mdb", price: 250});
+
+ // Verify that the change stream observes the required event.
+ assert.soon(() => csCursor1.hasNext());
+ const event = csCursor1.next();
+ assert.eq(event.documentKey._id, "mdb");
+
+ // Disable the change stream while the change stream cursor is still opened.
+ disableChangeStream(connection);
+
+    // Verify that the cursor throws a 'QueryPlanKilled' exception when getting the next event.
+ assert.throwsWithCode(() => assert.soon(() => csCursor1.hasNext()), ErrorCodes.QueryPlanKilled);
+
+    // Open a new change stream cursor while change streams are disabled and verify that a
+    // 'ChangeStreamNotEnabled' exception is thrown.
+ assert.throwsWithCode(() => connection.getDB("test").stock.watch([]),
+ ErrorCodes.ChangeStreamNotEnabled);
+
+ replSetTest.stopSet();
+})();
+}());
diff --git a/jstests/change_streams/show_expanded_events.js b/jstests/change_streams/show_expanded_events.js
index 64027e4c8b9..ec5f265ced0 100644
--- a/jstests/change_streams/show_expanded_events.js
+++ b/jstests/change_streams/show_expanded_events.js
@@ -110,12 +110,8 @@ assertChangeEvent(() => assert.commandWorked(coll.update({_id: 0}, {$inc: {a: 1}
ns,
operationType: 'update',
documentKey: {_id: 0},
- updateDescription: {
- removedFields: [],
- updatedFields: {a: 3},
- truncatedArrays: [],
- specialFields: {arrayIndices: {}, dottedFields: {}}
- },
+ updateDescription:
+ {removedFields: [], updatedFields: {a: 3}, truncatedArrays: [], disambiguatedPaths: {}},
});
// Test change stream event for 'remove' operation.
diff --git a/jstests/concurrency/fsm_workload_helpers/chunks.js b/jstests/concurrency/fsm_workload_helpers/chunks.js
index caa84a6c38c..2c71eda6a87 100644
--- a/jstests/concurrency/fsm_workload_helpers/chunks.js
+++ b/jstests/concurrency/fsm_workload_helpers/chunks.js
@@ -70,13 +70,16 @@ var ChunkHelper = (function() {
moveChunk: db[collName].getFullName(),
bounds: bounds,
to: toShard,
- _waitForDelete: waitForDelete
};
+ if (waitForDelete != null) {
+ cmd._waitForDelete = waitForDelete;
+ }
+
// Using _secondaryThrottle adds coverage for additional waits for write concern on the
// recipient during cloning.
- if (secondaryThrottle) {
- cmd._secondaryThrottle = true;
+ if (secondaryThrottle != null) {
+ cmd._secondaryThrottle = secondaryThrottle;
cmd.writeConcern = {w: "majority"}; // _secondaryThrottle requires a write concern.
}
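
The hunk above only attaches '_waitForDelete' and '_secondaryThrottle' to the moveChunk command when the caller supplies them, so the server otherwise applies its own defaults. A standalone sketch of that pattern (the helper name is made up for illustration):

// Sketch only: conditionally attach optional moveChunk parameters.
function buildMoveChunkCmdSketch(ns, bounds, toShard, waitForDelete, secondaryThrottle) {
    const cmd = {moveChunk: ns, bounds: bounds, to: toShard};
    if (waitForDelete != null) {
        cmd._waitForDelete = waitForDelete;
    }
    if (secondaryThrottle != null) {
        cmd._secondaryThrottle = secondaryThrottle;
        cmd.writeConcern = {w: "majority"};  // _secondaryThrottle requires a write concern.
    }
    return cmd;
}
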
diff --git a/jstests/concurrency/fsm_workloads/agg_lookup.js b/jstests/concurrency/fsm_workloads/agg_lookup.js
index 9395c503d3c..2b78703b96c 100644
--- a/jstests/concurrency/fsm_workloads/agg_lookup.js
+++ b/jstests/concurrency/fsm_workloads/agg_lookup.js
@@ -70,12 +70,8 @@ var $config = (function() {
function setup(db, collName, cluster) {
// Do not run the rest of the tests if the foreign collection is implicitly sharded but the
// flag to allow $lookup into a sharded collection is disabled.
- const getParam = db.adminCommand({
- getParameter: 1,
- featureFlagShardedLookup: 1,
- featureFlagSBELookupPushdown: 1,
- internalQueryForceClassicEngine: 1
- });
+ const getParam = db.adminCommand(
+ {getParameter: 1, featureFlagShardedLookup: 1, internalQueryForceClassicEngine: 1});
const isShardedLookupEnabled = getParam.hasOwnProperty("featureFlagShardedLookup") &&
getParam.featureFlagShardedLookup.value;
if (FixtureHelpers.isSharded(db[collName]) && !isShardedLookupEnabled) {
@@ -96,9 +92,8 @@ var $config = (function() {
assertWhenOwnColl.eq(this.numDocs, res.nInserted);
assertWhenOwnColl.eq(this.numDocs, db[collName].find().itcount());
- const isLookupPushdownEnabled = getParam.hasOwnProperty("featureFlagSBELookupPushdown") &&
+ const isLookupPushdownEnabled =
getParam.hasOwnProperty("internalQueryForceClassicEngine") &&
- getParam.featureFlagSBELookupPushdown.value &&
!getParam.internalQueryForceClassicEngine.value;
this.allowDiskUse = true;
diff --git a/jstests/concurrency/fsm_workloads/auth_drop_role.js b/jstests/concurrency/fsm_workloads/auth_drop_role.js
index 5cff820b3e9..275e56906d6 100644
--- a/jstests/concurrency/fsm_workloads/auth_drop_role.js
+++ b/jstests/concurrency/fsm_workloads/auth_drop_role.js
@@ -47,18 +47,25 @@ var $config = (function() {
// Some test machines may hit high contention during these concurrency tests
        // allow for occasional failure with retries.
- for (var i = 3; i >= 0; --i) {
- let dropResult = db.runCommand({dropRole: roleName, maxTimeMS: kMaxCmdTimeMs});
-
- if (dropResult === true) {
- // Success
+ for (var i = 5; i >= 0; --i) {
+ let cmdResult;
+ try {
+ cmdResult = db.runCommand({dropRole: roleName, maxTimeMS: kMaxCmdTimeMs});
+ assert.commandWorked(cmdResult);
break;
- } else if (i > 0) {
- // Failure, try again
- print("Retrying a dropRole() which resulted in: " + tojson(dropResult));
- } else {
- // Out of do-overs, just die.
- assertAlways(dropResult);
+ } catch (e) {
+ if (i > 0) {
+ // Failure, try again
+ print("Retrying dropRole(" + roleName + "), previous call resulted in " +
+ tojson(cmdResult));
+ if (cmdResult.code == ErrorCodes.SnapshotUnavailable) {
+ // Give pending catalog changes a chance to catch up.
+ sleep(5000);
+ }
+ } else {
+ // Out of do-overs, just die.
+ throw e;
+ }
}
}
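
A minimal sketch of the retry pattern introduced above, factored into a standalone helper; the helper name, retry count, and sleep duration are illustrative assumptions, not part of the patch.

// Sketch only: retry a command a bounded number of times, pausing when the failure suggests
// pending catalog changes, and surface the last failure once the retries are exhausted.
function runCommandWithRetriesSketch(targetDb, cmdObj, retries = 5) {
    for (let i = retries; i >= 0; --i) {
        const res = targetDb.runCommand(cmdObj);
        if (res.ok) {
            return res;
        }
        if (i === 0) {
            assert.commandWorked(res);  // Out of do-overs; throw with the full response.
        }
        print("Retrying " + tojson(cmdObj) + ", previous call resulted in " + tojson(res));
        if (res.code === ErrorCodes.SnapshotUnavailable) {
            sleep(5000);  // Give pending catalog changes a chance to catch up.
        }
    }
}
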
diff --git a/jstests/concurrency/fsm_workloads/cleanupOrphanedWhileMigrating.js b/jstests/concurrency/fsm_workloads/cleanupOrphanedWhileMigrating.js
index b34c3d60e1f..ec673ba935d 100644
--- a/jstests/concurrency/fsm_workloads/cleanupOrphanedWhileMigrating.js
+++ b/jstests/concurrency/fsm_workloads/cleanupOrphanedWhileMigrating.js
@@ -3,7 +3,7 @@
/**
* Performs range deletions while chunks are being moved.
*
- * @tags: [requires_sharding, assumes_balancer_on]
+ * @tags: [requires_sharding, assumes_balancer_on, antithesis_incompatible]
*/
load('jstests/concurrency/fsm_libs/extend_workload.js');
diff --git a/jstests/concurrency/fsm_workloads/collection_defragmentation.js b/jstests/concurrency/fsm_workloads/collection_defragmentation.js
index 7ec9bb8b071..c84729399e5 100644
--- a/jstests/concurrency/fsm_workloads/collection_defragmentation.js
+++ b/jstests/concurrency/fsm_workloads/collection_defragmentation.js
@@ -5,7 +5,7 @@
*
* Runs defragmentation on collections with concurrent operations.
*
- * @tags: [requires_sharding, assumes_balancer_on]
+ * @tags: [requires_sharding, assumes_balancer_on, antithesis_incompatible]
*/
const dbPrefix = jsTestName() + '_DB_';
diff --git a/jstests/concurrency/fsm_workloads/collection_uuid.js b/jstests/concurrency/fsm_workloads/collection_uuid.js
index 7e16eac0246..011d3423a5b 100644
--- a/jstests/concurrency/fsm_workloads/collection_uuid.js
+++ b/jstests/concurrency/fsm_workloads/collection_uuid.js
@@ -54,6 +54,10 @@ const runCommandInLoop = function(
// TODO (SERVER-64449): Get rid of this exception
ErrorCodes.OBSOLETE_StaleShardVersion,
ErrorCodes.QueryPlanKilled,
+ // StaleConfig is usually retried by the mongos, but in situations where multiple errors
+        // have occurred on the same batch and MultipleErrorsOccurred is returned, one of the errors
+ // could be StaleConfig and the other could be one that mongos does not retry the batch on.
+ ErrorCodes.StaleConfig,
];
let iteration = 0;
diff --git a/jstests/concurrency/fsm_workloads/create_collection_and_view.js b/jstests/concurrency/fsm_workloads/create_collection_and_view.js
index 5a17b8b3b7c..29bcddee65f 100644
--- a/jstests/concurrency/fsm_workloads/create_collection_and_view.js
+++ b/jstests/concurrency/fsm_workloads/create_collection_and_view.js
@@ -4,7 +4,7 @@
* Repeatedly creates a collection and a view with the same namespace. Validates that we never
* manage to have both a Collection and View created on the same namespace at the same time.
*
- * @tags: [catches_command_failures]
+ * @tags: [catches_command_failures, antithesis_incompatible]
*/
var $config = (function() {
diff --git a/jstests/concurrency/fsm_workloads/internal_transactions_kill_sessions.js b/jstests/concurrency/fsm_workloads/internal_transactions_kill_sessions.js
index 916479c6181..4d355c97b96 100644
--- a/jstests/concurrency/fsm_workloads/internal_transactions_kill_sessions.js
+++ b/jstests/concurrency/fsm_workloads/internal_transactions_kill_sessions.js
@@ -7,6 +7,7 @@
* @tags: [
* requires_fcv_60,
* uses_transactions,
+ * kills_random_sessions
* ]
*/
@@ -23,11 +24,9 @@ var $config = extendWorkload($config, function($config, $super) {
$config.data.insertInitialDocsOnSetUp = true;
// The transaction API does not abort internal transactions that are interrupted after they
- // have started to commit. Lowering the transactionLifetimeLimitSeconds enables a retry of a
- // retryable write that uses such an interrupted internal transaction to not get blocked
- // indefinitely (24 hours) due to the RetryableTransactionInProgress error.
- // TODO (SERVER-66725): Make incoming retryable transactions abort conflicting transactions
- // once.
+ // have started to commit. The first retry of that transaction will abort the open transaction,
+    // but the retry will block if the interruption happens again, so we lower the
+    // transactionLifetimeLimitSeconds so that subsequent retries do not block indefinitely (24 hours).
$config.data.lowerTransactionLifetimeLimitSeconds = true;
$config.data.expectDirtyDocs = {
@@ -81,6 +80,15 @@ var $config = extendWorkload($config, function($config, $super) {
return killSession(db, collName);
};
+ $config.teardown = function teardown(db, collName, cluster) {
+ $super.teardown.apply(this, arguments);
+
+ // If a client session is killed and the transaction API is running a non-retryable
+ // transaction on that session, the API may be killed and unable to abort the transaction,
+ // leaving it open, which can block later tasks like CheckReplDBHash.
+ this.killAllSessions(cluster);
+ };
+
$config.transitions = {
init: {
killSession: 0.2,
diff --git a/jstests/concurrency/fsm_workloads/internal_transactions_move_chunk.js b/jstests/concurrency/fsm_workloads/internal_transactions_move_chunk.js
index 122352d1d37..16062a9f36a 100644
--- a/jstests/concurrency/fsm_workloads/internal_transactions_move_chunk.js
+++ b/jstests/concurrency/fsm_workloads/internal_transactions_move_chunk.js
@@ -8,7 +8,8 @@
* @tags: [
* requires_fcv_60,
* requires_sharding,
- * uses_transactions
+ * uses_transactions,
+ * antithesis_incompatible
* ]
*/
load('jstests/concurrency/fsm_libs/extend_workload.js');
diff --git a/jstests/concurrency/fsm_workloads/internal_transactions_resharding.js b/jstests/concurrency/fsm_workloads/internal_transactions_resharding.js
index 23fa3fcc77e..d8443a2ef7b 100644
--- a/jstests/concurrency/fsm_workloads/internal_transactions_resharding.js
+++ b/jstests/concurrency/fsm_workloads/internal_transactions_resharding.js
@@ -8,7 +8,8 @@
* @tags: [
* requires_fcv_60,
* requires_sharding,
- * uses_transactions
+ * uses_transactions,
+ * antithesis_incompatible
* ]
*/
load('jstests/concurrency/fsm_libs/extend_workload.js');
@@ -26,10 +27,6 @@ var $config = extendWorkload($config, function($config, $super) {
$config.data.currentShardKeyIndex = -1;
$config.data.reshardingCount = 0;
- // TODO SERVER-67076: Investigate segfault in resharding image collection agg stage with
- // concurrent reaps.
- $config.data.overrideReapThreshold = false;
-
$config.data.getQueryForDocument = function getQueryForDocument(doc) {
// The query for a write command against a sharded collection must contain the shard key.
const query = $super.data.getQueryForDocument.apply(this, arguments);
diff --git a/jstests/concurrency/fsm_workloads/internal_transactions_sharded.js b/jstests/concurrency/fsm_workloads/internal_transactions_sharded.js
index f2c7b502a8d..018df932d56 100644
--- a/jstests/concurrency/fsm_workloads/internal_transactions_sharded.js
+++ b/jstests/concurrency/fsm_workloads/internal_transactions_sharded.js
@@ -8,7 +8,8 @@
* @tags: [
* requires_fcv_60,
* requires_sharding,
- * uses_transactions
+ * uses_transactions,
+ * antithesis_incompatible
* ]
*/
load('jstests/concurrency/fsm_libs/extend_workload.js');
@@ -105,9 +106,7 @@ var $config = extendWorkload($config, function($config, $super) {
}
}
- if (this.overrideReapThreshold) {
- this.overrideInternalTransactionsReapThreshold(cluster);
- }
+ this.overrideInternalTransactionsReapThreshold(cluster);
this.overrideStoreFindAndModifyImagesInSideCollection(cluster);
if (this.lowerTransactionLifetimeLimitSeconds) {
this.overrideTransactionLifetimeLimit(cluster);
diff --git a/jstests/concurrency/fsm_workloads/internal_transactions_sharded_from_mongod.js b/jstests/concurrency/fsm_workloads/internal_transactions_sharded_from_mongod.js
index f914980e6f4..f2ae41e3ac5 100644
--- a/jstests/concurrency/fsm_workloads/internal_transactions_sharded_from_mongod.js
+++ b/jstests/concurrency/fsm_workloads/internal_transactions_sharded_from_mongod.js
@@ -8,7 +8,8 @@
* @tags: [
* requires_fcv_60,
* requires_sharding,
- * uses_transactions
+ * uses_transactions,
+ * antithesis_incompatible
* ]
*/
load('jstests/concurrency/fsm_libs/extend_workload.js');
@@ -111,21 +112,16 @@ var $config = extendWorkload($config, function($config, $super) {
}
};
- $config.data.killAllSessions = function killAllSessions(cluster) {
- cluster.executeOnMongodNodes((db) => {
- assert.commandWorked(db.adminCommand({killAllSessions: []}));
- });
- };
-
$config.teardown = function teardown(db, collName, cluster) {
$super.teardown.apply(this, arguments);
// If a shard node that is acting as a router for an internal transaction is
- // killed/terminated/stepped down, the transaction would be left in-progress since nothing
+ // killed/terminated/stepped down or the transaction's session is killed while running a
+ // non-retryable transaction, the transaction would be left in-progress since nothing
        // would have aborted it. Such dangling transactions can cause the CheckReplDBHash hook to hang
// as the fsyncLock command requires taking the global S lock and it cannot do that while
// there is an in-progress transaction.
- if (TestData.runningWithShardStepdowns) {
+ if (TestData.runningWithShardStepdowns || this.retryOnKilledSession) {
this.killAllSessions(cluster);
}
};
diff --git a/jstests/concurrency/fsm_workloads/internal_transactions_sharded_from_mongod_kill_sessions.js b/jstests/concurrency/fsm_workloads/internal_transactions_sharded_from_mongod_kill_sessions.js
index edaba22247c..013850a86d6 100644
--- a/jstests/concurrency/fsm_workloads/internal_transactions_sharded_from_mongod_kill_sessions.js
+++ b/jstests/concurrency/fsm_workloads/internal_transactions_sharded_from_mongod_kill_sessions.js
@@ -10,6 +10,7 @@
* requires_fcv_60,
* requires_sharding,
* uses_transactions,
+ * antithesis_incompatible
* ]
*/
@@ -25,11 +26,9 @@ var $config = extendWorkload($config, function($config, $super) {
$config.data.insertInitialDocsOnSetUp = true;
// The transaction API does not abort internal transactions that are interrupted after they
- // have started to commit. Lowering the transactionLifetimeLimitSeconds enables a retry of a
- // retryable write that uses such an interrupted internal transaction to not get blocked
- // indefinitely (24 hours) due to the RetryableTransactionInProgress error.
- // TODO (SERVER-66725): Make incoming retryable transactions abort conflicting transactions
- // once.
+ // have started to commit. The first retry of that transaction will abort the open transaction,
+    // but the retry will block if the interruption happens again, so we lower the
+    // transactionLifetimeLimitSeconds so that subsequent retries do not block indefinitely (24 hours).
$config.data.lowerTransactionLifetimeLimitSeconds = true;
$config.data.expectDirtyDocs = {
diff --git a/jstests/concurrency/fsm_workloads/internal_transactions_unsharded.js b/jstests/concurrency/fsm_workloads/internal_transactions_unsharded.js
index c1ecbe773c2..828fb3357b6 100644
--- a/jstests/concurrency/fsm_workloads/internal_transactions_unsharded.js
+++ b/jstests/concurrency/fsm_workloads/internal_transactions_unsharded.js
@@ -78,7 +78,6 @@ var $config = extendWorkload($config, function($config, $super) {
// The reap threshold is overriden to get coverage for when it schedules reaps during an active
// workload.
$config.data.originalInternalSessionReapThreshold = {};
- $config.data.overrideReapThreshold = true;
// This workload supports setting the 'transactionLifetimeLimitSeconds' to 45 seconds
// (configurable) during setup() and restoring the original value during teardown().
@@ -547,6 +546,12 @@ var $config = extendWorkload($config, function($config, $super) {
});
};
+ $config.data.killAllSessions = function killAllSessions(cluster) {
+ cluster.executeOnMongodNodes((db) => {
+ assert.commandWorked(db.adminCommand({killAllSessions: []}));
+ });
+ };
+
$config.setup = function setup(db, collName, cluster) {
assert.commandWorked(db.createCollection(collName, {writeConcern: {w: "majority"}}));
if (this.insertInitialDocsOnSetUp) {
@@ -556,9 +561,7 @@ var $config = extendWorkload($config, function($config, $super) {
this.insertInitialDocuments(db, collName, tid);
}
}
- if (this.overrideReapThreshold) {
- this.overrideInternalTransactionsReapThreshold(cluster);
- }
+ this.overrideInternalTransactionsReapThreshold(cluster);
this.overrideStoreFindAndModifyImagesInSideCollection(cluster);
if (this.lowerTransactionLifetimeLimitSeconds) {
this.overrideTransactionLifetimeLimit(cluster);
@@ -566,9 +569,7 @@ var $config = extendWorkload($config, function($config, $super) {
};
$config.teardown = function teardown(db, collName, cluster) {
- if (this.overrideReapThreshold) {
- this.restoreInternalTransactionsReapThreshold(cluster);
- }
+ this.restoreInternalTransactionsReapThreshold(cluster);
this.restoreStoreFindAndModifyImagesInSideCollection(cluster);
if (this.lowerTransactionLifetimeLimitSeconds) {
this.restoreTransactionLifetimeLimit(cluster);
diff --git a/jstests/concurrency/fsm_workloads/multi_statement_transaction_atomicity_isolation_server_status_mongos.js b/jstests/concurrency/fsm_workloads/multi_statement_transaction_atomicity_isolation_server_status_mongos.js
index b1d4929b764..9f4ff6a31e1 100644
--- a/jstests/concurrency/fsm_workloads/multi_statement_transaction_atomicity_isolation_server_status_mongos.js
+++ b/jstests/concurrency/fsm_workloads/multi_statement_transaction_atomicity_isolation_server_status_mongos.js
@@ -2,8 +2,10 @@
/**
* Verifies the transactions server status metrics on mongos while running transactions.
+ * Temporarily disabled for BF-24311.
*
- * @tags: [requires_sharding, assumes_snapshot_transactions, uses_transactions]
+ * @tags: [__TEMPORARILY_DISABLED__, requires_sharding, assumes_snapshot_transactions,
+ * uses_transactions]
*/
load('jstests/concurrency/fsm_libs/extend_workload.js'); // for extendWorkload
diff --git a/jstests/concurrency/fsm_workloads/multi_statement_transaction_kill_sessions_atomicity_isolation.js b/jstests/concurrency/fsm_workloads/multi_statement_transaction_kill_sessions_atomicity_isolation.js
index 9970bb46a31..a1861646220 100644
--- a/jstests/concurrency/fsm_workloads/multi_statement_transaction_kill_sessions_atomicity_isolation.js
+++ b/jstests/concurrency/fsm_workloads/multi_statement_transaction_kill_sessions_atomicity_isolation.js
@@ -3,7 +3,7 @@
/**
* Tests periodically killing sessions that are running transactions.
*
- * @tags: [uses_transactions, assumes_snapshot_transactions]
+ * @tags: [uses_transactions, assumes_snapshot_transactions, kills_random_sessions]
*/
load('jstests/concurrency/fsm_libs/extend_workload.js'); // for extendWorkload
diff --git a/jstests/concurrency/fsm_workloads/multi_statement_transaction_simple_kill_sessions.js b/jstests/concurrency/fsm_workloads/multi_statement_transaction_simple_kill_sessions.js
index 359562be59b..94b595825cc 100644
--- a/jstests/concurrency/fsm_workloads/multi_statement_transaction_simple_kill_sessions.js
+++ b/jstests/concurrency/fsm_workloads/multi_statement_transaction_simple_kill_sessions.js
@@ -5,7 +5,7 @@
* transactions with two writes, which will require two phase commit in a sharded cluster if each
* write targets a different shard.
*
- * @tags: [uses_transactions, assumes_snapshot_transactions]
+ * @tags: [uses_transactions, assumes_snapshot_transactions, kills_random_sessions]
*/
load('jstests/concurrency/fsm_libs/extend_workload.js'); // for extendWorkload
diff --git a/jstests/core/api_version_new_50_language_features.js b/jstests/core/api_version_new_50_language_features.js
index cda462fdff3..8d73124b2eb 100644
--- a/jstests/core/api_version_new_50_language_features.js
+++ b/jstests/core/api_version_new_50_language_features.js
@@ -2,6 +2,7 @@
* Tests that language features introduced in version 4.9 or 5.0 are included in API Version 1.
*
* @tags: [
+ * requires_fcv_60,
* uses_api_parameters,
* ]
*/
diff --git a/jstests/core/api_version_new_51_language_features.js b/jstests/core/api_version_new_51_language_features.js
index a86d6007b41..48e73d7e052 100644
--- a/jstests/core/api_version_new_51_language_features.js
+++ b/jstests/core/api_version_new_51_language_features.js
@@ -2,8 +2,8 @@
* Tests that language features introduced in version 5.1 are included in API Version 1.
*
* @tags: [
- * requires_fcv_51,
- * uses_api_parameters,
+ * requires_fcv_60,
+ * uses_api_parameters
* ]
*/
diff --git a/jstests/core/api_version_new_52_language_features.js b/jstests/core/api_version_new_52_language_features.js
index 91fe800f6d8..f37666157dd 100644
--- a/jstests/core/api_version_new_52_language_features.js
+++ b/jstests/core/api_version_new_52_language_features.js
@@ -2,7 +2,7 @@
* Tests that language features introduced in version 5.2 are included in API Version 1.
*
* @tags: [
- * requires_fcv_52,
+ * requires_fcv_60,
* uses_api_parameters,
* ]
*/
diff --git a/jstests/core/capped_resize.js b/jstests/core/capped_resize.js
index 38cc7abd9a0..06baab6b21e 100644
--- a/jstests/core/capped_resize.js
+++ b/jstests/core/capped_resize.js
@@ -83,6 +83,14 @@ let verifyLimitUpdate = function(updates) {
assert.eq(stats.count, initialDocSize);
assert.lte(stats.size, maxSize);
+    // Resizing a capped collection to below 4096 bytes used to be disallowed. This
+ // restriction was lifted in SERVER-67036.
+ // We should see a reduction in collection size and count relative to the previous test case.
+ verifyLimitUpdate({cappedSize: 256});
+ stats = assert.commandWorked(cappedColl.stats());
+ assert.lt(stats.count, initialDocSize);
+ assert.lt(stats.size, maxSize);
+
// We expect the resizing of a capped collection to fail when maxSize <= 0 and maxSize >
// maxSizeCeiling.
const negativeSize = -1 * maxSize;
diff --git a/jstests/core/check_shard_index.js b/jstests/core/check_shard_index.js
index 61e84c038b1..2beb3c12891 100644
--- a/jstests/core/check_shard_index.js
+++ b/jstests/core/check_shard_index.js
@@ -150,4 +150,69 @@ res = db.runCommand(
{checkShardingIndex: "test.jstests_shardingindex", keyPattern: {x: 1, y: 1, z: 1}});
assert.eq(false, res.ok, "4e " + tojson(res));
+// -------------------------
+// Test error messages of checkShardingIndex failing:
+
+// Shard key is not a prefix of index key:
+f.drop();
+f.createIndex({x: 1});
+res = db.runCommand({checkShardingIndex: "test.jstests_shardingindex", keyPattern: {y: 1}});
+assert.eq(false, res.ok);
+assert(res.errmsg.includes("Shard key is not a prefix of index key."));
+
+// Index key is partial:
+f.drop();
+f.createIndex({x: 1, y: 1}, {partialFilterExpression: {y: {$gt: 0}}});
+res = db.runCommand({checkShardingIndex: "test.jstests_shardingindex", keyPattern: {x: 1, y: 1}});
+assert.eq(false, res.ok);
+assert(res.errmsg.includes("Index key is partial."));
+
+// Index key is sparse:
+f.drop();
+f.createIndex({x: 1, y: 1}, {sparse: true});
+res = db.runCommand({checkShardingIndex: "test.jstests_shardingindex", keyPattern: {x: 1, y: 1}});
+assert.eq(false, res.ok);
+assert(res.errmsg.includes("Index key is sparse."));
+
+// Index key is multikey:
+f.drop();
+f.createIndex({x: 1, y: 1});
+f.save({y: [1, 2, 3, 4, 5]});
+res = db.runCommand({checkShardingIndex: "test.jstests_shardingindex", keyPattern: {x: 1, y: 1}});
+assert.eq(false, res.ok);
+assert(res.errmsg.includes("Index key is multikey."));
+
+// Index key has a non-simple collation:
+f.drop();
+f.createIndex({x: 1, y: 1}, {collation: {locale: "en"}});
+res = db.runCommand({checkShardingIndex: "test.jstests_shardingindex", keyPattern: {x: 1, y: 1}});
+assert.eq(false, res.ok);
+assert(res.errmsg.includes("Index has a non-simple collation."));
+
+// Index key is sparse and index has non-simple collation:
+f.drop();
+f.createIndex({x: 1, y: 1}, {sparse: true, collation: {locale: "en"}});
+res = db.runCommand({checkShardingIndex: "test.jstests_shardingindex", keyPattern: {x: 1, y: 1}});
+assert.eq(false, res.ok);
+assert(res.errmsg.includes("Index key is sparse.") &&
+ res.errmsg.includes("Index has a non-simple collation."));
+
+// Multiple incompatible indexes: Index key is multikey and is partial:
+f.drop();
+f.createIndex({x: 1, y: 1}, {name: "index_1_part", partialFilterExpression: {x: {$gt: 0}}});
+f.createIndex({x: 1, y: 1}, {name: "index_2"});
+f.save({y: [1, 2, 3, 4, 5]});
+res = db.runCommand({checkShardingIndex: "test.jstests_shardingindex", keyPattern: {x: 1, y: 1}});
+assert.eq(false, res.ok);
+assert(res.errmsg.includes("Index key is multikey.") &&
+ res.errmsg.includes("Index key is partial."));
+
+// Multiple incompatible indexes: Index key is partial and sparse:
+f.drop();
+f.createIndex({x: 1, y: 1}, {name: "index_1_part", partialFilterExpression: {x: {$gt: 0}}});
+f.createIndex({x: 1, y: 1}, {name: "index_2_sparse", sparse: true});
+res = db.runCommand({checkShardingIndex: "test.jstests_shardingindex", keyPattern: {x: 1, y: 1}});
+assert.eq(false, res.ok);
+assert(res.errmsg.includes("Index key is partial.") && res.errmsg.includes("Index key is sparse."));
+
print("PASSED");
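
For contrast with the failure cases added above, a standalone sketch of a checkShardingIndex call that succeeds; the collection name is hypothetical and the namespace is derived from the shell's current database.

// Sketch only: a plain compound index that matches the proposed shard key passes the checks.
const shardIdxSketch = db.check_shard_index_sketch;
shardIdxSketch.drop();
assert.commandWorked(shardIdxSketch.createIndex({x: 1, y: 1}));
assert.commandWorked(shardIdxSketch.insert({x: 1, y: 1}));
assert.commandWorked(db.runCommand({
    checkShardingIndex: db.getName() + ".check_shard_index_sketch",
    keyPattern: {x: 1, y: 1}
}));
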
diff --git a/jstests/core/collection_uuid_coll_mod.js b/jstests/core/collection_uuid_coll_mod.js
index 52da75bb345..2e984d81713 100644
--- a/jstests/core/collection_uuid_coll_mod.js
+++ b/jstests/core/collection_uuid_coll_mod.js
@@ -57,7 +57,7 @@ assert.eq(res.actualCollection, null);
// 5. The command fails when the provided UUID corresponds to a different collection, even if the
// provided namespace does not exist.
-coll2.drop();
+assert.commandWorked(testDB.runCommand({drop: coll2.getName()}));
res = assert.commandFailedWithCode(
testDB.runCommand({collMod: coll2.getName(), collectionUUID: uuid}),
ErrorCodes.CollectionUUIDMismatch);
@@ -65,4 +65,5 @@ assert.eq(res.db, testDB.getName());
assert.eq(res.collectionUUID, uuid);
assert.eq(res.expectedCollection, coll2.getName());
assert.eq(res.actualCollection, coll.getName());
+assert(!testDB.getCollectionNames().includes(coll2.getName()));
})();
diff --git a/jstests/core/collection_uuid_drop.js b/jstests/core/collection_uuid_drop.js
index 276f591afdf..747a4b114ed 100644
--- a/jstests/core/collection_uuid_drop.js
+++ b/jstests/core/collection_uuid_drop.js
@@ -58,13 +58,14 @@ assert.eq(res.actualCollection, null);
// The command fails when the provided UUID corresponds to a different collection, even if the
// provided namespace does not exist.
-coll2.drop();
+assert.commandWorked(testDB.runCommand({drop: coll2.getName()}));
res = assert.commandFailedWithCode(testDB.runCommand({drop: coll2.getName(), collectionUUID: uuid}),
ErrorCodes.CollectionUUIDMismatch);
assert.eq(res.db, testDB.getName());
assert.eq(res.collectionUUID, uuid);
assert.eq(res.expectedCollection, coll2.getName());
assert.eq(res.actualCollection, coll.getName());
+assert(!testDB.getCollectionNames().includes(coll2.getName()));
// The command fails when the provided UUID corresponds to a different collection, even if the
// provided namespace is a view.
diff --git a/jstests/core/collection_uuid_find.js b/jstests/core/collection_uuid_find.js
index f3ce69f0e34..93d6d011266 100644
--- a/jstests/core/collection_uuid_find.js
+++ b/jstests/core/collection_uuid_find.js
@@ -56,13 +56,15 @@ assert.eq(res.actualCollection, null);
// The command fails when the provided UUID corresponds to a different collection, even if the
// provided namespace does not exist.
-coll2.drop();
+assert.commandWorkedOrFailedWithCode(testDB.runCommand({drop: coll2.getName()}),
+ ErrorCodes.NamespaceNotFound);
res = assert.commandFailedWithCode(testDB.runCommand({find: coll2.getName(), collectionUUID: uuid}),
ErrorCodes.CollectionUUIDMismatch);
assert.eq(res.db, testDB.getName());
assert.eq(res.collectionUUID, uuid);
assert.eq(res.expectedCollection, coll2.getName());
assert.eq(res.actualCollection, coll.getName());
+assert(!testDB.getCollectionNames().includes(coll2.getName()));
// The command fails when the provided UUID corresponds to a different collection, even if the
// provided namespace is a view.
diff --git a/jstests/core/collection_uuid_index_commands.js b/jstests/core/collection_uuid_index_commands.js
index a323859144c..76ee83d8336 100644
--- a/jstests/core/collection_uuid_index_commands.js
+++ b/jstests/core/collection_uuid_index_commands.js
@@ -82,10 +82,11 @@ const testCommand = function(cmd, cmdObj) {
jsTestLog("The command '" + cmd +
"' fails when the provided UUID corresponds to a different collection, even if the " +
"provided namespace does not exist.");
- coll2.drop();
+ assert.commandWorked(testDB.runCommand({drop: coll2.getName()}));
res =
assert.commandFailedWithCode(testDB.runCommand(cmdObj), ErrorCodes.CollectionUUIDMismatch);
validateErrorResponse(res, testDB.getName(), uuid, coll2.getName(), coll.getName());
+ assert(!testDB.getCollectionNames().includes(coll2.getName()));
jsTestLog("Only collections in the same database are specified by actualCollection.");
const otherDB = testDB.getSiblingDB(testDB.getName() + '_2');
diff --git a/jstests/core/collection_uuid_rename_collection.js b/jstests/core/collection_uuid_rename_collection.js
index 3dc99fe98ab..85e8507c9d2 100644
--- a/jstests/core/collection_uuid_rename_collection.js
+++ b/jstests/core/collection_uuid_rename_collection.js
@@ -162,7 +162,7 @@ assert.eq(res.actualCollection, null);
// The command fails when the provided UUID corresponds to a different collection, even if the
// provided source namespace does not exist.
-coll2.drop();
+assert.commandWorked(testDB.runCommand({drop: coll2.getName()}));
res = assert.commandFailedWithCode(testDB.adminCommand({
renameCollection: coll2.getFullName(),
to: coll3.getFullName(),
@@ -174,6 +174,7 @@ assert.eq(res.db, testDB.getName());
assert.eq(res.collectionUUID, uuid(coll));
assert.eq(res.expectedCollection, coll2.getName());
assert.eq(res.actualCollection, coll.getName());
+assert(!testDB.getCollectionNames().includes(coll2.getName()));
// The collectionUUID parameter cannot be provided when renaming a collection between databases.
const otherDBColl = db.getSiblingDB(jsTestName() + '_2').coll;
diff --git a/jstests/core/collection_uuid_write_commands.js b/jstests/core/collection_uuid_write_commands.js
index 03bd0b09ae7..0ab9794df6f 100644
--- a/jstests/core/collection_uuid_write_commands.js
+++ b/jstests/core/collection_uuid_write_commands.js
@@ -57,10 +57,12 @@ var testCommand = function(cmd, cmdObj) {
jsTestLog("The command '" + cmd +
"' fails when the provided UUID corresponds to a different collection, even if the " +
"provided namespace does not exist.");
- coll2.drop();
+ assert.commandWorkedOrFailedWithCode(testDB.runCommand({drop: coll2.getName()}),
+ ErrorCodes.NamespaceNotFound);
res =
assert.commandFailedWithCode(testDB.runCommand(cmdObj), ErrorCodes.CollectionUUIDMismatch);
validateErrorResponse(res, testDB.getName(), uuid, coll2.getName(), coll.getName());
+ assert(!testDB.getCollectionNames().includes(coll2.getName()));
jsTestLog("Only collections in the same database are specified by actualCollection.");
const otherDB = testDB.getSiblingDB(testDB.getName() + '_2');
@@ -75,5 +77,7 @@ var testCommand = function(cmd, cmdObj) {
testCommand("insert", {insert: "", documents: [{inserted: true}]});
testCommand("update", {update: "", updates: [{q: {_id: 0}, u: {$set: {updated: true}}}]});
+testCommand("update",
+ {update: "", updates: [{q: {_id: 0}, u: {$set: {updated: true}}, upsert: true}]});
testCommand("delete", {delete: "", deletes: [{q: {_id: 0}, limit: 1}]});
})();
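
For reference, a small standalone sketch of the collectionUUID precondition these tests exercise; the database and collection names are hypothetical.

// Sketch only: naming one collection while supplying another collection's UUID makes the
// write command fail with CollectionUUIDMismatch instead of writing anywhere.
const uuidSketchDB = db.getSiblingDB("collection_uuid_sketch");
assert.commandWorked(uuidSketchDB.dropDatabase());
assert.commandWorked(uuidSketchDB.createCollection("source"));
const sourceUUID = uuidSketchDB.getCollectionInfos({name: "source"})[0].info.uuid;
assert.commandFailedWithCode(
    uuidSketchDB.runCommand({insert: "other", documents: [{x: 1}], collectionUUID: sourceUUID}),
    ErrorCodes.CollectionUUIDMismatch);
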
diff --git a/jstests/core/columnstore_index_correctness.js b/jstests/core/columnstore_index_correctness.js
index b0115af1398..324b32def1e 100644
--- a/jstests/core/columnstore_index_correctness.js
+++ b/jstests/core/columnstore_index_correctness.js
@@ -511,6 +511,7 @@ const docs = [
{a: {b: [{c: [1, 2]}]}},
{a: {b: {c: [1, 2]}}},
{a: [[1, 2], [{b: [[1, 2], [{c: [[1, 2], [{}], 2]}], 2]}], 2]},
+ {a: [{m: 1, n: 2}, {m: 2, o: 1}]},
];
let docNum = 0;
@@ -537,7 +538,7 @@ const kProjection = {
};
// Run an explain.
-const explain = coll.find({}, kProjection).explain();
+let explain = coll.find({}, kProjection).explain();
assert(planHasStage(db, explain, "COLUMN_SCAN"), explain);
// Run a query getting all of the results using the column index.
@@ -549,4 +550,24 @@ for (let res of results) {
const originalDoc = coll.findOne({num: res.num});
assert.eq(res, trueResult, originalDoc);
}
+
+// Run a similar query that projects multiple fields with a shared parent object.
+const kSiblingProjection = {
+ _id: 0,
+ "a.m": 1,
+ "a.n": 1,
+ num: 1
+};
+
+explain = coll.find({}, kSiblingProjection).explain();
+assert(planHasStage(db, explain, "COLUMN_SCAN"), explain);
+
+results = coll.find({}, kSiblingProjection).toArray();
+assert.gt(results.length, 0);
+for (let res of results) {
+ const trueResult =
+ coll.find({num: res.num}, kSiblingProjection).hint({$natural: 1}).toArray()[0];
+ const originalDoc = coll.findOne({num: res.num});
+ assert.eq(res, trueResult, originalDoc);
+}
})();
diff --git a/jstests/core/count.js b/jstests/core/count.js
index c34f8721cef..f0ea858eca0 100644
--- a/jstests/core/count.js
+++ b/jstests/core/count.js
@@ -1,13 +1,21 @@
-// @tags: [requires_fastcount, assumes_against_mongod_not_mongos]
+/**
+ * Tests that count() in shell takes query.
+ *
+ * @tags: [
+ * requires_fastcount,
+ * assumes_against_mongod_not_mongos,
+ * ]
+ */
(function() {
"use strict";
-const coll = db.jstests_count;
+const collNamePrefix = 'jstests_count_';
+let collCount = 0;
+let coll = db.getCollection(collNamePrefix + collCount++);
coll.drop();
-assert.commandWorked(coll.insert({i: 1}));
-assert.commandWorked(coll.insert({i: 2}));
+assert.commandWorked(coll.insert([{_id: 1, i: 1}, {_id: 2, i: 2}]));
assert.eq(1, coll.find({i: 1}).count());
assert.eq(1, coll.count({i: 1}));
assert.eq(2, coll.find().count());
@@ -15,15 +23,17 @@ assert.eq(2, coll.find(undefined).count());
assert.eq(2, coll.find(null).count());
assert.eq(2, coll.count());
+coll = db.getCollection(collNamePrefix + collCount++);
coll.drop();
-assert.commandWorked(coll.insert({a: true, b: false}));
assert.commandWorked(coll.createIndex({b: 1, a: 1}));
+assert.commandWorked(coll.insert({a: true, b: false}));
assert.eq(1, coll.find({a: true, b: false}).count());
assert.eq(1, coll.find({b: false, a: true}).count());
+coll = db.getCollection(collNamePrefix + collCount++);
coll.drop();
-assert.commandWorked(coll.insert({a: true, b: false}));
assert.commandWorked(coll.createIndex({b: 1, a: 1, c: 1}));
+assert.commandWorked(coll.insert({a: true, b: false}));
assert.eq(1, coll.find({a: true, b: false}).count());
assert.eq(1, coll.find({b: false, a: true}).count());
diff --git a/jstests/core/exhaust.js b/jstests/core/exhaust.js
index 8a59ac54938..4508e7a51e6 100644
--- a/jstests/core/exhaust.js
+++ b/jstests/core/exhaust.js
@@ -71,5 +71,14 @@ assert.eq(
assert.eq(
numDocs - 1,
coll.find().addOption(DBQuery.Option.exhaust).limit(numDocs - 1).batchSize(100).itcount());
+
+// Test that exhaust with negative limit is allowed. A negative limit means "single batch": the
+// server will return just a single batch and then close the cursor, even if the limit has not yet
+// been reached. When the batchSize is not specified explicitly, we expect the default initial batch
+// size of 101 to be used.
+assert.eq(101, coll.find().addOption(DBQuery.Option.exhaust).limit(-numDocs).itcount());
+assert.eq(50,
+ coll.find().addOption(DBQuery.Option.exhaust).limit(-numDocs).batchSize(50).itcount());
+assert.eq(1, coll.find().addOption(DBQuery.Option.exhaust).limit(-1).itcount());
}());
}());
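
To make the expected counts above concrete, a standalone sketch with an explicit document count; the collection name is hypothetical and the assertions mirror the hunk.

// Sketch only: a negative limit requests a single batch, so the client sees
// min(|limit|, batch size, collection size) documents, with 101 as the default initial batch.
const exhaustSketchColl = db.exhaust_limit_sketch;
exhaustSketchColl.drop();
const kSketchNumDocs = 150;
const sketchBulk = exhaustSketchColl.initializeUnorderedBulkOp();
for (let i = 0; i < kSketchNumDocs; ++i) {
    sketchBulk.insert({_id: i});
}
assert.commandWorked(sketchBulk.execute());
assert.eq(
    101,
    exhaustSketchColl.find().addOption(DBQuery.Option.exhaust).limit(-kSketchNumDocs).itcount());
assert.eq(50,
          exhaustSketchColl.find()
              .addOption(DBQuery.Option.exhaust)
              .limit(-kSketchNumDocs)
              .batchSize(50)
              .itcount());
assert.eq(1, exhaustSketchColl.find().addOption(DBQuery.Option.exhaust).limit(-1).itcount());
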
diff --git a/jstests/core/geo_parse_err.js b/jstests/core/geo_parse_err.js
new file mode 100644
index 00000000000..73bc451bd7c
--- /dev/null
+++ b/jstests/core/geo_parse_err.js
@@ -0,0 +1,124 @@
+/**
+ * Test the error messages users get when creating geo objects. For example:
+ * - Do we get the error message we expect when:
+ * - We insert something of a different type than an array of doubles for coordinates?
+ * - When the number of loops in a simple polygon exceeds 1?
+ * @tags: [
+ * multiversion_incompatible
+ * ]
+ */
+
+(function() {
+"use strict";
+let t = db.geo_parse_err;
+t.drop();
+
+const indexname = "2dsphere";
+const bigCRS = {
+ type: "name",
+ properties: {name: "urn:x-mongodb:crs:strictwinding:EPSG:4326"}
+};
+
+t.createIndex({loc: indexname});
+
+// parseFlatPoint
+let err = t.insert({loc: {type: "Point", coordinates: "hello"}});
+assert.includes(err.getWriteError().errmsg,
+ 'Point must be an array or object, instead got type string');
+
+err = t.insert({loc: {type: "Point", coordinates: ["hello", 5]}});
+assert.includes(err.getWriteError().errmsg,
+ "Point must only contain numeric elements, instead got type string");
+
+err = t.insert({loc: {type: "Point", coordinates: [5 / 0, 5]}});
+assert.includes(err.getWriteError().errmsg, "Point coordinates must be finite numbers");
+
+// parseGeoJSONCoordinate
+err = t.insert({loc: {type: "LineString", coordinates: [5, 5]}});
+assert.includes(err.getWriteError().errmsg,
+ "GeoJSON coordinates must be an array, instead got type double");
+
+// parseArrayOfCoordinates
+err = t.insert({loc: {type: "LineString", coordinates: 5}});
+assert.includes(err.getWriteError().errmsg,
+ "GeoJSON coordinates must be an array of coordinates, instead got type double");
+// isLoopClosed
+err = t.insert({loc: {type: "Polygon", coordinates: [[[0, 0], [1, 2], [2, 3]]]}});
+assert.includes(err.getWriteError().errmsg,
+ "Loop is not closed, first vertex does not equal last vertex:");
+
+// parseGeoJSONPolygonCoordinates
+err = t.insert({loc: {type: "Polygon", coordinates: "hi"}});
+assert.includes(err.getWriteError().errmsg,
+ "Polygon coordinates must be an array, instead got type string");
+
+err = t.insert({loc: {type: "Polygon", coordinates: [[[0, 0], [1, 2], [0, 0]]]}});
+assert.includes(err.getWriteError().errmsg,
+ "Loop must have at least 3 different vertices, 2 unique vertices were provided:");
+
+// parseBigSimplePolygonCoordinates
+err = t.insert({loc: {type: "Polygon", coordinates: "", crs: bigCRS}});
+assert.includes(err.getWriteError().errmsg,
+ "Coordinates of polygon must be an array, instead got type string");
+
+err = t.insert({
+ loc: {
+ type: "Polygon",
+ coordinates:
+ [[[10.0, 10.0], [-10.0, 10.0], [-10.0, -10.0], [10.0, -10.0], [10.0, 10.0]], []],
+ crs: bigCRS
+ }
+});
+assert.includes(err.getWriteError().errmsg,
+ "Only one simple loop is allowed in a big polygon, instead provided 2");
+err = t.insert({
+ loc: {type: "Polygon", coordinates: [[[10.0, 10.0], [-10.0, 10.0], [10.0, 10.0]]], crs: bigCRS}
+});
+assert.includes(err.getWriteError().errmsg,
+ "Loop must have at least 3 different vertices, 2 unique vertices were provided:");
+
+// parseGeoJSONCRS
+const bigPoly20 = [[[10.0, 10.0], [-10.0, 10.0], [10.0, 10.0]]];
+
+err = t.insert({loc: {type: "Polygon", coordinates: bigPoly20, crs: {type: "name"}}});
+
+assert.includes(err.getWriteError().errmsg,
+ "CRS must have field \"properties\" which is an object, instead got type missing");
+
+err = t.insert({
+ loc: {
+ type: "Polygon",
+ coordinates: bigPoly20,
+ crs: {type: "name", properties: {nam: "urn:x-mongodb:crs:strictwinding:EPSG:4326"}}
+ }
+});
+assert.includes(err.getWriteError().errmsg,
+ "In CRS, \"properties.name\" must be a string, instead got type missing");
+
+// parseMultiPolygon
+err = t.insert({loc: {type: "MultiPolygon", coordinates: ""}});
+
+assert.includes(err.getWriteError().errmsg,
+ "MultiPolygon coordinates must be an array, instead got type string");
+
+// Geometry collection
+err = t.insert({
+ loc: {
+ type: "GeometryCollection",
+ geometries: [
+ {
+ type: "MultiPoint",
+ coordinates: [
+ [-73.9580, 40.8003],
+ [-73.9498, 40.7968],
+ [-73.9737, 40.7648],
+ [-73.9814, 40.7681]
+ ]
+ },
+ 5
+ ]
+ }
+});
+assert.includes(err.getWriteError().errmsg,
+ "Element 1 of \"geometries\" must be an object, instead got type double:");
+})(); \ No newline at end of file
diff --git a/jstests/core/geo_s2index.js b/jstests/core/geo_s2index.js
index e110cb1ce04..af4475a79cb 100644
--- a/jstests/core/geo_s2index.js
+++ b/jstests/core/geo_s2index.js
@@ -159,6 +159,12 @@ assert.commandFailedWithCode(res, ErrorCodes.TypeMismatch);
res = t.createIndex({loc: '2dsphere'}, {coarsestIndexedLevel: 'NOT_A_NUMBER'});
assert.commandFailedWithCode(res, ErrorCodes.TypeMismatch);
+res = t.createIndex({loc: '2dsphere'}, {finestIndexedLevel: true});
+assert.commandFailedWithCode(res, ErrorCodes.TypeMismatch);
+
+res = t.createIndex({loc: '2dsphere'}, {coarsestIndexedLevel: true});
+assert.commandFailedWithCode(res, ErrorCodes.TypeMismatch);
+
// Ensure polygon which previously triggered an assertion error in SERVER-19674
// is able to be indexed.
t.drop();
diff --git a/jstests/core/illegal_cmd_namespace.js b/jstests/core/illegal_cmd_namespace.js
new file mode 100644
index 00000000000..3dc26c4e67a
--- /dev/null
+++ b/jstests/core/illegal_cmd_namespace.js
@@ -0,0 +1,36 @@
+/**
+ * Test that an attempt to run a query over a $cmd namespace is not treated specially by the shell,
+ * but is rejected by the server.
+ *
+ * @tags: [
+ * assumes_unsharded_collection,
+ * ]
+ */
+(function() {
+"use strict";
+
+function testBadNamespace(collName) {
+ const coll = db[collName];
+ assert.commandFailedWithCode(db.runCommand({find: collName}), ErrorCodes.InvalidNamespace);
+ assert.throwsWithCode(() => coll.find().itcount(), ErrorCodes.InvalidNamespace);
+ assert.throwsWithCode(() => coll.findOne(), ErrorCodes.InvalidNamespace);
+}
+
+testBadNamespace("$cmd");
+testBadNamespace("$cmd.foo");
+
+// These namespaces were formerly accepted by old versions of the server as so-called
+// "pseudo-commands".
+testBadNamespace("$cmd.sys.inprog");
+testBadNamespace("$cmd.sys.killop");
+testBadNamespace("$cmd.sys.unlock");
+
+// These namespaces are used internally, but queries over them should be rejected.
+testBadNamespace("$cmd.listCollections");
+testBadNamespace("$cmd.aggregate");
+
+// "$cmd" or "$" are not allowed in the collection name in general.
+testBadNamespace("a$cmdb");
+testBadNamespace("$");
+testBadNamespace("a$b");
+}());
diff --git a/jstests/core/index2.js b/jstests/core/index2.js
index 2c37c48c871..5cfbb2f1590 100644
--- a/jstests/core/index2.js
+++ b/jstests/core/index2.js
@@ -1,52 +1,51 @@
-/**test indexing where the key is an embedded object.
+/**
+ * Test indexing where the key is an embedded object.
*/
-t = db.embeddedIndexTest2;
+(function() {
+'use strict';
+let t = db.index2_without_index;
t.drop();
-assert(t.findOne() == null);
-o = {
- name: "foo",
- z: {a: 17}
-};
-p = {
- name: "foo",
- z: {a: 17}
-};
-q = {
- name: "barrr",
- z: {a: 18}
-};
-r = {
- name: "barrr",
- z: {k: "zzz", L: [1, 2]}
-};
-
-t.save(o);
-
-assert(t.findOne().z.a == 17);
-
-t.save(p);
-t.save(q);
-
-assert(t.findOne({z: {a: 17}}).z.a == 17);
-assert(t.find({z: {a: 17}}).length() == 2);
-assert(t.find({z: {a: 18}}).length() == 1);
-
-t.save(r);
-
-assert(t.findOne({z: {a: 17}}).z.a == 17);
-assert(t.find({z: {a: 17}}).length() == 2);
-assert(t.find({z: {a: 18}}).length() == 1);
-
-t.createIndex({z: 1});
-
-assert(t.findOne({z: {a: 17}}).z.a == 17);
-assert(t.find({z: {a: 17}}).length() == 2);
-assert(t.find({z: {a: 18}}).length() == 1);
-
-assert(t.find().sort({z: 1}).length() == 4);
-assert(t.find().sort({z: -1}).length() == 4);
-
-assert(t.validate().valid);
+assert.eq(t.findOne(), null);
+
+const docs = [
+ {_id: 0, name: "foo", z: {a: 17}},
+ {_id: 1, name: "foo", z: {a: 17}},
+ {_id: 2, name: "barrr", z: {a: 18}},
+ {_id: 3, name: "barrr", z: {k: "zzz", L: [1, 2]}},
+];
+
+assert.commandWorked(t.insert(docs[0]));
+assert.eq(t.findOne().z.a, 17);
+
+// We will reuse these predicates to check the effect of
+// additional inserts and indexes on our query results.
+assert.commandWorked(t.insert(docs.slice(1, 3)));
+assert.eq(t.findOne({z: {a: 17}}).z.a, 17);
+assert.eq(t.countDocuments({z: {a: 17}}), 2);
+assert.eq(t.countDocuments({z: {a: 18}}), 1);
+
+// Insert a document with a key that does not match any of our
+// predicates.
+assert.commandWorked(t.insert(docs.slice(3)));
+assert.eq(t.findOne({z: {a: 17}}).z.a, 17);
+assert.eq(t.countDocuments({z: {a: 17}}), 2);
+assert.eq(t.countDocuments({z: {a: 18}}), 1);
+
+// Adding an index should not change results.
+t = db.index2_with_index;
+t.drop();
+assert.commandWorked(t.createIndex({z: 1}));
+assert.commandWorked(t.insert(docs));
+assert.eq(t.findOne({z: {a: 17}}).z.a, 17);
+assert.eq(t.countDocuments({z: {a: 17}}), 2);
+assert.eq(t.countDocuments({z: {a: 18}}), 1);
+
+// Providing a sort preference should not change results.
+const sortedDocsAscending = t.find().sort({z: 1});
+assert.eq(sortedDocsAscending.length(), 4, tojson(sortedDocsAscending.toArray()));
+const sortedDocsDescending = t.find().sort({z: -1});
+assert.eq(sortedDocsDescending.length(), 4, tojson(sortedDocsDescending.toArray()));
+})();
diff --git a/jstests/core/index3.js b/jstests/core/index3.js
deleted file mode 100644
index cc5ad3b0796..00000000000
--- a/jstests/core/index3.js
+++ /dev/null
@@ -1,18 +0,0 @@
-// Cannot implicitly shard accessed collections because of extra shard key index in sharded
-// collection.
-// @tags: [assumes_no_implicit_index_creation]
-
-t = db.index3;
-t.drop();
-
-assert(t.getIndexes().length == 0);
-
-t.createIndex({name: 1});
-
-t.save({name: "a"});
-
-t.createIndex({name: 1});
-
-assert(t.getIndexes().length == 2);
-
-assert(t.validate().valid);
diff --git a/jstests/core/index_stats.js b/jstests/core/index_stats.js
index 7e88fec9c83..cae76c1ec28 100644
--- a/jstests/core/index_stats.js
+++ b/jstests/core/index_stats.js
@@ -235,7 +235,7 @@ assert.eq(2,
])
.itcount());
assert.eq(1, getUsageCount("_id_", col), "Expected aggregation to use _id index");
-if (!checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
+if (!checkSBEEnabled(db)) {
assert.eq(2,
getUsageCount("_id_", foreignCollection),
"Expected each lookup to be tracked as an index use");
@@ -273,7 +273,7 @@ const pipeline = [
];
assert.eq(2, col.aggregate(pipeline).itcount());
assert.eq(1, getUsageCount("_id_", col), "Expected aggregation to use _id index");
-if (!checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
+if (!checkSBEEnabled(db)) {
assert.eq(2,
getUsageCount("_id_", foreignCollection),
"Expected each lookup to be tracked as an index use");
diff --git a/jstests/core/insert1.js b/jstests/core/insert1.js
index 29996e7527b..6fa207e0bfc 100644
--- a/jstests/core/insert1.js
+++ b/jstests/core/insert1.js
@@ -1,9 +1,3 @@
-/**
- * @tags: [
- * operations_longer_than_stepdown_interval_in_txns,
- * ]
- */
-
(function() {
'use strict';
diff --git a/jstests/core/projection_semantics.js b/jstests/core/projection_semantics.js
index a9b1c32bf01..42605b3df49 100644
--- a/jstests/core/projection_semantics.js
+++ b/jstests/core/projection_semantics.js
@@ -201,6 +201,42 @@ function testInputOutput({input, projection, expectedOutput, interestingIndexes
{b: 1},
]
});
+
+ // Test the case where two paths in a projection go through the same parent object.
+ const testIncludeOnlyADotBAndADotC = (input, output) => testInputOutput({
+ input: input,
+ projection: {'a.b': 1, 'a.c': 1, _id: 0},
+ expectedOutput: output,
+ interestingIndexes:
+ [{a: 1}, {'a.b': 1}, {'a.c': 1}, {'a.b': 1, 'a.c': 1}, {'a.b': 1, 'a.c': -1}]
+ });
+ testIncludeOnlyADotBAndADotC({_id: 0, a: {b: "scalar", c: "scalar", d: "extra"}},
+ {a: {b: "scalar", c: "scalar"}});
+ testIncludeOnlyADotBAndADotC({_id: 1, a: [{b: 1, c: 2, d: 3}, {b: 4, c: 5, d: 6}]},
+ {a: [{b: 1, c: 2}, {b: 4, c: 5}]});
+
+ // Array cases where one or both of the paths don't exist.
+ testIncludeOnlyADotBAndADotC({_id: 5, a: [{b: 1, c: 2}, {b: 3, d: 4}]},
+ {a: [{b: 1, c: 2}, {b: 3}]});
+ testIncludeOnlyADotBAndADotC({_id: 6, a: [{c: 1, d: 2}, {b: 3, d: 4}]}, {a: [{c: 1}, {b: 3}]});
+ testIncludeOnlyADotBAndADotC({_id: 7, a: []}, {a: []});
+ testIncludeOnlyADotBAndADotC({_id: 8, a: [{b: 1, c: 2}, "extra", {b: 3, c: 4}]},
+ {a: [{b: 1, c: 2}, {b: 3, c: 4}]});
+
+ // Non-array cases where one or both of the paths don't exist.
+ //
+    // TODO SERVER-23229: This will return different results if there is a covering index, so here,
+    // unlike elsewhere, we don't use any "interestingIndexes" in these test cases.
+ const testIncludeADotBAndCNoIndexes = (input, output) => testInputOutput({
+ input: input,
+ projection: {'a.b': 1, 'a.c': 1, _id: 0},
+ expectedOutput: output,
+ interestingIndexes: []
+ });
+
+ testIncludeADotBAndCNoIndexes({_id: 2, a: {b: "scalar", d: "extra"}}, {a: {b: "scalar"}});
+ testIncludeADotBAndCNoIndexes({_id: 3, a: {c: "scalar", d: "extra"}}, {a: {c: "scalar"}});
+ testIncludeADotBAndCNoIndexes({_id: 4, a: {d: "extra"}}, {a: {}});
}());
(function testInclusionLevelsOfNesting() {
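
A standalone sketch of the dotted-path inclusion semantics exercised above, reusing one of the example documents; the collection name is hypothetical.

// Sketch only: two included paths share the parent "a", and each array element keeps only the
// included subfields it actually has, matching the expected output for the test case above.
const projSketchColl = db.projection_semantics_sketch;
projSketchColl.drop();
assert.commandWorked(projSketchColl.insert({_id: 5, a: [{b: 1, c: 2}, {b: 3, d: 4}]}));
const projSketchOut = projSketchColl.findOne({_id: 5}, {'a.b': 1, 'a.c': 1, _id: 0});
assert.eq(projSketchOut, {a: [{b: 1, c: 2}, {b: 3}]});
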
diff --git a/jstests/core/timeseries/bucket_unpacking_with_sort.js b/jstests/core/timeseries/bucket_unpacking_with_sort.js
index b5f145b6e41..029697be74a 100644
--- a/jstests/core/timeseries/bucket_unpacking_with_sort.js
+++ b/jstests/core/timeseries/bucket_unpacking_with_sort.js
@@ -25,6 +25,7 @@
load("jstests/libs/fixture_helpers.js"); // For FixtureHelpers.
load("jstests/aggregation/extras/utils.js"); // For getExplainedPipelineFromAggregation.
load("jstests/core/timeseries/libs/timeseries.js"); // For TimeseriesTest
+load("jstests/libs/analyze_plan.js"); // For getAggPlanStage
if (!TimeseriesTest.bucketUnpackWithSortEnabled(db.getMongo())) {
jsTestLog("Skipping test because 'BucketUnpackWithSort' is disabled.");
@@ -133,8 +134,30 @@ const hasInternalBoundedSort = (pipeline) =>
const findFirstMatch = (pipeline) => pipeline.find(stage => stage.hasOwnProperty("$match"));
+const getWinningPlan = (explain) => {
+ if (explain.hasOwnProperty("shards")) {
+ for (const shardName in explain.shards) {
+ return explain.shards[shardName].stages[0]["$cursor"].queryPlanner.winningPlan;
+ }
+ }
+ return explain.stages[0]["$cursor"].queryPlanner.winningPlan;
+};
+
+const getAccessPathFromWinningPlan = (winningPlan) => {
+ if (winningPlan.stage == "SHARDING_FILTER" || winningPlan.stage === "FETCH") {
+ return getAccessPathFromWinningPlan(winningPlan.inputStage);
+ } else if (winningPlan.stage === "COLLSCAN" || winningPlan.stage === "IXSCAN") {
+ return winningPlan;
+ }
+};
+
+const getAccessPath = (explain) => {
+ return getAccessPathFromWinningPlan(getWinningPlan(explain));
+};
+
const setup = (coll, createIndex = null) => {
if (createIndex) {
+ assert.commandWorked(coll.dropIndexes());
assert.commandWorked(coll.createIndex(createIndex));
}
};
@@ -153,6 +176,7 @@ const setup = (coll, createIndex = null) => {
const runRewritesTest = (sortSpec,
createIndex,
hint,
+ expectedAccessPath,
testColl,
precise,
intermediaryStages = [],
@@ -161,6 +185,7 @@ const runRewritesTest = (sortSpec,
sortSpec,
createIndex,
hint,
+ expectedAccessPath,
testColl,
precise,
intermediaryStages,
@@ -211,7 +236,12 @@ const runRewritesTest = (sortSpec,
// changing out from under us.
const bucketSpanMatch = {
$match: {
- $expr: {$lte: [{$subtract: ["$control.max.t", "$control.min.t"]}, {$const: 3600000}]},
+ $expr: {
+ $lte: [
+ {$subtract: ["$control.max.t", "$control.min.t"]},
+ {$const: NumberLong(3600000)}
+ ]
+ },
}
};
let foundMatch = findFirstMatch(optExplain);
@@ -224,6 +254,15 @@ const runRewritesTest = (sortSpec,
sortDoc(bucketSpanMatch),
'Did not expect an extra $match to check the bucket span');
}
+
+ if (expectedAccessPath) {
+ const paths = getAggPlanStages(optExplainFull, expectedAccessPath.stage);
+ for (const path of paths) {
+ for (const key in expectedAccessPath) {
+ assert.eq(path[key], expectedAccessPath[key]);
+ }
+ }
+ }
};
const runDoesntRewriteTest = (sortSpec, createIndex, hint, testColl, intermediaryStages = []) => {
@@ -247,46 +286,75 @@ const runDoesntRewriteTest = (sortSpec, createIndex, hint, testColl, intermediar
assert(!containsOptimization, optExplainFull);
};
+const forwardCollscan = {
+ stage: "COLLSCAN",
+ direction: "forward"
+};
+const backwardCollscan = {
+ stage: "COLLSCAN",
+ direction: "backward"
+};
+// We drop all other indexes during runRewritesTest, so asserting that an IXSCAN is used is enough.
+const forwardIxscan = {
+ stage: "IXSCAN",
+ direction: "forward"
+};
+const backwardIxscan = {
+ stage: "IXSCAN",
+ direction: "backward"
+};
+
// Collscan cases
-runRewritesTest({t: 1}, null, null, coll, true);
-runRewritesTest({t: -1}, null, {$natural: -1}, coll, false);
+runRewritesTest({t: 1}, null, null, forwardCollscan, coll, true);
+runRewritesTest({t: -1}, null, null, backwardCollscan, coll, false);
// Indexed cases
-runRewritesTest({t: 1}, {t: 1}, {t: 1}, coll, true);
-runRewritesTest({t: -1}, {t: -1}, {t: -1}, coll, true);
-runRewritesTest({t: 1}, {t: 1}, {t: 1}, coll, true);
-runRewritesTest({m: 1, t: -1}, {m: 1, t: -1}, {m: 1, t: -1}, metaColl, true);
-runRewritesTest({m: -1, t: 1}, {m: -1, t: 1}, {m: -1, t: 1}, metaColl, true);
-runRewritesTest({m: -1, t: -1}, {m: -1, t: -1}, {m: -1, t: -1}, metaColl, true);
-runRewritesTest({m: 1, t: 1}, {m: 1, t: 1}, {m: 1, t: 1}, metaColl, true);
+runRewritesTest({t: 1}, {t: 1}, null, null, coll, true);
+runRewritesTest({t: -1}, {t: -1}, {t: -1}, forwardIxscan, coll, true);
+runRewritesTest({t: 1}, {t: 1}, {t: 1}, forwardIxscan, coll, true);
+runRewritesTest({t: 1}, {t: -1}, {t: -1}, backwardIxscan, coll, false);
+runRewritesTest({t: -1}, {t: 1}, {t: 1}, backwardIxscan, coll, false);
+runRewritesTest({m: 1, t: -1}, {m: 1, t: -1}, {m: 1, t: -1}, forwardIxscan, metaColl, true);
+runRewritesTest({m: -1, t: 1}, {m: -1, t: 1}, {m: -1, t: 1}, forwardIxscan, metaColl, true);
+runRewritesTest({m: -1, t: -1}, {m: -1, t: -1}, {m: -1, t: -1}, forwardIxscan, metaColl, true);
+runRewritesTest({m: 1, t: 1}, {m: 1, t: 1}, {m: 1, t: 1}, forwardIxscan, metaColl, true);
// Intermediary projects that don't modify sorted fields are allowed.
-runRewritesTest({m: 1, t: 1}, {m: 1, t: 1}, {m: 1, t: 1}, metaColl, true, [{$project: {a: 0}}]);
runRewritesTest(
- {m: 1, t: 1}, {m: 1, t: 1}, {m: 1, t: 1}, metaColl, true, [{$project: {m: 1, t: 1}}]);
-runRewritesTest({t: 1}, {t: 1}, {t: 1}, metaColl, true, [{$project: {m: 0, _id: 0}}]);
-runRewritesTest({'m.b': 1, t: 1}, {'m.b': 1, t: 1}, {'m.b': 1, t: 1}, metaCollSubFields, true, [
- {$project: {'m.a': 0}}
+ {m: 1, t: 1}, {m: 1, t: 1}, {m: 1, t: 1}, forwardIxscan, metaColl, true, [{$project: {a: 0}}]);
+runRewritesTest({m: 1, t: 1}, {m: 1, t: 1}, {m: 1, t: 1}, forwardIxscan, metaColl, true, [
+ {$project: {m: 1, t: 1}}
]);
+runRewritesTest(
+ {t: 1}, {t: 1}, {t: 1}, forwardIxscan, metaColl, true, [{$project: {m: 0, _id: 0}}]);
+runRewritesTest(
+ {'m.b': 1, t: 1}, {'m.b': 1, t: 1}, {'m.b': 1, t: 1}, forwardIxscan, metaCollSubFields, true, [
+ {$project: {'m.a': 0}}
+ ]);
// Test multiple meta fields
let metaIndexObj = Object.assign({}, ...subFields.map(field => ({[`m.${field}`]: 1})));
Object.assign(metaIndexObj, {t: 1});
-runRewritesTest(metaIndexObj, metaIndexObj, metaIndexObj, metaCollSubFields, true);
-runRewritesTest(
- metaIndexObj, metaIndexObj, metaIndexObj, metaCollSubFields, true, [{$project: {m: 1, t: 1}}]);
+runRewritesTest(metaIndexObj, metaIndexObj, metaIndexObj, forwardIxscan, metaCollSubFields, true);
+runRewritesTest(metaIndexObj, metaIndexObj, metaIndexObj, forwardIxscan, metaCollSubFields, true, [
+ {$project: {m: 1, t: 1}}
+]);
// Check sort-limit optimization.
-runRewritesTest({t: 1}, {t: 1}, {t: 1}, coll, true, [], [{$limit: 10}]);
+runRewritesTest({t: 1}, {t: 1}, {t: 1}, null, coll, true, [], [{$limit: 10}]);
// Check set window fields is optimized as well.
// Since {k: 1} cannot provide a bounded sort, we know that if there is a bounded sort it comes
// from setWindowFields.
-runRewritesTest({k: 1}, {m: 1, t: 1}, {m: 1, t: 1}, metaColl, true, [], [
+runRewritesTest({k: 1}, {m: 1, t: 1}, {m: 1, t: 1}, null, metaColl, true, [], [
{$setWindowFields: {partitionBy: "$m", sortBy: {t: 1}, output: {arr: {$max: "$t"}}}}
]);
+// Test that when a collection scan is hinted, we rewrite to a bounded sort even if the hinted
+// direction is opposite to the sort.
+runRewritesTest({t: -1}, null, {$natural: 1}, backwardCollscan, coll, false, [], []);
+runRewritesTest({t: 1}, null, {$natural: -1}, forwardCollscan, coll, true, [], []);
-// Negative tests
+// Negative tests and backwards cases
for (let m = -1; m < 2; m++) {
for (let t = -1; t < 2; t++) {
for (let k = -1; k < 2; k++) {
@@ -321,12 +389,7 @@ for (let m = -1; m < 2; m++) {
// For the meta case, negate the time order.
// For the non-meta case, use a collscan with a negated order.
if (m == 0) {
- if (t == 0) {
- // Do not execute a test run.
- } else {
- sort = {t: t};
- hint = {$natural: -t};
- }
+ // Do not execute a test run.
} else {
if (t == 0) {
// Do not execute a test run.
@@ -338,8 +401,9 @@ for (let m = -1; m < 2; m++) {
}
}
- if (sort)
+ if (sort) {
runDoesntRewriteTest(sort, createIndex, hint, usesMeta ? metaColl : coll);
+ }
sort = null;
hint = null;
@@ -348,13 +412,7 @@ for (let m = -1; m < 2; m++) {
// For the meta case, negate the meta order.
// For the non-meta case, use an index instead of a collscan.
if (m == 0) {
- if (t == 0) {
- // Do not execute a test run.
- } else {
- sort = {t: t};
- createIndex = {t: -t};
- hint = createIndex;
- }
+ // Do not execute a test run.
} else {
if (t == 0) {
// Do not execute a test run.
@@ -366,8 +424,9 @@ for (let m = -1; m < 2; m++) {
}
}
- if (sort)
+ if (sort) {
runDoesntRewriteTest(sort, createIndex, hint, usesMeta ? metaColl : coll);
+ }
sort = null;
hint = null;
@@ -392,7 +451,8 @@ for (let m = -1; m < 2; m++) {
}
if (sort)
- runDoesntRewriteTest(sort, createIndex, hint, usesMeta ? metaColl : coll);
+ runRewritesTest(
+ sort, createIndex, hint, backwardIxscan, usesMeta ? metaColl : coll);
}
}
}
@@ -442,11 +502,13 @@ for (const sort of [-1, +1]) {
for (const m of [-1, +1]) {
for (const t of [-1, +1]) {
const index = {m, t};
- // TODO SERVER-64994 will allow reverse scan.
- if (t === sort)
- runRewritesTest({t: sort}, index, index, metaColl, true, [{$match: {m: 7}}]);
- else
- runDoesntRewriteTest({t: sort}, index, index, metaColl, [{$match: {m: 7}}]);
+ const expectedAccessPath = t === sort ? forwardIxscan : backwardIxscan;
+ runRewritesTest({t: sort}, index, index, expectedAccessPath, metaColl, t === sort, [
+ {$match: {m: 7}}
+ ]);
+ runRewritesTest({t: sort}, index, null, expectedAccessPath, metaColl, t === sort, [
+ {$match: {m: 7}}
+ ]);
}
}
}
@@ -458,13 +520,16 @@ for (const sort of [-1, +1]) {
for (const t of [-1, +1]) {
for (const trailing of [{}, {x: 1, y: -1}]) {
const index = Object.merge({'m.a': a, 'm.b': b, t: t}, trailing);
- // TODO SERVER-64994 will allow reverse scan.
- if (t === sort)
- runRewritesTest({t: sort}, index, index, metaCollSubFields, true, [
- {$match: {'m.a': 5, 'm.b': 5}}
- ]);
- else
- runDoesntRewriteTest({t: sort}, index, index, metaCollSubFields, [
+ const expectedAccessPath = t === sort ? forwardIxscan : backwardIxscan;
+ runRewritesTest({t: sort},
+ index,
+ index,
+ expectedAccessPath,
+ metaCollSubFields,
+ t === sort,
+ [{$match: {'m.a': 5, 'm.b': 5}}]);
+ runRewritesTest(
+ {t: sort}, index, null, expectedAccessPath, metaCollSubFields, t === sort, [
{$match: {'m.a': 5, 'm.b': 5}}
]);
}
@@ -494,11 +559,15 @@ for (const ixA of [-1, +1]) {
// the index key. The index and sort are compatible iff they agree on
// whether or not these two fields are in the same direction.
if (ixB * ixT === sortB * sortT) {
- // TODO SERVER-64994 will allow reverse scan.
- if (ixT === sortT)
- runRewritesTest(sort, ix, ix, metaCollSubFields, true, predicate);
- else
- runDoesntRewriteTest(sort, ix, ix, metaCollSubFields, predicate);
+ runRewritesTest(
+ sort, ix, ix, null, metaCollSubFields, ixT === sortT, predicate);
+ runRewritesTest(sort,
+ ix,
+ null,
+ ixT === sortT ? forwardIxscan : backwardIxscan,
+ metaCollSubFields,
+ ixT === sortT,
+ predicate);
} else {
runDoesntRewriteTest(sort, ix, ix, metaCollSubFields, predicate);
}
@@ -530,11 +599,15 @@ for (const ixA of [-1, +1]) {
// in the same direction.
const predicate = [{$match: {'m.a': {$gte: -999, $lte: 999}, 'm.b': 7}}];
if (ixA * ixT === sortA * sortT) {
- // TODO SERVER-64994 will allow reverse scan.
- if (ixT === sortT)
- runRewritesTest(sort, ix, ix, metaCollSubFields, true, predicate);
- else
- runDoesntRewriteTest(sort, ix, ix, metaCollSubFields, predicate);
+ runRewritesTest(
+ sort, ix, ix, null, metaCollSubFields, ixT === sortT, predicate);
+ runRewritesTest(sort,
+ ix,
+ null,
+ ixT === sortT ? forwardIxscan : backwardIxscan,
+ metaCollSubFields,
+ ixT === sortT,
+ predicate);
} else {
runDoesntRewriteTest(sort, ix, ix, metaCollSubFields, predicate);
}
@@ -578,8 +651,12 @@ runDoesntRewriteTest({t: 1},
{
// When the collation of the query matches the index, an equality predicate in the query
// becomes a 1-point interval in the index bounds.
- runRewritesTest({t: 1}, {m: 1, t: 1}, {m: 1, t: 1}, csStringColl, true, [{$match: {m: 'a'}}]);
- runRewritesTest({t: 1}, {m: 1, t: 1}, {m: 1, t: 1}, ciStringColl, true, [{$match: {m: 'a'}}]);
+ runRewritesTest({t: 1}, {m: 1, t: 1}, {m: 1, t: 1}, forwardIxscan, csStringColl, true, [
+ {$match: {m: 'a'}}
+ ]);
+ runRewritesTest({t: 1}, {m: 1, t: 1}, {m: 1, t: 1}, forwardIxscan, ciStringColl, true, [
+ {$match: {m: 'a'}}
+ ]);
// When the collation doesn't match, then the equality predicate is not a 1-point interval
// in the index.
csStringColl.dropIndexes();
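
For illustration, a minimal shell sketch (not taken from the patch) of the direction check these rewrite tests rely on; it assumes a time-series collection `coll` with time field `t`, a `{t: 1}` index, and the explain helpers from jstests/libs/analyze_plan.js.

load("jstests/libs/analyze_plan.js");  // Assumed source of getAggPlanStage().

// Hypothetical helper: verify which way the index is traversed for a given sort.
function checkScanDirection(coll, sortSpec, expectedDirection) {
    const explain = coll.explain().aggregate([{$sort: sortSpec}]);
    const ixscan = getAggPlanStage(explain, "IXSCAN");
    assert.neq(null, ixscan, tojson(explain));
    // A backward traversal is expected when the sort direction disagrees with the index.
    assert.eq(expectedDirection, ixscan.direction, tojson(explain));
}

// Example usage under the assumptions above:
// checkScanDirection(db.ts, {t: -1}, "backward");
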
diff --git a/jstests/core/timeseries/bucket_unpacking_with_sort_plan_cache.js b/jstests/core/timeseries/bucket_unpacking_with_sort_plan_cache.js
index 4e8783e7375..25e1c99312e 100644
--- a/jstests/core/timeseries/bucket_unpacking_with_sort_plan_cache.js
+++ b/jstests/core/timeseries/bucket_unpacking_with_sort_plan_cache.js
@@ -66,68 +66,83 @@ const bucketsName = "system.buckets." + collName;
const stageName = "$_internalBoundedSort";
const bucketsColl = db[bucketsName];
-const numDocs = 20;
-// Setup with a few documents.
-setupCollection(coll, collName, numDocs);
-
-// Create indexes so that we have something to multiplan.
-assert.commandWorked(coll.createIndex({"m.a": 1, "m.i": 1, t: 1}));
-assert.commandWorked(coll.createIndex({"m.b": 1, "m.i": 1, t: 1}));
-
-// Check that the rewrite is performed before caching.
-const pipeline = [{$sort: {"m.i": 1, t: 1}}, {$match: {"m.a": 1, "m.b": 1}}];
-let explain = coll.explain().aggregate(pipeline);
-assert.eq(getAggPlanStages(explain, stageName).length, 1, explain);
-
-// Check the cache is empty.
-assert.eq(db[bucketsName].getPlanCache().list().length, 0);
-
-// Run in order to cache the plan.
-let result = coll.aggregate(pipeline).toArray();
-assert.eq(result.length, 20, result);
-
-// Check the answer was cached.
-assert.eq(db[bucketsName].getPlanCache().list().length, 1);
-
-// Check that the solution still uses internal bounded sort.
-explain = coll.explain().aggregate(pipeline);
-assert(getAggPlanStages(explain, stageName).length === 1, explain);
-
-// Get constants needed for replanning.
-const cursorStageName = "$cursor";
-const planCacheKey =
- getPlanCacheKeyFromExplain(getAggPlanStage(explain, cursorStageName)[cursorStageName], db);
-const planCacheEntry = (() => {
- const planCache = bucketsColl.getPlanCache().list([{$match: {planCacheKey}}]);
- assert.eq(planCache.length, 1, planCache);
- return planCache[0];
-})();
-let ratio = (() => {
- const getParamRes = assert.commandWorked(
- db.adminCommand({getParameter: 1, internalQueryCacheEvictionRatio: 1}));
- return getParamRes["internalQueryCacheEvictionRatio"];
-})();
-
-// Remove existing docs, add docs to trigger replanning.
-assert.commandWorked(coll.deleteMany({"m.a": 1, "m.b": 1}));
-let numNewDocs = ratio * planCacheEntry.works + 1;
-addDocs(coll, numNewDocs, [1, 0]);
-addDocs(coll, numNewDocs, [0, 1]);
-
-// Turn on profiling.
-db.setProfilingLevel(2);
-
-// Rerun command with replanning.
-const comment = jsTestName();
-result = coll.aggregate(pipeline, {comment}).toArray();
-assert.eq(result.length, 0);
-
-// Check that the plan was replanned.
-const replanProfileEntry = getLatestProfilerEntry(db, {'command.comment': comment});
-assert(replanProfileEntry.replanned, replanProfileEntry);
+const testBoundedSorterPlanCache = (sortDirection, indexDirection) => {
+ // Setup with a few documents.
+ const numDocs = 20;
+ setupCollection(coll, collName, numDocs);
+
+ assert.commandWorked(
+ coll.createIndex({"m.a": indexDirection, "m.i": indexDirection, t: indexDirection}));
+ assert.commandWorked(
+ coll.createIndex({"m.b": indexDirection, "m.i": indexDirection, t: indexDirection}));
+
+ // Check that the rewrite is performed before caching.
+ const pipeline =
+ [{$sort: {"m.i": sortDirection, t: sortDirection}}, {$match: {"m.a": 1, "m.b": 1}}];
+ let explain = coll.explain().aggregate(pipeline);
+ assert.eq(getAggPlanStages(explain, stageName).length, 1, explain);
+ const traversalDirection = sortDirection === indexDirection ? "forward" : "backward";
+ assert.eq(getAggPlanStage(explain, "IXSCAN").direction, traversalDirection, explain);
+
+ // Check the cache is empty.
+ assert.eq(db[bucketsName].getPlanCache().list().length, 0);
+
+ // Run in order to cache the plan.
+ let result = coll.aggregate(pipeline).toArray();
+ assert.eq(result.length, 20, result);
+
+ // Check the answer was cached.
+ assert.eq(db[bucketsName].getPlanCache().list().length, 1);
+
+ // Check that the solution still uses internal bounded sort with the correct order.
+ explain = coll.explain().aggregate(pipeline);
+ assert.eq(getAggPlanStages(explain, stageName).length, 1, explain);
+ assert.eq(getAggPlanStage(explain, "IXSCAN").direction, traversalDirection, explain);
+
+ // Get constants needed for replanning.
+ const cursorStageName = "$cursor";
+ const planCacheKey =
+ getPlanCacheKeyFromExplain(getAggPlanStage(explain, cursorStageName)[cursorStageName], db);
+ const planCacheEntry = (() => {
+ const planCache = bucketsColl.getPlanCache().list([{$match: {planCacheKey}}]);
+ assert.eq(planCache.length, 1, planCache);
+ return planCache[0];
+ })();
+ let ratio = (() => {
+ const getParamRes = assert.commandWorked(
+ db.adminCommand({getParameter: 1, internalQueryCacheEvictionRatio: 1}));
+ return getParamRes["internalQueryCacheEvictionRatio"];
+ })();
+
+ // Remove existing docs, add docs to trigger replanning.
+ assert.commandWorked(coll.deleteMany({"m.a": 1, "m.b": 1}));
+ let numNewDocs = ratio * planCacheEntry.works + 1;
+ addDocs(coll, numNewDocs, [1, 0]);
+ addDocs(coll, numNewDocs, [0, 1]);
+
+ // Turn on profiling.
+ db.setProfilingLevel(2);
+
+ // Rerun command with replanning.
+ const comment = jsTestName();
+ result = coll.aggregate(pipeline, {comment}).toArray();
+ assert.eq(result.length, 0);
+
+ // Check that the plan was replanned.
+ const replanProfileEntry = getLatestProfilerEntry(db, {'command.comment': comment});
+ assert(replanProfileEntry.replanned, replanProfileEntry);
+
+ // Check that rewrite happens with replanning.
+ explain = coll.explain().aggregate(pipeline);
+ assert.eq(getAggPlanStages(explain, stageName).length,
+ 1,
+ {explain, stages: getAggPlanStages(explain, stageName)});
+ assert.eq(getAggPlanStage(explain, "IXSCAN").direction, traversalDirection, explain);
+};
-// Check that rewrite happens with replanning.
-explain = coll.explain().aggregate(pipeline);
-assert(getAggPlanStages(explain, stageName).length === 1,
- {explain, stages: getAggPlanStages(explain, stageName)});
+for (const sortDirection of [-1, 1]) {
+ for (const indexDirection of [-1, 1]) {
+ testBoundedSorterPlanCache(sortDirection, indexDirection);
+ }
+}
})();
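
As a hedged companion sketch (not part of the patch), the cached entry that feeds the replanning threshold above can be inspected like this; `bucketsColl` and `planCacheKey` are assumed to be set up exactly as in the test.

// Hypothetical helper: dump the single plan cache entry the test expects to exist.
function dumpPlanCacheEntry(bucketsColl, planCacheKey) {
    const entries = bucketsColl.getPlanCache().list([{$match: {planCacheKey}}]);
    assert.eq(1, entries.length, tojson(entries));
    // 'works' drives replanning: inserting ratio * works + 1 new documents triggers it.
    jsTestLog("Cached plan works value: " + entries[0].works);
    return entries[0];
}
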
diff --git a/jstests/core/timeseries/libs/timeseries.js b/jstests/core/timeseries/libs/timeseries.js
index ea8d71c87a1..34465117a53 100644
--- a/jstests/core/timeseries/libs/timeseries.js
+++ b/jstests/core/timeseries/libs/timeseries.js
@@ -12,6 +12,13 @@ var TimeseriesTest = class {
}
/**
+ * Returns whether time-series scalability improvements (like bucket reopening) are enabled.
+ */
+ static timeseriesScalabilityImprovementsEnabled(conn) {
+ return FeatureFlagUtil.isEnabled(conn, "TimeseriesScalabilityImprovements");
+ }
+
+ /**
* Returns whether time-series updates and deletes are supported.
*/
static timeseriesUpdatesAndDeletesEnabled(conn) {
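
A hedged usage sketch (not part of the patch) of the new helper; it assumes the library above is loaded and that `db` points at a server where the feature flag may or may not be enabled.

load("jstests/core/timeseries/libs/timeseries.js");  // For 'TimeseriesTest'.

if (TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db)) {
    // Per the bucket-limit tests below, full buckets are expected to carry control.closed.
    jsTestLog("Scalability improvements enabled");
} else {
    jsTestLog("Scalability improvements disabled; control.closed should be absent");
}
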
diff --git a/jstests/core/timeseries/nondefault_collation.js b/jstests/core/timeseries/nondefault_collation.js
index 3ff1c63d330..6f88ca2243f 100644
--- a/jstests/core/timeseries/nondefault_collation.js
+++ b/jstests/core/timeseries/nondefault_collation.js
@@ -5,7 +5,6 @@
* @tags: [
* requires_non_retryable_writes,
* requires_pipeline_optimization,
- * requires_getmore,
* does_not_support_stepdowns,
* does_not_support_transactions,
* multiversion_incompatible,
diff --git a/jstests/core/timeseries/timeseries_bucket_index.js b/jstests/core/timeseries/timeseries_bucket_index.js
index 24bd8a9ce33..d1bef847bfc 100644
--- a/jstests/core/timeseries/timeseries_bucket_index.js
+++ b/jstests/core/timeseries/timeseries_bucket_index.js
@@ -4,8 +4,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_53,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_bucket_limit_count.js b/jstests/core/timeseries/timeseries_bucket_limit_count.js
index 70a44099e38..8234e247e06 100644
--- a/jstests/core/timeseries/timeseries_bucket_limit_count.js
+++ b/jstests/core/timeseries/timeseries_bucket_limit_count.js
@@ -1,10 +1,13 @@
/**
* Tests maximum number of measurements held in each bucket in a time-series buckets collection.
* @tags: [
+ * # This test depends on certain writes ending up in the same bucket. Stepdowns may result in
+ * # writes splitting between two primaries, and thus different buckets.
* does_not_support_stepdowns,
+ * # Same goes for tenant migrations.
+ * tenant_migration_incompatible,
* does_not_support_transactions,
- * requires_getmore,
- * requires_fcv_52,
+ * requires_collstats,
* ]
*/
(function() {
@@ -15,6 +18,8 @@ load("jstests/core/timeseries/libs/timeseries.js"); // For 'TimeseriesTest'.
TimeseriesTest.run((insert) => {
const isTimeseriesBucketCompressionEnabled =
TimeseriesTest.timeseriesBucketCompressionEnabled(db);
+ const areTimeseriesScalabilityImprovementsEnabled =
+ TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db);
const collNamePrefix = 'timeseries_bucket_limit_count_';
@@ -75,6 +80,15 @@ TimeseriesTest.run((insert) => {
bucketDocs[0].control.version,
'unexpected control.version in first bucket: ' + tojson(bucketDocs));
+ if (areTimeseriesScalabilityImprovementsEnabled) {
+ assert.eq(true,
+ bucketDocs[0].control.closed,
+ 'unexpected control.closed in first bucket: ' + tojson(bucketDocs));
+ } else {
+ assert(!bucketDocs[0].control.hasOwnProperty("closed"),
+ 'unexpected control.closed in first bucket: ' + tojson(bucketDocs));
+ }
+
// Second bucket should contain the remaining documents.
assert.eq(bucketMaxCount,
bucketDocs[1].control.min._id,
@@ -90,7 +104,16 @@ TimeseriesTest.run((insert) => {
'invalid control.max for x in second bucket: ' + tojson(bucketDocs));
assert.eq(1,
bucketDocs[1].control.version,
- 'unexpected control.version in first bucket: ' + tojson(bucketDocs));
+ 'unexpected control.version in second bucket: ' + tojson(bucketDocs));
+
+ if (areTimeseriesScalabilityImprovementsEnabled) {
+ assert.eq(false,
+ bucketDocs[1].control.closed,
+ 'unexpected control.closed in second bucket: ' + tojson(bucketDocs));
+ } else {
+ assert(!bucketDocs[1].control.hasOwnProperty("closed"),
+ 'unexpected control.closed in second bucket: ' + tojson(bucketDocs));
+ }
};
runTest(1);
diff --git a/jstests/core/timeseries/timeseries_bucket_limit_time_range.js b/jstests/core/timeseries/timeseries_bucket_limit_time_range.js
index f15bf19c046..71949def50e 100644
--- a/jstests/core/timeseries/timeseries_bucket_limit_time_range.js
+++ b/jstests/core/timeseries/timeseries_bucket_limit_time_range.js
@@ -3,8 +3,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
- * requires_fcv_52,
* ]
*/
(function() {
@@ -15,6 +13,8 @@ load("jstests/core/timeseries/libs/timeseries.js"); // For 'TimeseriesTest'.
TimeseriesTest.run((insert) => {
const isTimeseriesBucketCompressionEnabled =
TimeseriesTest.timeseriesBucketCompressionEnabled(db);
+ const isTimeseriesScalabilityImprovementsEnabled =
+ TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db);
const collNamePrefix = 'timeseries_bucket_limit_time_range_';
@@ -86,9 +86,14 @@ TimeseriesTest.run((insert) => {
assert.eq(docTimes[2],
bucketDocs[0].control.max[timeFieldName],
'invalid control.max for time in first bucket: ' + tojson(bucketDocs[0].control));
- assert.eq(isTimeseriesBucketCompressionEnabled ? 2 : 1,
- bucketDocs[0].control.version,
- 'unexpected control.version in first bucket: ' + tojson(bucketDocs));
+ // If the scalability improvements are enabled, we will archive the bucket instead of closing
+ // it, but another simultaneous operation may close it in the background.
+ if (!isTimeseriesScalabilityImprovementsEnabled) {
+ assert.eq(isTimeseriesBucketCompressionEnabled ? 2 : 1,
+ bucketDocs[0].control.version,
+ 'unexpected control.version in first bucket: ' + tojson(bucketDocs));
+ }
// Second bucket should contain the remaining document.
assert.eq(numDocs - 1,
diff --git a/jstests/core/timeseries/timeseries_bucket_manual_removal.js b/jstests/core/timeseries/timeseries_bucket_manual_removal.js
index f44ad874741..1e7915aaa63 100644
--- a/jstests/core/timeseries/timeseries_bucket_manual_removal.js
+++ b/jstests/core/timeseries/timeseries_bucket_manual_removal.js
@@ -5,7 +5,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_bucket_rename.js b/jstests/core/timeseries/timeseries_bucket_rename.js
index 98a0b73b810..6f98179cf22 100644
--- a/jstests/core/timeseries/timeseries_bucket_rename.js
+++ b/jstests/core/timeseries/timeseries_bucket_rename.js
@@ -4,7 +4,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_collation.js b/jstests/core/timeseries/timeseries_collation.js
index c6bb3c1fd14..1b591b0b345 100644
--- a/jstests/core/timeseries/timeseries_collation.js
+++ b/jstests/core/timeseries/timeseries_collation.js
@@ -4,7 +4,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_create_collection.js b/jstests/core/timeseries/timeseries_create_collection.js
index feb2f443d26..f23b7b49d19 100644
--- a/jstests/core/timeseries/timeseries_create_collection.js
+++ b/jstests/core/timeseries/timeseries_create_collection.js
@@ -4,8 +4,8 @@
* in that case.
*
* @tags: [
+ * does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_delete.js b/jstests/core/timeseries/timeseries_delete.js
index 14f50199e74..64c224cb976 100644
--- a/jstests/core/timeseries/timeseries_delete.js
+++ b/jstests/core/timeseries/timeseries_delete.js
@@ -3,8 +3,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
- * requires_fcv_51,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_delete_concurrent.js b/jstests/core/timeseries/timeseries_delete_concurrent.js
index 14ddefe3dfb..582a7f0c903 100644
--- a/jstests/core/timeseries/timeseries_delete_concurrent.js
+++ b/jstests/core/timeseries/timeseries_delete_concurrent.js
@@ -5,8 +5,6 @@
* assumes_no_implicit_collection_creation_after_drop,
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
- * requires_getmore,
* # $currentOp can't run with a readConcern other than 'local'.
* assumes_read_concern_unchanged,
* # This test only synchronizes deletes on the primary.
diff --git a/jstests/core/timeseries/timeseries_delete_hint.js b/jstests/core/timeseries/timeseries_delete_hint.js
index 0facca816e1..cf8edf3b70d 100644
--- a/jstests/core/timeseries/timeseries_delete_hint.js
+++ b/jstests/core/timeseries/timeseries_delete_hint.js
@@ -4,8 +4,6 @@
* assumes_no_implicit_collection_creation_after_drop,
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
- * requires_getmore,
* # $currentOp can't run with a readConcern other than 'local'.
* assumes_read_concern_unchanged,
* # This test only synchronizes deletes on the primary.
diff --git a/jstests/core/timeseries/timeseries_hint.js b/jstests/core/timeseries/timeseries_hint.js
index 037de311829..1087af48a66 100644
--- a/jstests/core/timeseries/timeseries_hint.js
+++ b/jstests/core/timeseries/timeseries_hint.js
@@ -4,8 +4,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
- * requires_getmore,
* requires_pipeline_optimization,
* # Explain of a resolved view must be executed by mongos.
* directly_against_shardsvrs_incompatible,
diff --git a/jstests/core/timeseries/timeseries_id_index.js b/jstests/core/timeseries/timeseries_id_index.js
index e3bc20d6648..3cf2fcfa8fc 100644
--- a/jstests/core/timeseries/timeseries_id_index.js
+++ b/jstests/core/timeseries/timeseries_id_index.js
@@ -2,10 +2,8 @@
* Verifies that the _id index can be created on a timeseries collection.
*
* @tags: [
- * requires_fcv_52,
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_id_range.js b/jstests/core/timeseries/timeseries_id_range.js
index 46aa03f8230..38b07e921c7 100644
--- a/jstests/core/timeseries/timeseries_id_range.js
+++ b/jstests/core/timeseries/timeseries_id_range.js
@@ -6,11 +6,11 @@
* # The test assumes no index exists on the time field. shardCollection implicitly creates an
* # index.
* assumes_unsharded_collection,
+ * does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
- * requires_fcv_52,
* # Explain of a resolved view must be executed by mongos.
* directly_against_shardsvrs_incompatible,
+ * tenant_migration_incompatible,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_index.js b/jstests/core/timeseries/timeseries_index.js
index bef86453999..2e6f964dd52 100644
--- a/jstests/core/timeseries/timeseries_index.js
+++ b/jstests/core/timeseries/timeseries_index.js
@@ -4,8 +4,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_52,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_index_collation.js b/jstests/core/timeseries/timeseries_index_collation.js
index e8ef56501f3..47af30bee6d 100644
--- a/jstests/core/timeseries/timeseries_index_collation.js
+++ b/jstests/core/timeseries/timeseries_index_collation.js
@@ -4,7 +4,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_index_skipped_record_tracker.js b/jstests/core/timeseries/timeseries_index_skipped_record_tracker.js
index 4142fc268c3..ab4f1f88e8e 100644
--- a/jstests/core/timeseries/timeseries_index_skipped_record_tracker.js
+++ b/jstests/core/timeseries/timeseries_index_skipped_record_tracker.js
@@ -5,7 +5,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_index_spec.js b/jstests/core/timeseries/timeseries_index_spec.js
index f13c64dd80b..13377cc9ab7 100644
--- a/jstests/core/timeseries/timeseries_index_spec.js
+++ b/jstests/core/timeseries/timeseries_index_spec.js
@@ -1,15 +1,13 @@
/**
* Tests that the original user index definition is stored on the transformed index definition on
- * the buckets collection for newly supported index types. Indexes created directly on the buckets
- * collection do not have an original user index definition and rely on the reverse mapping
- * mechanism.
+ * the buckets collection for newly supported index types introduced in v6.0. Indexes created
+ * directly on the buckets collection do not have an original user index definition and rely on the
+ * reverse mapping mechanism.
*
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
* requires_find_command,
- * requires_getmore,
* ]
*/
(function() {
@@ -30,7 +28,7 @@ TimeseriesTest.run(() => {
assert.commandWorked(db.createCollection(
coll.getName(), {timeseries: {timeField: timeFieldName, metaField: metaFieldName}}));
- const checkIndexSpec = function(spec, userIndex, isDowngradeCompatible) {
+ const checkIndexSpec = function(spec, userIndex, shouldHaveOriginalSpec) {
assert(spec.hasOwnProperty("v"));
assert(spec.hasOwnProperty("name"));
assert(spec.hasOwnProperty("key"));
@@ -40,7 +38,7 @@ TimeseriesTest.run(() => {
return;
}
- if (!isDowngradeCompatible) {
+ if (shouldHaveOriginalSpec) {
assert(spec.hasOwnProperty("originalSpec"));
assert.eq(spec.v, spec.originalSpec.v);
assert.eq(spec.name, spec.originalSpec.name);
@@ -50,14 +48,14 @@ TimeseriesTest.run(() => {
}
};
- const verifyAndDropIndex = function(isDowngradeCompatible, indexName) {
+ const verifyAndDropIndex = function(shouldHaveOriginalSpec, indexName) {
let sawIndex = false;
let userIndexes = coll.getIndexes();
for (const index of userIndexes) {
if (index.name === indexName) {
sawIndex = true;
- checkIndexSpec(index, /*userIndex=*/true, isDowngradeCompatible);
+ checkIndexSpec(index, /*userIndex=*/true, shouldHaveOriginalSpec);
}
}
@@ -65,7 +63,7 @@ TimeseriesTest.run(() => {
for (const index of bucketIndexes) {
if (index.name === indexName) {
sawIndex = true;
- checkIndexSpec(index, /*userIndex=*/false, isDowngradeCompatible);
+ checkIndexSpec(index, /*userIndex=*/false, shouldHaveOriginalSpec);
}
}
@@ -76,35 +74,35 @@ TimeseriesTest.run(() => {
};
assert.commandWorked(coll.createIndex({[timeFieldName]: 1}, {name: "timefield_downgradable"}));
- verifyAndDropIndex(/*isDowngradeCompatible=*/true, "timefield_downgradable");
+ verifyAndDropIndex(/*shouldHaveOriginalSpec=*/false, "timefield_downgradable");
assert.commandWorked(coll.createIndex({[metaFieldName]: 1}, {name: "metafield_downgradable"}));
- verifyAndDropIndex(/*isDowngradeCompatible=*/true, "metafield_downgradable");
+ verifyAndDropIndex(/*shouldHaveOriginalSpec=*/false, "metafield_downgradable");
assert.commandWorked(coll.createIndex({[timeFieldName]: 1, [metaFieldName]: 1},
{name: "time_meta_field_downgradable"}));
- verifyAndDropIndex(/*isDowngradeCompatible=*/true, "time_meta_field_downgradable");
+ verifyAndDropIndex(/*shouldHaveOriginalSpec=*/false, "time_meta_field_downgradable");
if (TimeseriesTest.timeseriesMetricIndexesEnabled(db.getMongo())) {
assert.commandWorked(coll.createIndex({x: 1}, {name: "x_1"}));
- verifyAndDropIndex(/*isDowngradeCompatible=*/false, "x_1");
+ verifyAndDropIndex(/*shouldHaveOriginalSpec=*/true, "x_1");
assert.commandWorked(
coll.createIndex({x: 1}, {name: "x_partial", partialFilterExpression: {x: {$gt: 5}}}));
- verifyAndDropIndex(/*isDowngradeCompatible=*/false, "x_partial");
+ verifyAndDropIndex(/*shouldHaveOriginalSpec=*/true, "x_partial");
assert.commandWorked(coll.createIndex(
{[timeFieldName]: 1}, {name: "time_partial", partialFilterExpression: {x: {$gt: 5}}}));
- verifyAndDropIndex(/*isDowngradeCompatible=*/false, "time_partial");
+ verifyAndDropIndex(/*shouldHaveOriginalSpec=*/true, "time_partial");
assert.commandWorked(coll.createIndex(
{[metaFieldName]: 1}, {name: "meta_partial", partialFilterExpression: {x: {$gt: 5}}}));
- verifyAndDropIndex(/*isDowngradeCompatible=*/false, "meta_partial");
+ verifyAndDropIndex(/*shouldHaveOriginalSpec=*/true, "meta_partial");
assert.commandWorked(
coll.createIndex({[metaFieldName]: 1, x: 1},
{name: "meta_x_partial", partialFilterExpression: {x: {$gt: 5}}}));
- verifyAndDropIndex(/*isDowngradeCompatible=*/false, "meta_x_partial");
+ verifyAndDropIndex(/*shouldHaveOriginalSpec=*/true, "meta_x_partial");
}
// Creating an index directly on the buckets collection is permitted. However, these types of
diff --git a/jstests/core/timeseries/timeseries_index_stats.js b/jstests/core/timeseries/timeseries_index_stats.js
index d8c14c905d4..802bb154d51 100644
--- a/jstests/core/timeseries/timeseries_index_stats.js
+++ b/jstests/core/timeseries/timeseries_index_stats.js
@@ -8,9 +8,7 @@
* assumes_read_preference_unchanged,
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
* requires_non_retryable_writes,
- * requires_fcv_52,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_index_ttl_partial.js b/jstests/core/timeseries/timeseries_index_ttl_partial.js
new file mode 100644
index 00000000000..b8364919b33
--- /dev/null
+++ b/jstests/core/timeseries/timeseries_index_ttl_partial.js
@@ -0,0 +1,144 @@
+/**
+ * Tests the creation of partial, TTL indexes on a time-series collection.
+ *
+ * @tags: [
+ * does_not_support_stepdowns,
+ * does_not_support_transactions,
+ * featureFlagTimeseriesScalabilityImprovements,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/core/timeseries/libs/timeseries.js");
+
+const collName = "timeseries_index_ttl_partial";
+const indexName = "partialTTLIndex";
+const coll = db.getCollection(collName);
+const bucketsColl = db.getCollection("system.buckets." + collName);
+
+const timeFieldName = "tm";
+const metaFieldName = "mm";
+const timeSpec = {
+ [timeFieldName]: 1
+};
+const metaSpec = {
+ [metaFieldName]: 1
+};
+
+const expireAfterSeconds = NumberLong(400);
+
+const resetTsColl = function(extraOptions = {}) {
+ coll.drop();
+
+ let options = {timeseries: {timeField: timeFieldName, metaField: metaFieldName}};
+ assert.commandWorked(db.createCollection(coll.getName(), Object.merge(options, extraOptions)));
+};
+
+(function invalidTTLIndexes() {
+ resetTsColl();
+
+ let options = {name: indexName, expireAfterSeconds: 3600};
+ // TTL indexes on the time field are only allowed in conjunction with partialFilterExpressions
+ // on the metafield.
+ assert.commandFailedWithCode(coll.createIndex(timeSpec, options), ErrorCodes.InvalidOptions);
+
+ // TTL indexes on the metafield are not allowed.
+ assert.commandFailedWithCode(coll.createIndex(metaSpec, options), ErrorCodes.InvalidOptions);
+}());
+
+(function partialTTLIndexesShouldSucceed() {
+ resetTsColl();
+ const options = {
+ name: indexName,
+ partialFilterExpression: {[metaFieldName]: {$gt: 5}},
+ expireAfterSeconds: expireAfterSeconds
+ };
+
+ // Creating a TTL index on time with a partial filter expression on the metaField should
+ // succeed.
+ assert.commandWorked(coll.createIndex(
+ timeSpec, Object.merge(options, {expireAfterSeconds: expireAfterSeconds})));
+ let indexes = coll.getIndexes().filter(ix => ix.name === indexName);
+ assert.eq(1, indexes.length, tojson(indexes));
+
+ let partialTTLIndex = indexes[0];
+ assert.eq(indexName, partialTTLIndex.name, tojson(partialTTLIndex));
+ assert.eq(timeSpec, partialTTLIndex.key, tojson(partialTTLIndex));
+ assert.eq(expireAfterSeconds, partialTTLIndex.expireAfterSeconds, tojson(partialTTLIndex));
+
+ resetTsColl({expireAfterSeconds: 3600});
+
+ // Creating an index on time (on a time-series collection created with the expireAfterSeconds
+ // parameter) with a partial filter expression on the metaField should succeed.
+ assert.commandWorked(coll.createIndex(timeSpec, options));
+ indexes = coll.getIndexes().filter(ix => ix.name === indexName);
+ assert.eq(1, indexes.length, tojson(indexes));
+
+ partialTTLIndex = indexes[0];
+ assert.eq(indexName, partialTTLIndex.name, tojson(partialTTLIndex));
+ assert.eq(timeSpec, partialTTLIndex.key, tojson(partialTTLIndex));
+ assert.eq(expireAfterSeconds, partialTTLIndex.expireAfterSeconds, tojson(partialTTLIndex));
+}());
+
+(function invalidPartialTTLIndexesShouldFail() {
+ resetTsColl();
+
+ const currentDate = ISODate();
+ const filterOnData = {
+ name: indexName,
+ partialFilterExpression: {"data": {$gt: 5}},
+ expireAfterSeconds: expireAfterSeconds
+ };
+ const filterOnMeta = {
+ name: indexName,
+ partialFilterExpression: {[metaFieldName]: {$gt: 5}},
+ expireAfterSeconds: expireAfterSeconds
+ };
+ const filterOnMetaAndData = {
+ name: indexName,
+ partialFilterExpression: {[metaFieldName]: {$gt: 5}, "data": {$gt: 5}},
+ expireAfterSeconds: expireAfterSeconds
+ };
+ const filterOnTime = {
+ name: indexName,
+ partialFilterExpression: {[timeFieldName]: {$gt: currentDate}},
+ expireAfterSeconds: expireAfterSeconds
+ };
+ const dataSpec = {"data": 1};
+
+ // These cases have valid index specs on the time field but invalid partialFilterExpressions.
+ {
+ // A TTL index on time requires partial indexes to be on the metadata field.
+ assert.commandFailedWithCode(coll.createIndex(timeSpec, filterOnData),
+ ErrorCodes.InvalidOptions);
+
+ // A TTL index on time requires partial indexes on the metadata field only, no compound
+ // expressions.
+ assert.commandFailedWithCode(coll.createIndex(timeSpec, filterOnMetaAndData),
+ ErrorCodes.InvalidOptions);
+
+ // Partial indexes are not allowed to be on the timeField.
+ assert.commandFailedWithCode(coll.createIndex(timeSpec, filterOnTime),
+ ErrorCodes.InvalidOptions);
+ }
+
+ const timeAndMetaSpec = Object.merge(timeSpec, metaSpec);
+ const timeAndDataSpec = Object.merge(timeSpec, dataSpec);
+ // These cases have valid partialFilterExpressions but invalid index specs.
+ {
+ // TTL indexes are only allowed on the time field.
+ assert.commandFailedWithCode(coll.createIndex(metaSpec, filterOnMeta),
+ ErrorCodes.InvalidOptions);
+ assert.commandFailedWithCode(coll.createIndex(dataSpec, filterOnMeta),
+ ErrorCodes.InvalidOptions);
+
+ // TTL indexes are not allowed on compound indexes (even if a time field exists in the
+ // spec).
+ assert.commandFailedWithCode(coll.createIndex(timeAndMetaSpec, filterOnMeta),
+ ErrorCodes.CannotCreateIndex);
+ assert.commandFailedWithCode(coll.createIndex(timeAndDataSpec, filterOnMeta),
+ ErrorCodes.CannotCreateIndex);
+ }
+}());
+})();
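
For reference, a hedged sketch (not from the patch) of the one combination the rules above accept: a TTL index keyed on the time field whose partialFilterExpression references only the metaField. The collection and field names below are illustrative, and the featureFlagTimeseriesScalabilityImprovements flag is assumed to be enabled.

assert.commandWorked(
    db.createCollection("weather", {timeseries: {timeField: "ts", metaField: "sensor"}}));
assert.commandWorked(db.weather.createIndex({ts: 1}, {
    name: "ttl_on_time_partial_on_meta",
    expireAfterSeconds: 3600,
    // The partial filter may reference only the metaField for the TTL index to be accepted.
    partialFilterExpression: {sensor: {$gt: 5}}
}));
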
diff --git a/jstests/core/timeseries/timeseries_index_use.js b/jstests/core/timeseries/timeseries_index_use.js
index 8770047f4d2..098bf6fe813 100644
--- a/jstests/core/timeseries/timeseries_index_use.js
+++ b/jstests/core/timeseries/timeseries_index_use.js
@@ -4,8 +4,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
- * requires_getmore,
* requires_pipeline_optimization,
* # Explain of a resolved view must be executed by mongos.
* directly_against_shardsvrs_incompatible,
diff --git a/jstests/core/timeseries/timeseries_insert_after_delete.js b/jstests/core/timeseries/timeseries_insert_after_delete.js
index e1ede952d18..a125d2a3f89 100644
--- a/jstests/core/timeseries/timeseries_insert_after_delete.js
+++ b/jstests/core/timeseries/timeseries_insert_after_delete.js
@@ -3,8 +3,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
- * requires_fcv_51,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_insert_after_update.js b/jstests/core/timeseries/timeseries_insert_after_update.js
index eec6005720e..993cdee6e9f 100644
--- a/jstests/core/timeseries/timeseries_insert_after_update.js
+++ b/jstests/core/timeseries/timeseries_insert_after_update.js
@@ -5,8 +5,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
- * requires_fcv_51,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_internal_bounded_sort.js b/jstests/core/timeseries/timeseries_internal_bounded_sort.js
index c909fe8e397..45cb1af79da 100644
--- a/jstests/core/timeseries/timeseries_internal_bounded_sort.js
+++ b/jstests/core/timeseries/timeseries_internal_bounded_sort.js
@@ -105,21 +105,12 @@ function runTest(ascending) {
// Check plan using control.max.t
if (ascending) {
- // TODO (SERVER-64994): We can remove this manual re-write once we support index
- // direction hints
- const opt = buckets
- .aggregate([
- {$sort: {'control.max.t': ascending ? 1 : -1}},
- unpackStage,
- {
- $_internalBoundedSort: {
- sortKey: {t: 1},
- bound: {base: "max", offsetSeconds: -bucketMaxSpanSeconds}
- }
- },
- ])
+ const opt = coll.aggregate(
+ [
+ {$sort: {t: 1}},
+ ],
+ {hint: {t: -1}})
.toArray();
-
assertSorted(opt, ascending);
assert.eq(reference, opt);
} else {
diff --git a/jstests/core/timeseries/timeseries_internal_bounded_sort_compound.js b/jstests/core/timeseries/timeseries_internal_bounded_sort_compound.js
index c579ecc6e3e..6f3a57f39e2 100644
--- a/jstests/core/timeseries/timeseries_internal_bounded_sort_compound.js
+++ b/jstests/core/timeseries/timeseries_internal_bounded_sort_compound.js
@@ -152,38 +152,17 @@ function runTest(sortSpec) {
sortSpec,
sortSpec);
} else {
- // TODO (SERVER-64994): We can remove this manual re-write once we support index
- // direction hints
- const optFromMinQuery = [
- {$sort: {meta: sortSpec.m, 'control.min.t': sortSpec.t}},
- unpackStage,
- {
- $_internalBoundedSort: {
- sortKey: sortSpec,
- bound: {base: "min", offsetSeconds: bucketMaxSpanSeconds}
- }
- },
- ];
- const optFromMin = buckets.aggregate(optFromMinQuery).toArray();
+ const optFromMinQuery = [{$sort: {m: sortSpec.m, t: sortSpec.t}}];
+ const optFromMin = coll.aggregate(optFromMinQuery).toArray();
assertSorted(optFromMin, sortSpec);
assert.eq(reference, optFromMin);
}
// Check plan using control.max.t
if (sortSpec.t > 0) {
- // TODO (SERVER-64994): We can remove this manual re-write once we support index
- // direction hints
- const optFromMaxQuery = [
- {$sort: {meta: sortSpec.m, 'control.max.t': sortSpec.t}},
- unpackStage,
- {
- $_internalBoundedSort: {
- sortKey: sortSpec,
- bound: {base: "max", offsetSeconds: -bucketMaxSpanSeconds}
- }
- },
- ];
- const optFromMax = buckets.aggregate(optFromMaxQuery).toArray();
+ const optFromMaxQuery = [{$sort: {m: sortSpec.m, t: sortSpec.t}}];
+ const optFromMax =
+ coll.aggregate(optFromMaxQuery, {hint: {m: -sortSpec.m, t: -sortSpec.t}}).toArray();
assertSorted(optFromMax, sortSpec);
assert.eq(reference, optFromMax);
} else {
diff --git a/jstests/core/timeseries/timeseries_large_measurements.js b/jstests/core/timeseries/timeseries_large_measurements.js
new file mode 100644
index 00000000000..409af5a80d3
--- /dev/null
+++ b/jstests/core/timeseries/timeseries_large_measurements.js
@@ -0,0 +1,73 @@
+/**
+ * Tests that the space usage calculation for new fields in time-series inserts accounts for the
+ * control.min and control.max fields.
+ *
+ * @tags: [
+ * does_not_support_stepdowns,
+ * does_not_support_transactions,
+ * tenant_migration_incompatible,
+ * requires_collstats,
+ * requires_fcv_61,
+ * ]
+ */
+(function() {
+"use strict";
+
+const coll = db.getCollection(jsTestName());
+const bucketColl = db.getCollection("system.buckets." + jsTestName());
+
+const timeFieldName = "time";
+const resetCollection = (() => {
+ coll.drop();
+ assert.commandWorked(
+ db.createCollection(jsTestName(), {timeseries: {timeField: timeFieldName}}));
+});
+
+const timeseriesBucketMaxSize = (() => {
+ const res =
+ assert.commandWorked(db.adminCommand({getParameter: 1, timeseriesBucketMaxSize: 1}));
+ return res.timeseriesBucketMaxSize;
+})();
+
+const checkAverageBucketSize = (() => {
+ const timeseriesStats = assert.commandWorked(coll.stats()).timeseries;
+ const averageBucketSize = timeseriesStats.numBytesUncompressed / timeseriesStats.bucketCount;
+
+ jsTestLog("Average bucket size: " + averageBucketSize);
+ assert.lte(averageBucketSize, timeseriesBucketMaxSize);
+
+ const firstBucket = bucketColl.find().sort({'control.min._id': 1}).toArray()[0];
+ assert.eq(0, firstBucket.control.min._id);
+ assert.eq(9, firstBucket.control.max._id);
+});
+
+// Each measurement inserted consumes roughly 1/12th of the bucket max size, so in theory only ten
+// measurements fit per bucket. The first measurement also creates the control.min and control.max
+// summaries, which account for about two measurements' worth of data. Subsequent measurements
+// barely change control.min and control.max, since they insert values of the same length. The
+// remaining ~4% of the bucket size is left for other internal fields that need to be written out.
+const measurementValueLength = Math.floor(timeseriesBucketMaxSize * 0.08);
+
+const numMeasurements = 100;
+
+jsTestLog("Testing single inserts");
+resetCollection();
+
+for (let i = 0; i < numMeasurements; i++) {
+ const doc = {_id: i, [timeFieldName]: ISODate(), value: "a".repeat(measurementValueLength)};
+ assert.commandWorked(coll.insert(doc));
+}
+checkAverageBucketSize();
+
+jsTestLog("Testing batched inserts");
+resetCollection();
+
+let batch = [];
+for (let i = 0; i < numMeasurements; i++) {
+ const doc = {_id: i, [timeFieldName]: ISODate(), value: "a".repeat(measurementValueLength)};
+ batch.push(doc);
+}
+assert.commandWorked(coll.insertMany(batch));
+checkAverageBucketSize();
+}());
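
Spelling out the sizing comment above as a hedged back-of-the-envelope check (not part of the patch); the bucket size used here is illustrative, while the real limit comes from the timeseriesBucketMaxSize server parameter.

const assumedBucketMaxSize = 128 * 1024;  // Illustrative value only.
const perMeasurement = Math.floor(assumedBucketMaxSize * 0.08);
// Ten measurements plus roughly two measurements' worth of control.min/control.max summaries
// fill about 96% of the bucket, leaving ~4% for other internal fields.
const estimatedBucketBytes = perMeasurement * (10 + 2);
assert.lte(estimatedBucketBytes, assumedBucketMaxSize);
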
diff --git a/jstests/core/timeseries/timeseries_lastpoint.js b/jstests/core/timeseries/timeseries_lastpoint.js
index d3bdc66d3bd..c7a1b2170a1 100644
--- a/jstests/core/timeseries/timeseries_lastpoint.js
+++ b/jstests/core/timeseries/timeseries_lastpoint.js
@@ -2,13 +2,14 @@
* Tests the optimization of "lastpoint"-type queries on time-series collections.
*
* @tags: [
+ * # This test depends on certain writes ending up in the same bucket. Stepdowns may result in
+ * # writes splitting between two primaries, and thus different buckets.
* does_not_support_stepdowns,
+ * # Same goes for tenant migrations.
+ * tenant_migration_incompatible,
* does_not_support_transactions,
* requires_timeseries,
* requires_pipeline_optimization,
- * requires_fcv_53,
- * # TODO (SERVER-63590): Investigate presence of getmore tag in timeseries jstests.
- * requires_getmore,
* # Explain of a resolved view must be executed by mongos.
* directly_against_shardsvrs_incompatible,
* ]
diff --git a/jstests/core/timeseries/timeseries_lastpoint_top.js b/jstests/core/timeseries/timeseries_lastpoint_top.js
index 2efd352b574..47671d97399 100644
--- a/jstests/core/timeseries/timeseries_lastpoint_top.js
+++ b/jstests/core/timeseries/timeseries_lastpoint_top.js
@@ -2,13 +2,14 @@
* Tests the optimization of "lastpoint"-type queries on time-series collections.
*
* @tags: [
+ * # This test depends on certain writes ending up in the same bucket. Stepdowns may result in
+ * # writes splitting between two primaries, and thus different buckets.
* does_not_support_stepdowns,
+ * # Same goes for tenant migrations.
+ * tenant_migration_incompatible,
* does_not_support_transactions,
* requires_timeseries,
* requires_pipeline_optimization,
- * requires_fcv_53,
- * # TODO (SERVER-63590): Investigate presence of getmore tag in timeseries jstests.
- * requires_getmore,
* # Explain of a resolved view must be executed by mongos.
* directly_against_shardsvrs_incompatible,
* ]
diff --git a/jstests/core/timeseries/timeseries_list_collections.js b/jstests/core/timeseries/timeseries_list_collections.js
index 2b2ce211761..f2f6b9a9d12 100644
--- a/jstests/core/timeseries/timeseries_list_collections.js
+++ b/jstests/core/timeseries/timeseries_list_collections.js
@@ -3,7 +3,6 @@
*
* @tags: [
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_list_collections_filter_name.js b/jstests/core/timeseries/timeseries_list_collections_filter_name.js
index 673fb40a99e..af53436ab66 100644
--- a/jstests/core/timeseries/timeseries_list_collections_filter_name.js
+++ b/jstests/core/timeseries/timeseries_list_collections_filter_name.js
@@ -4,7 +4,6 @@
*
* @tags: [
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_metadata.js b/jstests/core/timeseries/timeseries_metadata.js
index c445c920723..b946bb7f4c2 100644
--- a/jstests/core/timeseries/timeseries_metadata.js
+++ b/jstests/core/timeseries/timeseries_metadata.js
@@ -4,8 +4,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_metric_index_2dsphere.js b/jstests/core/timeseries/timeseries_metric_index_2dsphere.js
index e47119b5bbd..d0a8b387598 100644
--- a/jstests/core/timeseries/timeseries_metric_index_2dsphere.js
+++ b/jstests/core/timeseries/timeseries_metric_index_2dsphere.js
@@ -6,8 +6,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
- * requires_getmore,
* # Explain of a resolved view must be executed by mongos.
* directly_against_shardsvrs_incompatible,
* ]
diff --git a/jstests/core/timeseries/timeseries_metric_index_ascending_descending.js b/jstests/core/timeseries/timeseries_metric_index_ascending_descending.js
index bf1dc10e625..690b3f34327 100644
--- a/jstests/core/timeseries/timeseries_metric_index_ascending_descending.js
+++ b/jstests/core/timeseries/timeseries_metric_index_ascending_descending.js
@@ -4,10 +4,7 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_50,
- * requires_fcv_51,
* requires_find_command,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_metric_index_compound.js b/jstests/core/timeseries/timeseries_metric_index_compound.js
index 2ecc4732b70..76051e62456 100644
--- a/jstests/core/timeseries/timeseries_metric_index_compound.js
+++ b/jstests/core/timeseries/timeseries_metric_index_compound.js
@@ -4,9 +4,7 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
* requires_find_command,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_metric_index_hashed.js b/jstests/core/timeseries/timeseries_metric_index_hashed.js
index 2f88ca8b249..6fb7c70ac14 100644
--- a/jstests/core/timeseries/timeseries_metric_index_hashed.js
+++ b/jstests/core/timeseries/timeseries_metric_index_hashed.js
@@ -4,9 +4,7 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
* requires_find_command,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_metric_index_wildcard.js b/jstests/core/timeseries/timeseries_metric_index_wildcard.js
index 4764ba66508..c704bdd25d3 100644
--- a/jstests/core/timeseries/timeseries_metric_index_wildcard.js
+++ b/jstests/core/timeseries/timeseries_metric_index_wildcard.js
@@ -4,9 +4,7 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
* requires_find_command,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_min_max.js b/jstests/core/timeseries/timeseries_min_max.js
index 6eb16f9c7a7..71887d4f00b 100644
--- a/jstests/core/timeseries/timeseries_min_max.js
+++ b/jstests/core/timeseries/timeseries_min_max.js
@@ -5,8 +5,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
- * requires_fcv_52
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_out_of_order.js b/jstests/core/timeseries/timeseries_out_of_order.js
index db1b6798f81..e12708d61ec 100644
--- a/jstests/core/timeseries/timeseries_out_of_order.js
+++ b/jstests/core/timeseries/timeseries_out_of_order.js
@@ -5,7 +5,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_predicates.js b/jstests/core/timeseries/timeseries_predicates.js
index 54e63c0b7d5..a273cc8b128 100644
--- a/jstests/core/timeseries/timeseries_predicates.js
+++ b/jstests/core/timeseries/timeseries_predicates.js
@@ -39,7 +39,7 @@ function checkPredicateResult(predicate, documents) {
function checkAllBucketings(predicate, documents) {
for (const doc of documents) {
doc._id = ObjectId();
- doc.time = ISODate();
+ doc.time = doc.time || ISODate();
}
// For N documents, there are 2^N ways to assign them to buckets A and B.
@@ -232,4 +232,127 @@ checkAllBucketings({
{meta: {a: +1, b: -1}, x: 'asdf', time: ISODate('2020-02-01')},
{meta: {a: +1, b: -1}, x: 'asdf', time: ISODate('2019-12-31')},
]);
+
+// Test $exists on meta, inside $or.
+checkAllBucketings({
+ $or: [
+ {"meta.a": {$exists: true}},
+ {"x": {$gt: 2}},
+ ]
+},
+ [
+ {meta: {a: 1}, x: 1},
+ {meta: {a: 2}, x: 2},
+ {meta: {a: 3}, x: 3},
+ {meta: {a: 4}, x: 4},
+ {meta: {}, x: 1},
+ {meta: {}, x: 2},
+ {meta: {}, x: 3},
+ {meta: {}, x: 4},
+ ]);
+
+// Test $in on meta, inside $or.
+checkAllBucketings({
+ $or: [
+ {"meta.a": {$in: [1, 3]}},
+ {"x": {$gt: 2}},
+ ]
+},
+ [
+ {meta: {a: 1}, x: 1},
+ {meta: {a: 2}, x: 2},
+ {meta: {a: 3}, x: 3},
+ {meta: {a: 4}, x: 4},
+ {meta: {}, x: 1},
+ {meta: {}, x: 2},
+ {meta: {}, x: 3},
+ {meta: {}, x: 4},
+ ]);
+
+// Test geo predicates on meta, inside $or.
+for (const pred of ['$geoWithin', '$geoIntersects']) {
+ checkAllBucketings({
+ $or: [
+ {
+ "meta.location": {
+ [pred]: {
+ $geometry: {
+ type: "Polygon",
+ coordinates: [[
+ [0, 0],
+ [0, 3],
+ [3, 3],
+ [3, 0],
+ [0, 0],
+ ]]
+ }
+ }
+ }
+ },
+ {x: {$gt: 2}},
+ ]
+ },
+ [
+ {meta: {location: [1, 1]}, x: 1},
+ {meta: {location: [1, 1]}, x: 2},
+ {meta: {location: [1, 1]}, x: 3},
+ {meta: {location: [1, 1]}, x: 4},
+ {meta: {location: [5, 5]}, x: 1},
+ {meta: {location: [5, 5]}, x: 2},
+ {meta: {location: [5, 5]}, x: 3},
+ {meta: {location: [5, 5]}, x: 4},
+ ]);
+}
+
+// Test $mod on meta, inside $or.
+// $mod is an example of a predicate that we don't handle specially in time-series optimizations:
+// it can be pushed down if and only if it's on a metadata field.
+checkAllBucketings({
+ $or: [
+ {"meta.a": {$mod: [2, 0]}},
+ {"x": {$gt: 4}},
+ ]
+},
+ [
+ {meta: {a: 1}, x: 1},
+ {meta: {a: 2}, x: 2},
+ {meta: {a: 3}, x: 3},
+ {meta: {a: 4}, x: 4},
+ {meta: {a: 5}, x: 5},
+ {meta: {a: 6}, x: 6},
+ {meta: {a: 7}, x: 7},
+ {meta: {a: 8}, x: 8},
+ ]);
+
+// Test $elemMatch on meta, inside $or.
+checkAllBucketings({
+ $or: [
+ {"meta.a": {$elemMatch: {b: 3}}},
+ {"x": {$gt: 4}},
+ ]
+},
+ [
+ {x: 1, meta: {a: []}},
+ {x: 2, meta: {a: [{b: 2}]}},
+ {x: 3, meta: {a: [{b: 3}]}},
+ {x: 4, meta: {a: [{b: 2}, {b: 3}]}},
+ {x: 5, meta: {a: []}},
+ {x: 6, meta: {a: [{b: 2}]}},
+ {x: 7, meta: {a: [{b: 3}]}},
+ {x: 8, meta: {a: [{b: 2}, {b: 3}]}},
+ ]);
+checkAllBucketings({
+ $or: [
+ {"meta.a": {$elemMatch: {b: 2, c: 3}}},
+ {"x": {$gt: 3}},
+ ]
+},
+ [
+ {x: 1, meta: {a: []}},
+ {x: 2, meta: {a: [{b: 2, c: 3}]}},
+ {x: 3, meta: {a: [{b: 2}, {c: 3}]}},
+ {x: 4, meta: {a: []}},
+ {x: 5, meta: {a: [{b: 2, c: 3}]}},
+ {x: 6, meta: {a: [{b: 2}, {c: 3}]}},
+ ]);
})();
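
A hedged sketch (not part of the patch) of how one might eyeball whether a metaField-only predicate such as $mod was pushed down ahead of bucket unpacking; the exact shape of the pushed-down filter is an implementation detail, so this only prints the $cursor stage for manual inspection.

load("jstests/libs/analyze_plan.js");  // Assumed source of getAggPlanStage().

function showCursorStage(coll, predicate) {
    const explain = coll.explain().aggregate([{$match: predicate}]);
    jsTestLog(tojson(getAggPlanStage(explain, "$cursor")));
}

// Example usage, assuming a time-series collection whose metaField is 'meta':
// showCursorStage(db.ts, {"meta.a": {$mod: [2, 0]}});
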
diff --git a/jstests/core/timeseries/timeseries_resume_after.js b/jstests/core/timeseries/timeseries_resume_after.js
index a2b31972c85..34ccfa06d65 100644
--- a/jstests/core/timeseries/timeseries_resume_after.js
+++ b/jstests/core/timeseries/timeseries_resume_after.js
@@ -7,7 +7,7 @@
* assumes_against_mongod_not_mongos,
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
+ * tenant_migration_incompatible,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_show_record_id.js b/jstests/core/timeseries/timeseries_show_record_id.js
index 77ab94dcc48..c2c7f1d62ba 100644
--- a/jstests/core/timeseries/timeseries_show_record_id.js
+++ b/jstests/core/timeseries/timeseries_show_record_id.js
@@ -4,7 +4,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_simple.js b/jstests/core/timeseries/timeseries_simple.js
index ab20a90d096..fc20739c62f 100644
--- a/jstests/core/timeseries/timeseries_simple.js
+++ b/jstests/core/timeseries/timeseries_simple.js
@@ -2,9 +2,12 @@
* Tests inserting sample data into the time-series buckets collection.
* This test is for the simple case of only one measurement per bucket.
* @tags: [
+ * # This test depends on certain writes ending up in the same bucket. Stepdowns may result in
+ * # writes splitting between two primaries, and thus different buckets.
* does_not_support_stepdowns,
+ * # Same goes for tenant migrations.
+ * tenant_migration_incompatible,
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_sparse.js b/jstests/core/timeseries/timeseries_sparse.js
index 6fa18856fa2..3ea3cdeb1f6 100644
--- a/jstests/core/timeseries/timeseries_sparse.js
+++ b/jstests/core/timeseries/timeseries_sparse.js
@@ -4,7 +4,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_sparse_index.js b/jstests/core/timeseries/timeseries_sparse_index.js
index 9ffd008ff62..4f0c3e96e43 100644
--- a/jstests/core/timeseries/timeseries_sparse_index.js
+++ b/jstests/core/timeseries/timeseries_sparse_index.js
@@ -4,9 +4,7 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
* requires_find_command,
- * requires_getmore,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_special_indexes_metadata.js b/jstests/core/timeseries/timeseries_special_indexes_metadata.js
index 17600465193..3abc58fd950 100644
--- a/jstests/core/timeseries/timeseries_special_indexes_metadata.js
+++ b/jstests/core/timeseries/timeseries_special_indexes_metadata.js
@@ -6,8 +6,6 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
- * requires_getmore,
* requires_pipeline_optimization,
* ]
*/
diff --git a/jstests/core/timeseries/timeseries_update.js b/jstests/core/timeseries/timeseries_update.js
index 82ae774089a..00d42fdb289 100644
--- a/jstests/core/timeseries/timeseries_update.js
+++ b/jstests/core/timeseries/timeseries_update.js
@@ -3,8 +3,7 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
- * requires_fcv_51,
+ * tenant_migration_incompatible,
* ]
*/
(function() {
diff --git a/jstests/core/timeseries/timeseries_update_concurrent.js b/jstests/core/timeseries/timeseries_update_concurrent.js
index 6ca2745b5f5..3c9c1b5efb5 100644
--- a/jstests/core/timeseries/timeseries_update_concurrent.js
+++ b/jstests/core/timeseries/timeseries_update_concurrent.js
@@ -5,8 +5,6 @@
* assumes_unsharded_collection, # TODO SERVER-60233: Remove this tag.
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
- * requires_getmore,
* # $currentOp can't run with a readConcern other than 'local'.
* assumes_read_concern_unchanged,
* # This test only synchronizes updates on the primary.
diff --git a/jstests/core/timeseries/timeseries_update_hint.js b/jstests/core/timeseries/timeseries_update_hint.js
index e51e021c356..c907ea7ea49 100644
--- a/jstests/core/timeseries/timeseries_update_hint.js
+++ b/jstests/core/timeseries/timeseries_update_hint.js
@@ -4,8 +4,6 @@
* assumes_unsharded_collection, # TODO SERVER-60233: Remove this tag.
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_fcv_51,
- * requires_getmore,
* # $currentOp can't run with a readConcern other than 'local'.
* assumes_read_concern_unchanged,
* # This test only synchronizes updates on the primary.
diff --git a/jstests/core/views/views_all_commands.js b/jstests/core/views/views_all_commands.js
index fb47ebe42f0..d0a15afe8f6 100644
--- a/jstests/core/views/views_all_commands.js
+++ b/jstests/core/views/views_all_commands.js
@@ -368,7 +368,6 @@ let viewsCommandTests = {
fsyncUnlock: {skip: isUnrelated},
getAuditConfig: {skip: isUnrelated},
getDatabaseVersion: {skip: isUnrelated},
- getChangeStreamOptions: {skip: isUnrelated}, // TODO SERVER-65353 remove in 6.1.
getClusterParameter: {skip: isUnrelated},
getCmdLineOpts: {skip: isUnrelated},
getDefaultRWConcern: {skip: isUnrelated},
@@ -496,7 +495,7 @@ let viewsCommandTests = {
expectedErrorCode: ErrorCodes.NamespaceNotSharded,
},
moveChunk: {
- command: {moveChunk: "test.view"},
+ command: {moveChunk: "test.view", find: {}, to: "a"},
skipStandalone: true,
isAdminCommand: true,
expectFailure: true,
@@ -609,7 +608,6 @@ let viewsCommandTests = {
saslStart: {skip: isUnrelated},
sbe: {skip: isAnInternalCommand},
serverStatus: {command: {serverStatus: 1}, skip: isUnrelated},
- setChangeStreamOptions: {skip: isUnrelated}, // TODO SERVER-65353 remove in 6.1.
setIndexCommitQuorum: {skip: isUnrelated},
setAuditConfig: {skip: isUnrelated},
setCommittedSnapshot: {skip: isAnInternalCommand},
@@ -647,7 +645,7 @@ let viewsCommandTests = {
max: {x: 0},
keyPattern: {x: 1},
splitKeys: [{x: -2}, {x: -1}],
- shardVersion: [Timestamp(1, 2), ObjectId(), Timestamp(1, 1)]
+ shardVersion: {t: Timestamp(1, 2), e: ObjectId(), v: Timestamp(1, 1)}
},
skipSharded: true,
expectFailure: true,
diff --git a/jstests/cqf/array_match.js b/jstests/cqf/array_match.js
index 11abaf2a990..01120310947 100644
--- a/jstests/cqf/array_match.js
+++ b/jstests/cqf/array_match.js
@@ -10,15 +10,17 @@ if (!checkCascadesOptimizerEnabled(db)) {
const t = db.cqf_array_match;
t.drop();
-assert.commandWorked(t.insert({a: 2, b: 1}));
-assert.commandWorked(t.insert({a: [2], b: 1}));
-assert.commandWorked(t.insert({a: [[2]], b: 1}));
+for (let i = 0; i < 10; i++) {
+ assert.commandWorked(t.insert({a: 2, b: 1}));
+ assert.commandWorked(t.insert({a: [2], b: 1}));
+ assert.commandWorked(t.insert({a: [[2]], b: 1}));
+}
assert.commandWorked(t.createIndex({a: 1}));
{
const res = t.explain("executionStats").aggregate([{$match: {a: {$eq: [2]}}}]);
- assert.eq(2, res.executionStats.nReturned);
+ assert.eq(20, res.executionStats.nReturned);
assert.eq("PhysicalScan", res.queryPlanner.winningPlan.optimizerPlan.child.child.nodeType);
}
@@ -31,7 +33,7 @@ assert.commandWorked(bulk.execute());
{
const res = t.explain("executionStats").aggregate([{$match: {a: {$eq: [2]}}}]);
- assert.eq(2, res.executionStats.nReturned);
+ assert.eq(20, res.executionStats.nReturned);
const indexUnionNode = res.queryPlanner.winningPlan.optimizerPlan.child.child.leftChild.child;
assert.eq("Union", indexUnionNode.nodeType);
@@ -46,7 +48,7 @@ assert.commandWorked(t.createIndex({b: 1, a: 1}));
{
const res = t.explain("executionStats").aggregate([{$match: {b: 1, a: {$eq: [2]}}}]);
- assert.eq(2, res.executionStats.nReturned);
+ assert.eq(20, res.executionStats.nReturned);
// Verify we still get index scan even if the field appears as second index field.
const indexUnionNode = res.queryPlanner.winningPlan.optimizerPlan.child.child.leftChild.child;
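For reference, the updated nReturned expectations in this file (2 -> 20) follow from the new insert loop: of the three documents inserted per iteration, {a: [2]} matches {a: {$eq: [2]}} exactly and {a: [[2]]} matches via its array element, while {a: 2} does not match an array equality predicate, so:

    // Expected matches for {$match: {a: {$eq: [2]}}} after the loop:
    // 10 iterations * 2 matching documents per iteration = 20  (hence nReturned == 20)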
diff --git a/jstests/cqf/basic_agg_expr.js b/jstests/cqf/basic_agg_expr.js
new file mode 100644
index 00000000000..4c66a8799e0
--- /dev/null
+++ b/jstests/cqf/basic_agg_expr.js
@@ -0,0 +1,84 @@
+(function() {
+"use strict";
+
+load('jstests/aggregation/extras/utils.js'); // For assertArrayEq.
+
+load("jstests/libs/optimizer_utils.js"); // For checkCascadesOptimizerEnabled.
+if (!checkCascadesOptimizerEnabled(db)) {
+ jsTestLog("Skipping test because the optimizer is not enabled");
+ return;
+}
+
+const t = db.cqf_agg_expr;
+
+{
+ t.drop();
+ assert.commandWorked(t.insert({a: "a1", b: "b1", c: "c1"}));
+
+ const res =
+ t.aggregate([{$project: {concat: {$concat: ["$a", " - ", "$b", " - ", "$c"]}}}]).toArray();
+
+ assert.eq(1, res.length);
+ assert.eq("a1 - b1 - c1", res[0].concat);
+}
+
+{
+ t.drop();
+ assert.commandWorked(t.insert({a: 5, b: 10, c: 20, d: 25, e: -5, f: 2.4}));
+
+ const res = t.aggregate([{
+ $project: {
+ res1: {$divide: ["$a", "$b"]},
+ res2: {$divide: ["$c", "$a"]},
+ res3: {$mod: ["$d", "$b"]},
+ res4: {$abs: "$e"},
+ res5: {$floor: "$f"},
+ res6: {$ceil: {$ln: "$d"}}
+ }
+ }]).toArray();
+
+ assert.eq(1, res.length);
+ assert.eq(0.5, res[0].res1);
+ assert.eq(4, res[0].res2);
+ assert.eq(5, res[0].res3);
+ assert.eq(5, res[0].res4);
+ assert.eq(2, res[0].res5);
+ assert.eq(4, res[0].res6);
+}
+
+{
+ t.drop();
+ assert.commandWorked(t.insert({a: 1, b: [{c: 2}, {c: 3}]}));
+ assert.commandWorked(t.insert({a: 1, b: [[{c: 2}, {c: 3}]]}));
+
+ const res = t.aggregate([{$project: {a: "$b.c"}}]).toArray();
+
+ assert.eq(2, res.length);
+ assert.eq([2, 3], res[0].a);
+
+ // TODO: SERVER-67153: Clarify behavior of array traversal in agg expression.
+ assert.eq([[2, 3]], res[1].a);
+}
+
+{
+ t.drop();
+ assert.commandWorked(t.insert({_id: 0, a: {b: 1}}));
+ assert.commandWorked(t.insert({_id: 1, a: [{b: 1}]}));
+ assert.commandWorked(t.insert({_id: 2, a: [[{b: 1}]]}));
+
+ assert.commandWorked(t.insert({_id: 3, a: {b: [1]}}));
+ assert.commandWorked(t.insert({_id: 4, a: [{b: [1]}]}));
+ assert.commandWorked(t.insert({_id: 5, a: [[{b: [1]}]]}));
+
+ {
+ const res = t.aggregate([{$match: {$expr: {$eq: ['$a.b', 1]}}}]).toArray();
+
+ assert.eq(1, res.length);
+ assert.eq({b: 1}, res[0].a);
+ }
+ {
+ const res = t.aggregate([{$match: {$expr: {$eq: ['$a.b', [1]]}}}]).toArray();
+ assertArrayEq({actual: res, expected: [{_id: 1, a: [{b: 1}]}, {_id: 3, a: {b: [1]}}]});
+ }
+}
+}());
diff --git a/jstests/cqf/match_with_in.js b/jstests/cqf/match_with_in.js
index 788f91d99f9..58909bd8775 100644
--- a/jstests/cqf/match_with_in.js
+++ b/jstests/cqf/match_with_in.js
@@ -3,6 +3,7 @@
*/
load('jstests/aggregation/extras/utils.js'); // For assertArrayEq.
+load('jstests/libs/optimizer_utils.js');
(function() {
"use strict";
@@ -37,6 +38,11 @@ const runTest = (filter, expected) => {
const result = coll.aggregate({$match: filter}).toArray();
assertArrayEq(
{actual: result, expected: expected, extraErrorMsg: tojson({filter: filter})});
+
+ // Sanity check that the query uses the bonsai optimizer.
+ const explain = assert.commandWorked(db.runCommand(
+ {explain: {aggregate: coll.getName(), pipeline: [{$match: filter}], cursor: {}}}));
+ assert(usedBonsaiOptimizer(explain), tojson(explain));
} finally {
assert.commandWorked(
db.adminCommand({'configureFailPoint': 'disablePipelineOptimization', 'mode': 'off'}));
diff --git a/jstests/libs/cluster_server_parameter_utils.js b/jstests/libs/cluster_server_parameter_utils.js
index 3602bb78212..072fac11d9a 100644
--- a/jstests/libs/cluster_server_parameter_utils.js
+++ b/jstests/libs/cluster_server_parameter_utils.js
@@ -6,13 +6,13 @@
* it to the end of nonTestClusterParameterNames.
* 2. Add the clusterParameter document that's expected as default to the end of
* testOnlyClusterParametersDefault if it's test-only. Otherwise, add it to the end of
- * nonTestOnlyClusterParametersDefault.
+ * nonTestClusterParametersDefault.
* 3. Add the clusterParameter document that setClusterParameter is expected to insert after its
* first invocation to the end of testOnlyClusterParametersInsert if it's test-only. Otherwise,
- * add it to the end of nonTestOnlyClusterParametersInsert.
+ * add it to the end of nonTestClusterParametersInsert.
* 4. Add the clusterParameter document that setClusterParameter is expected to update to after its
* second invocation to the end of testOnlyClusterParametersUpdate if it's test-only. Otherwise,
- * add it to the end of nonTestOnlyClusterParametersUpdate.
+ * add it to the end of nonTestClusterParametersUpdate.
*
*/
@@ -21,9 +21,7 @@ const testOnlyClusterParameterNames = [
"testIntClusterParameter",
"testBoolClusterParameter",
];
-const nonTestClusterParameterNames = [
- "changeStreamOptions",
-];
+const nonTestClusterParameterNames = ["changeStreamOptions", "changeStreams"];
const clusterParameterNames = testOnlyClusterParameterNames.concat(nonTestClusterParameterNames);
const testOnlyClusterParametersDefault = [
@@ -40,12 +38,15 @@ const testOnlyClusterParametersDefault = [
boolData: false,
},
];
-const nonTestClusterParametersDefault = [{
- _id: "changeStreamOptions",
- preAndPostImages: {
- expireAfterSeconds: "off",
+const nonTestClusterParametersDefault = [
+ {
+ _id: "changeStreamOptions",
+ preAndPostImages: {
+ expireAfterSeconds: "off",
+ },
},
-}];
+ {_id: "changeStreams", enabled: false, expireAfterSeconds: NumberLong(0)}
+];
const clusterParametersDefault =
testOnlyClusterParametersDefault.concat(nonTestClusterParametersDefault);
@@ -63,12 +64,19 @@ const testOnlyClusterParametersInsert = [
boolData: true,
},
];
-const nonTestClusterParametersInsert = [{
- _id: "changeStreamOptions",
- preAndPostImages: {
- expireAfterSeconds: 30,
+const nonTestClusterParametersInsert = [
+ {
+ _id: "changeStreamOptions",
+ preAndPostImages: {
+ expireAfterSeconds: 30,
+ },
},
-}];
+ {
+ _id: "changeStreams",
+ enabled: true,
+ expireAfterSeconds: 30,
+ }
+];
const clusterParametersInsert =
testOnlyClusterParametersInsert.concat(nonTestClusterParametersInsert);
@@ -86,12 +94,15 @@ const testOnlyClusterParametersUpdate = [
boolData: false,
},
];
-const nonTestClusterParametersUpdate = [{
- _id: "changeStreamOptions",
- preAndPostImages: {
- expireAfterSeconds: "off",
+const nonTestClusterParametersUpdate = [
+ {
+ _id: "changeStreamOptions",
+ preAndPostImages: {
+ expireAfterSeconds: "off",
+ },
},
-}];
+ {_id: "changeStreams", enabled: false, expireAfterSeconds: NumberLong(0)}
+];
const clusterParametersUpdate =
testOnlyClusterParametersUpdate.concat(nonTestClusterParametersUpdate);
@@ -347,14 +358,12 @@ function testInvalidClusterParameterCommands(conn) {
testInvalidGetClusterParameter(conn.getPrimary());
// Assert that setting a nonexistent parameter on the primary returns an error.
- assert.commandFailedWithCode(
- adminDB.runCommand({setClusterParameter: {nonexistentParam: {intData: 5}}}),
- ErrorCodes.NoSuchKey);
+ assert.commandFailed(
+ adminDB.runCommand({setClusterParameter: {nonexistentParam: {intData: 5}}}));
// Assert that running setClusterParameter with a scalar value fails.
- assert.commandFailedWithCode(
- adminDB.runCommand({setClusterParameter: {testIntClusterParameter: 5}}),
- ErrorCodes.IllegalOperation);
+ assert.commandFailed(
+ adminDB.runCommand({setClusterParameter: {testIntClusterParameter: 5}}));
conn.getSecondaries().forEach(function(secondary) {
// Assert that setClusterParameter cannot be run on a secondary.
@@ -373,14 +382,12 @@ function testInvalidClusterParameterCommands(conn) {
testInvalidGetClusterParameter(conn.s0);
// Assert that setting a nonexistent parameter on the mongos returns an error.
- assert.commandFailedWithCode(
- adminDB.runCommand({setClusterParameter: {nonexistentParam: {intData: 5}}}),
- ErrorCodes.NoSuchKey);
+ assert.commandFailed(
+ adminDB.runCommand({setClusterParameter: {nonexistentParam: {intData: 5}}}));
// Assert that running setClusterParameter with a scalar value fails.
- assert.commandFailedWithCode(
- adminDB.runCommand({setClusterParameter: {testIntClusterParameter: 5}}),
- ErrorCodes.IllegalOperation);
+ assert.commandFailed(
+ adminDB.runCommand({setClusterParameter: {testIntClusterParameter: 5}}));
const shards = [conn.rs0, conn.rs1, conn.rs2];
shards.forEach(function(shard) {
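The header comment of this utility (steps 1-4 above) describes the bookkeeping needed when a new cluster parameter is introduced. A minimal sketch of what that looks like for a hypothetical test-only parameter; the parameter name and its fields below are illustrative only, and in practice the array literals in this file are edited directly rather than mutated with push():

    // Hypothetical example: registering 'testStrClusterParameter' per steps 1-4 above.
    testOnlyClusterParameterNames.push("testStrClusterParameter");
    testOnlyClusterParametersDefault.push({_id: "testStrClusterParameter", strData: "default"});
    testOnlyClusterParametersInsert.push({_id: "testStrClusterParameter", strData: "inserted"});
    testOnlyClusterParametersUpdate.push({_id: "testStrClusterParameter", strData: "updated"});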
diff --git a/jstests/libs/feature_flag_util.js b/jstests/libs/feature_flag_util.js
index 99b33bb0d30..4f7e7eb344a 100644
--- a/jstests/libs/feature_flag_util.js
+++ b/jstests/libs/feature_flag_util.js
@@ -7,7 +7,13 @@ var FeatureFlagUtil = class {
*/
static isEnabled(db, featureFlag) {
return eval(
- `const admin = db.getSiblingDB("admin");
+ `if (db["_mongo"] != undefined &&
+ db["_mongo"]["fullOptions"] != undefined &&
+ db["_mongo"]["fullOptions"]["pathOpts"] != undefined &&
+ db["_mongo"]["fullOptions"]["pathOpts"]["mongos"] != undefined) {
+ throw new Error("Database must not be taken from mongos");
+ }
+ const admin = db.getSiblingDB("admin");
const flagDoc = admin.runCommand({getParameter: 1, featureFlag${featureFlag}: 1});
const fcvDoc = admin.runCommand({getParameter: 1, featureCompatibilityVersion: 1});
flagDoc.hasOwnProperty("featureFlag${featureFlag}") &&
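A hedged usage sketch of this helper as a test might call it; the connection variable and feature flag name are illustrative, and per the new guard above the db handle must come from a mongod rather than a mongos:

    // Illustrative only: skip a test when a (hypothetical) feature flag is disabled.
    load("jstests/libs/feature_flag_util.js");
    if (!FeatureFlagUtil.isEnabled(primary.getDB("test"), "SomeFeature")) {
        jsTestLog("Skipping test because featureFlagSomeFeature is disabled");
    }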
diff --git a/jstests/libs/optimizer_utils.js b/jstests/libs/optimizer_utils.js
index ff2a179388a..35346e02c67 100644
--- a/jstests/libs/optimizer_utils.js
+++ b/jstests/libs/optimizer_utils.js
@@ -1,3 +1,5 @@
+load("jstests/libs/analyze_plan.js");
+
/*
* Utility for checking if the query optimizer is enabled.
*/
@@ -6,3 +8,23 @@ function checkCascadesOptimizerEnabled(theDB) {
return param.hasOwnProperty("featureFlagCommonQueryFramework") &&
param.featureFlagCommonQueryFramework.value;
}
+
+/**
+ * Given the result of an explain command, returns whether the bonsai optimizer was used.
+ */
+function usedBonsaiOptimizer(explain) {
+ if (explain.hasOwnProperty("queryPlanner") &&
+ !explain.queryPlanner.winningPlan.hasOwnProperty("optimizerPlan")) {
+        // This is a find command explain, which means the new optimizer was not used.
+ // TODO SERVER-62407 this assumption may no longer hold true if the translation to ABT
+ // happens directly from a find command.
+ return false;
+ }
+
+ const plannerOutput = getAggPlanStage(explain, "$cursor");
+ if (plannerOutput != null) {
+ return plannerOutput["$cursor"].queryPlanner.winningPlan.hasOwnProperty("optimizerPlan");
+ } else {
+ return explain.queryPlanner.winningPlan.hasOwnProperty("optimizerPlan");
+ }
+} \ No newline at end of file
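A usage sketch of the new helper, mirroring how jstests/cqf/match_with_in.js calls it later in this patch; the collection name here is illustrative:

    // Illustrative only: assert that an aggregation explain went through the bonsai optimizer.
    load("jstests/libs/optimizer_utils.js");
    const explain = assert.commandWorked(db.runCommand(
        {explain: {aggregate: "someColl", pipeline: [{$match: {a: 1}}], cursor: {}}}));
    assert(usedBonsaiOptimizer(explain), tojson(explain));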
diff --git a/jstests/libs/override_methods/hide_column_store_indexes_from_get_indexes.js b/jstests/libs/override_methods/hide_column_store_indexes_from_get_indexes.js
new file mode 100644
index 00000000000..43e16bca453
--- /dev/null
+++ b/jstests/libs/override_methods/hide_column_store_indexes_from_get_indexes.js
@@ -0,0 +1,27 @@
+/**
+ * Loading this file overrides DBCollection.prototype.getIndexes() and aliases 'getIndices' and
+ * 'getIndexSpecs' with an implementation that hides column store indexes from the output. This is
+ * intended to increase the number of tests that can run when a column store index is implicitly
+ * added to every collection.
+ */
+(function() {
+'use strict';
+
+load("jstests/libs/override_methods/override_helpers.js"); // For 'OverrideHelpers'.
+
+DBCollection.prototype.getIndexes = function(filter) {
+ return this
+ .aggregate({$indexStats: {}},
+ {$match: filter || {}},
+ // Hide the implicitly created index from tests that look for indexes
+ {$match: {name: {$ne: "$**_columnstore"}}},
+ {$project: {host: 0, accesses: 0}})
+ .toArray();
+};
+
+DBCollection.prototype.getIndices = DBCollection.prototype.getIndexes;
+DBCollection.prototype.getIndexSpecs = DBCollection.prototype.getIndexes;
+
+OverrideHelpers.prependOverrideInParallelShell(
+ "jstests/libs/override_methods/hide_column_store_indexes_from_get_indexes.js");
+}());
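A sketch of the effect of loading this override; the collection name is illustrative:

    // Illustrative only: with the override loaded, implicitly created column store indexes
    // are filtered out of index listings.
    load("jstests/libs/override_methods/hide_column_store_indexes_from_get_indexes.js");
    const specs = db.someColl.getIndexes();
    assert(!specs.some(spec => spec.name === "$**_columnstore"), tojson(specs));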
diff --git a/jstests/libs/override_methods/inject_tenant_prefix.js b/jstests/libs/override_methods/inject_tenant_prefix.js
index 92749a1b9de..2d46f138291 100644
--- a/jstests/libs/override_methods/inject_tenant_prefix.js
+++ b/jstests/libs/override_methods/inject_tenant_prefix.js
@@ -435,7 +435,7 @@ function convertServerConnectionStringToURI(input) {
* that there is only one such operation.
*/
function getOperationStateDocument(conn) {
- const collection = isShardSplitPassthrough() ? "tenantSplitDonors" : "tenantMigrationDonors";
+ const collection = isShardSplitPassthrough() ? "shardSplitDonors" : "tenantMigrationDonors";
const filter =
isShardSplitPassthrough() ? {tenantIds: TestData.tenantIds} : {tenantId: TestData.tenantId};
const findRes = assert.commandWorked(
diff --git a/jstests/libs/parallelTester.js b/jstests/libs/parallelTester.js
index 8974cf569dd..bb77debbd59 100644
--- a/jstests/libs/parallelTester.js
+++ b/jstests/libs/parallelTester.js
@@ -244,6 +244,10 @@ if (typeof _threadInject != "undefined") {
"collmod_convert_to_unique_apply_ops.js",
"collmod_convert_to_unique_violations.js",
"collmod_convert_to_unique_violations_size_limit.js",
+
+ // TODO (SERVER-63228): Remove this exclusion once the feature flag is enabled by
+ // default.
+ "timeseries/timeseries_index_ttl_partial.js",
]);
// Get files, including files in subdirectories.
@@ -311,7 +315,6 @@ if (typeof _threadInject != "undefined") {
// run in parallel, they could interfere with the cache and cause failures.
parallelFilesDir + "/list_all_local_sessions.js",
parallelFilesDir + "/list_all_sessions.js",
- parallelFilesDir + "/list_local_sessions.js",
parallelFilesDir + "/list_sessions.js",
];
var serialTests = makeKeys(serialTestsArr);
diff --git a/jstests/libs/sbe_assert_error_override.js b/jstests/libs/sbe_assert_error_override.js
index 74ec08ef9bc..35e40388a68 100644
--- a/jstests/libs/sbe_assert_error_override.js
+++ b/jstests/libs/sbe_assert_error_override.js
@@ -25,7 +25,7 @@ const equivalentErrorCodesList = [
[16006, 4997703, 4998202],
[16007, 5066300],
[16020, 5066300],
- [16554, 4974201, 4974203],
+ [16554, ErrorCodes.TypeMismatch, 4974201, 4974203],
[16555, 5073102],
[16608, 4848401],
[16609, 5073101],
diff --git a/jstests/libs/sbe_util.js b/jstests/libs/sbe_util.js
index faed62238d9..be6d3509ea7 100644
--- a/jstests/libs/sbe_util.js
+++ b/jstests/libs/sbe_util.js
@@ -106,14 +106,20 @@ function checkBothEnginesAreRunOnCluster(theDB) {
const getParam = conn.adminCommand({
getParameter: 1,
internalQueryForceClassicEngine: 1,
- internalQueryEnableSlotBasedExecutionEngine: 1
+ internalQueryEnableSlotBasedExecutionEngine: 1,
+ featureFlagSbeFull: 1,
});
if (getParam.hasOwnProperty("internalQueryForceClassicEngine")) {
- if (getParam.internalQueryForceClassicEngine) {
- engineMap.classic++;
- } else {
+ // We say SBE is fully enabled if the engine is on and either
+ // 'featureFlagSbeFull' doesn't exist on the targeted server, or it exists and
+ // is set to true.
+ if (!getParam.internalQueryForceClassicEngine &&
+ (!getParam.hasOwnProperty("featureFlagSbeFull") ||
+ getParam.featureFlagSbeFull.value)) {
engineMap.sbe++;
+ } else {
+ engineMap.classic++;
}
}
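The comment above describes when SBE counts as fully enabled on a node. A condensed sketch of that predicate, assuming getParam is a getParameter response of the shape requested above:

    // Sketch only: SBE counts as fully enabled when the classic engine is not forced and
    // 'featureFlagSbeFull' is either absent from the node or set to true.
    function sbeFullyEnabledOnNode(getParam) {
        return !getParam.internalQueryForceClassicEngine &&
            (!getParam.hasOwnProperty("featureFlagSbeFull") || getParam.featureFlagSbeFull.value);
    }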
diff --git a/jstests/multiVersion/genericSetFCVUsage/upgrade_downgrade_sharded_cluster.js b/jstests/multiVersion/genericSetFCVUsage/upgrade_downgrade_sharded_cluster.js
index 31b79da65bc..f7eeaf6578f 100644
--- a/jstests/multiVersion/genericSetFCVUsage/upgrade_downgrade_sharded_cluster.js
+++ b/jstests/multiVersion/genericSetFCVUsage/upgrade_downgrade_sharded_cluster.js
@@ -17,22 +17,7 @@ load('jstests/multiVersion/libs/multi_cluster.js'); // For upgradeCluster
const dbName = jsTestName();
-// TODO SERVER-65815 remove feature flag check once enabled
-const orphansTrackingFeatureFlagEnabled = TestData.setParameters.featureFlagOrphanTracking;
-
-//==========//
-// TODO SERVER-64400 remove code delimited with //==========// once 6.0 branches out
-const kRangeDeletionNs = "config.rangeDeletions";
-const testOrphansTrackingNS = dbName + '.testOrphansTracking';
-const numOrphanedDocs = 10;
-if (orphansTrackingFeatureFlagEnabled) {
- TestData.skipCheckOrphans = true;
-}
-//==========//
-
function setupClusterAndDatabase(binVersion) {
- // TODO SERVER-64400 remove params related with no-more-autosplitter once 6.0 branches out
- const params = orphansTrackingFeatureFlagEnabled ? {disableResumableRangeDeleter: true} : {};
const st = new ShardingTest({
mongos: 1,
config: 1,
@@ -42,10 +27,8 @@ function setupClusterAndDatabase(binVersion) {
configOptions: {binVersion: binVersion},
rsOptions: {
binVersion: binVersion,
- setParameter: params,
},
rs: {nodes: 2},
- enableBalancer: orphansTrackingFeatureFlagEnabled ? false : true
}
});
st.configRS.awaitReplication();
@@ -53,25 +36,6 @@ function setupClusterAndDatabase(binVersion) {
assert.commandWorked(
st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName}));
- // TODO SERVER-65815 remove feature flag check once enabled
- if (orphansTrackingFeatureFlagEnabled) {
- // TODO SERVER-64400 remove this scope once 6.0 branches out
- // - Shard collection (one big chunk on shard0)
- // - Insert data in range [0, MaxKey)
- // - Split chunk at 0
- // - Move chunks [0, MaxKey] on shard1
- assert.commandWorked(
- st.s.adminCommand({shardCollection: testOrphansTrackingNS, key: {_id: 1}}));
- var batch = st.s.getCollection(testOrphansTrackingNS).initializeOrderedBulkOp();
- for (var i = 0; i < numOrphanedDocs; i++) {
- batch.insert({_id: i});
- }
- assert.commandWorked(batch.execute());
- assert.commandWorked(st.splitAt(testOrphansTrackingNS, {_id: 0}));
- st.s.adminCommand(
- {moveChunk: testOrphansTrackingNS, find: {_id: 0}, to: st.shard1.shardName});
- }
-
return st;
}
@@ -110,23 +74,9 @@ function checkClusterAfterBinaryUpgrade() {
function checkClusterAfterFCVUpgrade(fcv) {
checkConfigAndShardsFCV(fcv);
- // TODO SERVER-65815 remove feature flag check once enabled
- if (orphansTrackingFeatureFlagEnabled) {
- // TODO SERVER-64400 remove this scope once 6.0 branches out
- // Check that orphans counter has been populated
- var doc = st.shard0.getCollection(kRangeDeletionNs).findOne({nss: testOrphansTrackingNS});
- assert.eq(numOrphanedDocs, doc.numOrphanDocs);
- }
}
function checkClusterAfterFCVDowngrade() {
- // TODO SERVER-65815 remove feature flag check once enabled
- if (orphansTrackingFeatureFlagEnabled) {
- // TODO SERVER-64400 remove this scope once 6.0 branches out
- // Check that orphans counter has been unset
- var doc = st.shard0.getCollection(kRangeDeletionNs).findOne({nss: testOrphansTrackingNS});
- assert.eq(undefined, doc.numOrphanDocs);
- }
}
function checkClusterAfterBinaryDowngrade(fcv) {
diff --git a/jstests/noPassthrough/agg_group.js b/jstests/noPassthrough/agg_group.js
index 234237baf72..6d186946ae9 100644
--- a/jstests/noPassthrough/agg_group.js
+++ b/jstests/noPassthrough/agg_group.js
@@ -18,24 +18,13 @@
load("jstests/libs/analyze_plan.js");
-// As of now, $group pushdown to SBE feature is not enabled by default. So, enables it with a
-// minimal configuration of a sharded cluster.
-//
-// TODO Remove {setParameter: "featureFlagSBEGroupPushdown=true"} when the feature is enabled by
-// default.
-const st = new ShardingTest(
- {config: 1, shards: 1, shardOptions: {setParameter: "featureFlagSBEGroupPushdown=true"}});
+const st = new ShardingTest({config: 1, shards: 1});
// This database name can provide multiple similar test cases with a good separate namespace and
// each test case may create a separate collection for its own dataset.
const db = st.getDB(jsTestName());
const dbAtShard = st.shard0.getDB(jsTestName());
-// Makes sure that $group pushdown to SBE feature is enabled.
-assert(
- assert.commandWorked(dbAtShard.adminCommand({getParameter: 1, featureFlagSBEGroupPushdown: 1}))
- .featureFlagSBEGroupPushdown.value);
-
// Makes sure that the test db is sharded and the data is stored into the only shard.
assert.commandWorked(st.s0.adminCommand({enableSharding: db.getName()}));
st.ensurePrimaryShard(db.getName(), st.shard0.shardName);
diff --git a/jstests/noPassthrough/bucket_unpacking_with_sort_granularity_change.js b/jstests/noPassthrough/bucket_unpacking_with_sort_granularity_change.js
new file mode 100644
index 00000000000..719345937ce
--- /dev/null
+++ b/jstests/noPassthrough/bucket_unpacking_with_sort_granularity_change.js
@@ -0,0 +1,102 @@
+// Test that the bucket unpacking with sort rewrite is performed during a granularity change.
+// A granularity change can cause buckets to exceed the max time span, which could cause
+// correctness issues. We check that the results are correct, that the documents are sorted, and
+// that the documents we expect to appear do appear.
+// Note: events in buckets that exceed bucketMaxSpan are not included.
+(function() {
+"use strict";
+
+load("jstests/core/timeseries/libs/timeseries.js"); // For TimeseriesTest
+
+const dbName = jsTestName();
+
+// Start a single mongoD using MongoRunner.
+const conn = MongoRunner.runMongod({setParameter: "featureFlagBucketUnpackWithSort=true"});
+assert.neq(null, conn, "mongod was unable to start up");
+
+// Create the test DB and collection.
+const db = conn.getDB(dbName);
+const adminDB = conn.getDB("admin");
+const collName = dbName;
+const coll = db[collName];
+const minsToMillis = (mins) => mins * 60 * 1000;
+
+if (!TimeseriesTest.bucketUnpackWithSortEnabled(db.getMongo())) {
+ jsTestLog("Skipping test because 'BucketUnpackWithSort' is disabled.");
+ return;
+}
+
+printjson(conn.adminCommand({getParameter: 1, featureFlagBucketUnpackWithSort: 1}));
+
+const on = "alwaysOn";
+const off = "off";
+
+function setAggHang(mode) {
+ assert.commandWorked(adminDB.adminCommand(
+ {configureFailPoint: "hangBeforeDocumentSourceCursorLoadBatch", mode: mode}));
+}
+
+// Setup scenario.
+const timeField = "t";
+coll.drop();
+db.createCollection(collName, {timeseries: {timeField: timeField}});
+assert.commandWorked(coll.insert({[timeField]: new Date(minsToMillis(0))}));
+assert.commandWorked(coll.insert({[timeField]: new Date(minsToMillis(60) - 1)}));
+assert.commandWorked(coll.insert({[timeField]: new Date(minsToMillis(60))}));
+
+// Enable the hang point.
+setAggHang(on);
+
+// Start a parallel shell that checks that the newly inserted data is found by the naive sorter.
+const aggregateNaive = `
+ const testDB = db.getSiblingDB('${dbName}');
+ const testColl = testDB['${dbName}'];
+ const results = testColl.aggregate([{$_internalInhibitOptimization: {}}, {$sort: {t: 1}}]).toArray();
+ assert.eq(results.length, 4, results);
+ `;
+// To assist in debugging we log the explain output.
+const aggregateExpOptimized = `
+ const testDB = db.getSiblingDB('${dbName}');
+ const testColl = testDB['${dbName}'];
+ const results = testColl.explain().aggregate([{$sort: {t: -1}}], {hint: {$natural: -1}});
+ jsTestLog(results);
+ `;
+// Start a parallel shell that checks that the newly inserted data is not found by the bounded
+// sorter.
+const aggregateOptimized = `
+ const testDB = db.getSiblingDB('${dbName}');
+ const testColl = testDB['${dbName}'];
+ const results = testColl.aggregate([{$sort: {t: -1}}], {hint: {$natural: -1}}).toArray();
+ assert.eq(results.length, 2, results);
+ `;
+const mergeShellNaive = startParallelShell(aggregateNaive, db.getMongo().port);
+const mergeShellExplain = startParallelShell(aggregateExpOptimized, db.getMongo().port);
+const mergeShellOptimized = startParallelShell(aggregateOptimized, db.getMongo().port);
+
+// Wait for the parallel shells to hit the failpoint.
+assert.soon(() => db.currentOp({op: "command", "command.aggregate": dbName}).inprog.length == 2,
+ () => tojson(db.currentOp().inprog));
+
+// Reconfigure collection parameters
+assert.commandWorked(db.runCommand({collMod: dbName, timeseries: {granularity: "hours"}}));
+
+// Insert data that will fit in the new wider bucket.
+assert.commandWorked(coll.insert({[timeField]: new Date(minsToMillis(120) + 1)}));
+
+// Turn off the hang.
+setAggHang(off);
+
+jsTestLog(db.system.buckets[dbName].find().toArray());
+
+// Double check that the number of buckets is expected.
+assert.eq(db.system.buckets[dbName].find().toArray().length, 2);
+
+// Finish the computation.
+let resNaive = mergeShellNaive();
+assert(resNaive == 0);
+let resExp = mergeShellExplain();
+assert(resExp == 0);
+let resOpt = mergeShellOptimized();
+assert(resOpt == 0);
+
+MongoRunner.stopMongod(conn);
+})();
diff --git a/jstests/noPassthrough/change_stream_options.js b/jstests/noPassthrough/change_stream_options.js
index 0366d65bcfb..2a36bb840dd 100644
--- a/jstests/noPassthrough/change_stream_options.js
+++ b/jstests/noPassthrough/change_stream_options.js
@@ -1,8 +1,6 @@
// Tests setClusterParameter and getClusterParameter for changeStreamOptions on standalone, replica
// set and sharded cluster configurations.
// @tags: [
-// requires_fcv_60,
-// featureFlagClusterWideConfig,
// requires_replication,
// requires_sharding,
// ]
diff --git a/jstests/noPassthrough/change_stream_pre_image_time_based_expiration_replset.js b/jstests/noPassthrough/change_stream_pre_image_time_based_expiration_replset.js
index 0f50af88ebd..d7af9b045cd 100644
--- a/jstests/noPassthrough/change_stream_pre_image_time_based_expiration_replset.js
+++ b/jstests/noPassthrough/change_stream_pre_image_time_based_expiration_replset.js
@@ -1,7 +1,5 @@
// Tests time-based pre-image retention policy of change stream pre-images remover job.
// @tags: [
-// requires_fcv_60,
-// featureFlagClusterWideConfig,
// requires_replication,
// ]
(function() {
diff --git a/jstests/noPassthrough/change_stream_pre_image_time_based_expiration_sharded.js b/jstests/noPassthrough/change_stream_pre_image_time_based_expiration_sharded.js
index 399134978c2..551ae495b58 100644
--- a/jstests/noPassthrough/change_stream_pre_image_time_based_expiration_sharded.js
+++ b/jstests/noPassthrough/change_stream_pre_image_time_based_expiration_sharded.js
@@ -1,7 +1,5 @@
// Tests time-based pre-image retention policy of change stream pre-images remover job.
// @tags: [
-// requires_fcv_60,
-// featureFlagClusterWideConfig,
// requires_sharding,
// ]
(function() {
diff --git a/jstests/noPassthrough/change_streams_cluster_parameter.js b/jstests/noPassthrough/change_streams_cluster_parameter.js
new file mode 100644
index 00000000000..e76ed12cb04
--- /dev/null
+++ b/jstests/noPassthrough/change_streams_cluster_parameter.js
@@ -0,0 +1,146 @@
+// Tests the 'changeStreams' cluster-wide configuration parameter on the replica sets and the
+// sharded cluster.
+// @tags: [
+// featureFlagClusterWideConfig,
+// requires_replication,
+// requires_sharding,
+// multiversion_incompatible,
+//   featureFlagServerlessChangeStreams,
+//   featureFlagMongoStore,
+// ]
+(function() {
+"use strict";
+
+// Verifies that the 'getClusterParameter' on the 'changeStreams' cluster-wide parameter returns the
+// expected response.
+function assertGetResponse(db, expectedChangeStreamParam) {
+ const response = assert.commandWorked(db.runCommand({getClusterParameter: "changeStreams"}));
+ const enabled = response.clusterParameters[0].enabled;
+ assert.eq(enabled, expectedChangeStreamParam.enabled, response);
+ if (enabled) {
+ // TODO SERVER-67145: For some reason the default 'expireAfterSeconds' is not serialized in
+ // mongoS.
+ assert.eq(response.clusterParameters[0].expireAfterSeconds,
+ expectedChangeStreamParam.expireAfterSeconds,
+ response);
+ }
+}
+
+// Tests the 'changeStreams' cluster-wide configuration parameter with the 'admin' database.
+function testWithAdminDB(conn) {
+ const adminDB = conn.getDB("admin");
+
+    // Change streams are initially disabled.
+ assertGetResponse(adminDB, {enabled: false, expireAfterSeconds: NumberLong(0)});
+
+    // TODO SERVER-67293: Make 'enabled' field required; setting 'changeStreams' parameter without
+    // 'enabled' field should fail.
+    // TODO SERVER-67146: The expected error on missing 'enabled' field should be 'BadValue' or
+    // 'InvalidClusterParameter'.
+
+ // Invalid string value for the 'enabled' parameter should fail.
+ assert.commandFailedWithCode(
+ adminDB.runCommand({setClusterParameter: {changeStreams: {enabled: "yes"}}}),
+ ErrorCodes.TypeMismatch);
+
+ // Enabling change streams without 'expireAfterSeconds' should fail.
+ assert.commandFailedWithCode(
+ adminDB.runCommand({setClusterParameter: {changeStreams: {enabled: true}}}),
+ ErrorCodes.BadValue);
+
+ // Invalid string value for the 'expireAfterSeconds' parameter should fail.
+ assert.commandFailedWithCode(
+ adminDB.runCommand(
+ {setClusterParameter: {changeStreams: {enabled: true, expireAfterSeconds: "off"}}}),
+ ErrorCodes.TypeMismatch);
+
+ // A negative value of 'expireAfterSeconds' should fail.
+ assert.commandFailedWithCode(adminDB.runCommand({
+ setClusterParameter: {changeStreams: {enabled: true, expireAfterSeconds: NumberLong(-1)}}
+ }),
+ ErrorCodes.BadValue);
+
+ // A zero value of 'expireAfterSeconds' should fail.
+ assert.commandFailedWithCode(adminDB.runCommand({
+ setClusterParameter: {changeStreams: {enabled: true, expireAfterSeconds: NumberLong(0)}}
+ }),
+ ErrorCodes.BadValue);
+
+ // Enabling change streams with success.
+ assert.commandWorked(adminDB.runCommand({
+ setClusterParameter: {changeStreams: {enabled: true, expireAfterSeconds: NumberLong(3600)}}
+ }));
+ assertGetResponse(adminDB, {enabled: true, expireAfterSeconds: NumberLong(3600)});
+
+ // Modifying expireAfterSeconds while enabled should succeed.
+ assert.commandWorked(adminDB.runCommand({
+ setClusterParameter: {changeStreams: {enabled: true, expireAfterSeconds: NumberLong(100)}}
+ }));
+ assertGetResponse(adminDB, {enabled: true, expireAfterSeconds: NumberLong(100)});
+
+ // Disabling with (non-zero) 'expireAfterSeconds' should fail.
+ assert.commandFailedWithCode(adminDB.runCommand({
+ setClusterParameter: {changeStreams: {enabled: false, expireAfterSeconds: NumberLong(1)}}
+ }),
+ ErrorCodes.BadValue);
+
+ // Disabling without 'expireAfterSeconds' should succeed.
+ assert.commandWorked(
+ adminDB.runCommand({setClusterParameter: {changeStreams: {enabled: false}}}));
+ assertGetResponse(adminDB, {enabled: false, expireAfterSeconds: NumberLong(0)});
+
+ // Disabling again should succeed.
+ assert.commandWorked(
+ adminDB.runCommand({setClusterParameter: {changeStreams: {enabled: false}}}));
+ assertGetResponse(adminDB, {enabled: false, expireAfterSeconds: NumberLong(0)});
+}
+
+function testWithoutAdminDB(conn) {
+ const db = conn.getDB(jsTestName());
+ assert.commandFailedWithCode(db.runCommand({getClusterParameter: "changeStreams"}),
+ ErrorCodes.Unauthorized);
+ assert.commandFailedWithCode(db.runCommand({
+ setClusterParameter: {changeStreams: {enabled: true, expireAfterSeconds: NumberLong(3600)}}
+ }),
+ ErrorCodes.Unauthorized);
+}
+
+// Tests the set and get change streams parameter on the replica-set.
+{
+ const rst = new ReplSetTest({name: "replSet", nodes: 2});
+ rst.startSet();
+ rst.initiate();
+
+ const primary = rst.getPrimary();
+ const secondary = rst.getSecondaries()[0];
+
+ // Verify that the set and get commands cannot be issued on database other than the 'admin'.
+ [primary, secondary].forEach(conn => {
+ testWithoutAdminDB(conn);
+ });
+
+ // Tests the set and get commands on the primary node.
+ testWithAdminDB(primary);
+
+ rst.stopSet();
+}
+
+// Tests the set and get change streams parameter on the sharded cluster.
+{
+ const st = new ShardingTest({shards: 1, mongos: 1});
+ const adminDB = st.rs0.getPrimary().getDB("admin");
+
+ // Test that setClusterParameter cannot be issued directly on shards in the sharded cluster,
+ // while getClusterParameter can.
+ assert.commandFailedWithCode(adminDB.runCommand({
+ setClusterParameter: {changeStreams: {enabled: true, expireAfterSeconds: NumberLong(3600)}}
+ }),
+ ErrorCodes.NotImplemented);
+ assertGetResponse(adminDB, {enabled: false, expireAfterSeconds: NumberLong(0)});
+
+ // Run the set and get commands on the mongoS.
+ testWithAdminDB(st.s);
+
+ st.stop();
+}
+}());
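For reference, a sketch of the getClusterParameter response shape that assertGetResponse above relies on; extra fields are elided and values are illustrative:

    // {
    //     ok: 1,
    //     clusterParameters: [
    //         {_id: "changeStreams", enabled: true, expireAfterSeconds: NumberLong(3600), ...}
    //     ]
    // }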
diff --git a/jstests/noPassthrough/check_sbe_lookup_feature.js b/jstests/noPassthrough/check_sbe_lookup_feature.js
deleted file mode 100644
index 9841522c806..00000000000
--- a/jstests/noPassthrough/check_sbe_lookup_feature.js
+++ /dev/null
@@ -1,15 +0,0 @@
-(function() {
-'use strict';
-
-const conn = MongoRunner.runMongod();
-
-const res = assert.commandWorked(
- conn.getDB("admin").adminCommand({getParameter: 1, featureFlagSBELookupPushdown: 1}),
- "featureFlagSBELookupPushdown must have been turned on by default since 6.0");
-assert(res.hasOwnProperty("featureFlagSBELookupPushdown"), res);
-const featureFlag = res.featureFlagSBELookupPushdown;
-assert(featureFlag.hasOwnProperty("value") && featureFlag.value, res);
-assert(featureFlag.hasOwnProperty("version") && featureFlag.version == "6.0", res);
-
-MongoRunner.stopMongod(conn);
-}());
diff --git a/jstests/noPassthrough/cluster-server-parameter-op-observer.js b/jstests/noPassthrough/cluster-server-parameter-op-observer.js
index b0bc07fee1a..ce8f7dbcb11 100644
--- a/jstests/noPassthrough/cluster-server-parameter-op-observer.js
+++ b/jstests/noPassthrough/cluster-server-parameter-op-observer.js
@@ -4,10 +4,6 @@
(function() {
'use strict';
-if (!TestData.setParameters.featureFlagClusterWideConfig) {
- return;
-}
-
const kUnknownCSPLogId = 6226300;
const kUnknownCSPLogComponent = 'control';
const kUnknownCSPLogLevel = 3;
diff --git a/jstests/noPassthrough/cluster_server_parameter_refresher.js b/jstests/noPassthrough/cluster_server_parameter_refresher.js
index a7aaf4cd880..8edc13641eb 100644
--- a/jstests/noPassthrough/cluster_server_parameter_refresher.js
+++ b/jstests/noPassthrough/cluster_server_parameter_refresher.js
@@ -2,9 +2,8 @@
* Checks that the mongos cluster server parameter refresh job runs as expected.
*
* @tags: [
- * # Requires all nodes to be running the latest binary.
+ * # Requires all nodes to be running at least 6.1.
* requires_fcv_61,
- * featureFlagClusterWideConfig,
* featureFlagClusterWideConfigM2,
* does_not_support_stepdowns,
* requires_replication,
diff --git a/jstests/noPassthrough/cqf_fallback.js b/jstests/noPassthrough/cqf_fallback.js
new file mode 100644
index 00000000000..8e0b34b64e6
--- /dev/null
+++ b/jstests/noPassthrough/cqf_fallback.js
@@ -0,0 +1,212 @@
+/**
+ * Verify that expressions and operators are correctly routed to CQF where eligible. This decision
+ * is based on several factors including the query text, collection metadata, etc..
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/analyze_plan.js");
+load("jstests/libs/optimizer_utils.js");
+
+let conn = MongoRunner.runMongod({setParameter: {featureFlagCommonQueryFramework: true}});
+assert.neq(null, conn, "mongod was unable to start up");
+
+let db = conn.getDB("test");
+let coll = db[jsTestName()];
+coll.drop();
+
+// This test relies on the bonsai optimizer being enabled.
+if (assert.commandWorked(db.adminCommand({getParameter: 1, internalQueryForceClassicEngine: 1}))
+ .internalQueryForceClassicEngine == true) {
+ jsTestLog("Skipping test due to internalQueryForceClassicEngine");
+ MongoRunner.stopMongod(conn);
+ return;
+}
+
+function assertUsesFallback(cmd, testOnly) {
+ // An unsupported stage should not use the new optimizer.
+ assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryForceCommonQueryFramework: false}));
+ const defaultExplain = assert.commandWorked(db.runCommand({explain: cmd}));
+ assert(!usedBonsaiOptimizer(defaultExplain), tojson(defaultExplain));
+
+    // Non-explain should also work and use the fallback mechanism, but we cannot verify this
+    // exactly without looking at the logs.
+ assert.commandWorked(db.runCommand(cmd));
+
+ // Force the bonsai optimizer and expect the query to either fail if unsupported, or pass if
+ // marked as "test only".
+ assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryForceCommonQueryFramework: true}));
+ if (testOnly) {
+ const explain = assert.commandWorked(db.runCommand({explain: cmd}));
+ assert(usedBonsaiOptimizer(explain), tojson(explain));
+ } else {
+ assert.commandFailedWithCode(db.runCommand(cmd), ErrorCodes.InternalErrorNotSupported);
+ }
+
+ // Forcing the classic engine should override the CQF flag.
+ {
+ assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryForceClassicEngine: true}));
+ const explain = assert.commandWorked(db.runCommand({explain: cmd}));
+ assert(!usedBonsaiOptimizer(explain), tojson(explain));
+ assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryForceClassicEngine: false}));
+ }
+}
+
+// Unsupported aggregation stage.
+assertUsesFallback({aggregate: coll.getName(), pipeline: [{$sample: {size: 1}}], cursor: {}},
+ false);
+
+// Test-only aggregation stage.
+assertUsesFallback(
+ {aggregate: coll.getName(), pipeline: [{$group: {_id: null, a: {$sum: "$b"}}}], cursor: {}},
+ true);
+
+// Unsupported match expression.
+assertUsesFallback({find: coll.getName(), filter: {a: {$mod: [4, 0]}}}, false);
+assertUsesFallback(
+ {aggregate: coll.getName(), pipeline: [{$match: {a: {$mod: [4, 0]}}}], cursor: {}}, false);
+assertUsesFallback({find: coll.getName(), filter: {a: {$in: [/^b/, 1]}}}, false);
+
+// Test-only match expression.
+assertUsesFallback({find: coll.getName(), filter: {$alwaysFalse: 1}}, true);
+assertUsesFallback({aggregate: coll.getName(), pipeline: [{$match: {$alwaysFalse: 1}}], cursor: {}},
+ true);
+
+// Unsupported projection expression.
+assertUsesFallback(
+    {find: coll.getName(), filter: {}, projection: {a: {$concatArrays: [["$b"], ["supported"]]}}},
+ false);
+assertUsesFallback({
+ aggregate: coll.getName(),
+    pipeline: [{$project: {a: {$concatArrays: [["$b"], ["supported"]]}}}],
+ cursor: {}
+},
+ false);
+
+// Test-only projection spec.
+assertUsesFallback(
+ {find: coll.getName(), filter: {}, projection: {a: {$concat: ["test", "-only"]}}}, true);
+assertUsesFallback({
+ aggregate: coll.getName(),
+ pipeline: [{$project: {a: {$concat: ["test", "-only"]}}}],
+ cursor: {}
+},
+ true);
+
+// Numeric path components are not supported, either in a match expression or projection.
+assertUsesFallback({find: coll.getName(), filter: {'a.0': 5}});
+assertUsesFallback({find: coll.getName(), filter: {'a.0.b': 5}});
+assertUsesFallback({find: coll.getName(), filter: {}, projection: {'a.0': 1}});
+assertUsesFallback({find: coll.getName(), filter: {}, projection: {'a.5.c': 0}});
+
+// Test for unsupported expressions within a branching expression such as $or.
+assertUsesFallback({find: coll.getName(), filter: {$or: [{'a.0': 5}, {a: 1}]}});
+assertUsesFallback({find: coll.getName(), filter: {$or: [{a: 5}, {a: {$mod: [4, 0]}}]}});
+
+// Unsupported command options.
+assertUsesFallback({find: coll.getName(), filter: {}, collation: {locale: "fr_CA"}}, true);
+assertUsesFallback({
+ aggregate: coll.getName(),
+ pipeline: [{$match: {$alwaysFalse: 1}}],
+ collation: {locale: "fr_CA"},
+ cursor: {}
+},
+ true);
+
+// Unsupported index type.
+assert.commandWorked(coll.createIndex({a: 1}, {sparse: true}));
+assertUsesFallback({find: coll.getName(), filter: {}});
+assertUsesFallback({aggregate: coll.getName(), pipeline: [], cursor: {}});
+coll.drop();
+assert.commandWorked(coll.insert({a: 1}));
+assert.commandWorked(coll.createIndex({"$**": 1}));
+assertUsesFallback({find: coll.getName(), filter: {}});
+assertUsesFallback({aggregate: coll.getName(), pipeline: [], cursor: {}});
+
+// Test-only index type.
+coll.drop();
+assert.commandWorked(coll.insert({a: 1}));
+assert.commandWorked(coll.createIndex({a: 1}, {partialFilterExpression: {a: {$gt: 0}}}));
+assertUsesFallback({find: coll.getName(), filter: {}}, true);
+assertUsesFallback({aggregate: coll.getName(), pipeline: [], cursor: {}}, true);
+
+// Unsupported collection types. Note that a query against the user-facing timeseries collection
+// will fail due to the unsupported $unpackBucket stage.
+coll.drop();
+assert.commandWorked(db.createCollection(coll.getName(), {timeseries: {timeField: "time"}}));
+assertUsesFallback({find: coll.getName(), filter: {}}, false);
+assertUsesFallback({aggregate: coll.getName(), pipeline: [], cursor: {}}, false);
+
+const bucketColl = db.getCollection('system.buckets.' + coll.getName());
+assertUsesFallback({find: bucketColl.getName(), filter: {}}, false);
+assertUsesFallback({aggregate: bucketColl.getName(), pipeline: [], cursor: {}}, false);
+
+// Collection-default collation is not supported if non-simple.
+coll.drop();
+assert.commandWorked(db.createCollection(coll.getName(), {collation: {locale: "fr_CA"}}));
+assertUsesFallback({find: coll.getName(), filter: {}}, false);
+assertUsesFallback({aggregate: coll.getName(), pipeline: [], cursor: {}}, false);
+
+// Queries over views are supported as long as the resolved pipeline is valid in CQF.
+coll.drop();
+assert.commandWorked(coll.insert({a: 1}));
+assert.commandWorked(
+ db.runCommand({create: "view", viewOn: coll.getName(), pipeline: [{$match: {a: 1}}]}));
+
+// Unsupported expression on top of the view.
+assertUsesFallback({find: "view", filter: {a: {$mod: [4, 0]}}}, false);
+
+// Supported expression on top of the view.
+assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryForceCommonQueryFramework: true}));
+assert.commandWorked(db.runCommand({find: "view", filter: {b: 4}}));
+
+// Test-only expression on top of a view.
+assertUsesFallback({find: "view", filter: {$alwaysFalse: 1}}, true);
+
+// Create a view with an unsupported expression.
+assert.commandWorked(db.runCommand(
+ {create: "invalidView", viewOn: coll.getName(), pipeline: [{$match: {a: {$mod: [4, 0]}}}]}));
+
+// Any expression, supported or not, should not use CQF over the invalid view.
+assertUsesFallback({find: "invalidView", filter: {b: 4}}, false);
+
+// A test-only expression should also fail.
+assertUsesFallback({find: "invalidView", filter: {$alwaysFalse: 1}}, true);
+
+MongoRunner.stopMongod(conn);
+
+// Restart the mongod and verify that we never use the bonsai optimizer if the feature flag is not
+// set.
+conn = MongoRunner.runMongod();
+assert.neq(null, conn, "mongod was unable to start up");
+
+db = conn.getDB("test");
+coll = db[jsTestName()];
+coll.drop();
+
+const supportedExpression = {
+ a: {$eq: 4}
+};
+
+let explain = coll.explain().find(supportedExpression).finish();
+assert(!usedBonsaiOptimizer(explain), tojson(explain));
+
+explain = coll.explain().aggregate([{$match: supportedExpression}]);
+assert(!usedBonsaiOptimizer(explain), tojson(explain));
+
+// Setting the force CQF flag has no effect.
+assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryForceCommonQueryFramework: true}));
+explain = coll.explain().find(supportedExpression).finish();
+assert(!usedBonsaiOptimizer(explain), tojson(explain));
+
+explain = coll.explain().aggregate([{$match: supportedExpression}]);
+assert(!usedBonsaiOptimizer(explain), tojson(explain));
+
+MongoRunner.stopMongod(conn);
+}()); \ No newline at end of file
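A condensed summary of the knob interplay this test exercises, written as comments (a sketch of the observed behavior, not the server's actual decision code):

    // featureFlagCommonQueryFramework=false       -> bonsai is never used, even if forced.
    // internalQueryForceClassicEngine=true        -> classic engine; overrides the CQF force knob.
    // internalQueryForceCommonQueryFramework=true -> bonsai is forced; unsupported queries fail
    //                                                with InternalErrorNotSupported, while
    //                                                test-only features are allowed.
    // neither forced                              -> bonsai is used only for fully supported
    //                                                queries; everything else falls back.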
diff --git a/jstests/noPassthrough/disabled_cluster_server_parameters.js b/jstests/noPassthrough/disabled_cluster_server_parameters.js
index e2da17b49c5..5e160a139b1 100644
--- a/jstests/noPassthrough/disabled_cluster_server_parameters.js
+++ b/jstests/noPassthrough/disabled_cluster_server_parameters.js
@@ -3,9 +3,6 @@
* is false.
*
* @tags: [
- * # Requires all nodes to be running the latest binary.
- * requires_fcv_60,
- * featureFlagClusterWideConfig,
* does_not_support_stepdowns,
* requires_replication,
* requires_sharding
diff --git a/jstests/noPassthrough/explain_group_stage_exec_stats.js b/jstests/noPassthrough/explain_group_stage_exec_stats.js
index 2a171485424..a1cef128ace 100644
--- a/jstests/noPassthrough/explain_group_stage_exec_stats.js
+++ b/jstests/noPassthrough/explain_group_stage_exec_stats.js
@@ -13,7 +13,7 @@ const testDB = conn.getDB('test');
const coll = testDB.explain_group_stage_exec_stats;
coll.drop();
-if (checkSBEEnabled(testDB, ["featureFlagSBEGroupPushdown"])) {
+if (checkSBEEnabled(testDB)) {
// When the SBE $group pushdown feature is enabled, a $group alone is pushed down and the
// memory usage tracking isn't on a per-accumulator basis so this test is exercising
// spilling behavior of the classic DocumentSourceGroup stage.
diff --git a/jstests/noPassthrough/group_tmp_file_cleanup.js b/jstests/noPassthrough/group_tmp_file_cleanup.js
index 42c7b95de88..4482b5d61fe 100644
--- a/jstests/noPassthrough/group_tmp_file_cleanup.js
+++ b/jstests/noPassthrough/group_tmp_file_cleanup.js
@@ -12,6 +12,7 @@ const memoryLimitBytes = memoryLimitMb * 1024 * 1024;
const conn = MongoRunner.runMongod({
setParameter: {
internalDocumentSourceGroupMaxMemoryBytes: memoryLimitBytes,
+ internalQuerySlotBasedExecutionHashAggApproxMemoryUseInBytesBeforeSpill: memoryLimitBytes
}
});
const testDb = conn.getDB(jsTestName());
@@ -23,8 +24,9 @@ for (let i = 0; i < memoryLimitMb + 1; ++i)
assert.commandWorked(testDb.largeColl.insert({x: i, largeStr: largeStr + i}));
// Inhibit optimization so that $group runs in the classic engine.
-const pipeline =
+let pipeline =
[{$_internalInhibitOptimization: {}}, {$group: {_id: '$largeStr', minId: {$min: '$_id'}}}];
+
// Make sure that the pipeline needs to spill to disk.
assert.throwsWithCode(() => testDb.largeColl.aggregate(pipeline, {allowDiskUse: false}),
ErrorCodes.QueryExceededMemoryLimitNoDiskUseAllowed);
@@ -32,5 +34,13 @@ assert.throwsWithCode(() => testDb.largeColl.aggregate(pipeline, {allowDiskUse:
testDb.largeColl.aggregate(pipeline);
assert.eq(listFiles(conn.dbpath + "/_tmp").length, 0);
+// Run the pipeline without $_internalInhibitOptimization so that $group runs in the sbe engine.
+pipeline = [{$group: {_id: '$largeStr', minId: {$min: '$_id'}}}];
+
+// Make sure that the pipeline needs to spill to disk.
+assert.throwsWithCode(() => testDb.largeColl.aggregate(pipeline, {allowDiskUse: false}),
+ ErrorCodes.QueryExceededMemoryLimitNoDiskUseAllowed);
+testDb.largeColl.aggregate(pipeline);
+
MongoRunner.stopMongod(conn);
-})(); \ No newline at end of file
+})();
diff --git a/jstests/core/list_local_sessions.js b/jstests/noPassthrough/list_local_sessions.js
index bee5c084ca7..60b73f7f6ad 100644
--- a/jstests/core/list_local_sessions.js
+++ b/jstests/noPassthrough/list_local_sessions.js
@@ -8,12 +8,20 @@
// # Sessions are asynchronously flushed to disk, so a stepdown immediately after calling
// # startSession may cause this test to fail to find the returned sessionId.
// does_not_support_stepdowns,
+// requires_sharding,
// ]
(function() {
'use strict';
-const admin = db.getSiblingDB('admin');
+const st = new ShardingTest({
+ shards: 1,
+ mongos: 1,
+ other: {mongosOptions: {setParameter: {disableLogicalSessionCacheRefresh: true}}}
+});
+
+const admin = st.s.getDB("admin");
+
function listLocalSessions() {
return admin.aggregate([{'$listLocalSessions': {allUsers: false}}]);
}
@@ -23,7 +31,7 @@ let originalLogLevel = assert.commandWorked(admin.setLogLevel(1)).was.verbosity;
try {
// Start a new session and capture its sessionId.
- const myid = assert.commandWorked(db.runCommand({startSession: 1})).id.id;
+ const myid = assert.commandWorked(st.s.adminCommand({startSession: 1})).id.id;
assert(myid !== undefined);
// Ensure that the cache now contains the session and is visible.
@@ -80,4 +88,6 @@ try {
} finally {
admin.setLogLevel(originalLogLevel);
}
+
+st.stop();
})();
diff --git a/jstests/noPassthrough/lookup_max_intermediate_size.js b/jstests/noPassthrough/lookup_max_intermediate_size.js
index f2d5704c7da..939bfa93a57 100644
--- a/jstests/noPassthrough/lookup_max_intermediate_size.js
+++ b/jstests/noPassthrough/lookup_max_intermediate_size.js
@@ -93,9 +93,7 @@ const db = standalone.getDB("test");
db.lookUp.drop();
const expectedErrorCode =
- (checkSBEEnabled(db, ["featureFlagSBELookupPushdown", "featureFlagSbeFull"]))
- ? ErrorCodes.ExceededMemoryLimit
- : 4568;
+ (checkSBEEnabled(db, ["featureFlagSbeFull"])) ? ErrorCodes.ExceededMemoryLimit : 4568;
runTest(db.lookUp, db.from, expectedErrorCode);
MongoRunner.stopMongod(standalone);
diff --git a/jstests/noPassthrough/lookup_pushdown.js b/jstests/noPassthrough/lookup_pushdown.js
index 9497e9d135c..09ad334a16c 100644
--- a/jstests/noPassthrough/lookup_pushdown.js
+++ b/jstests/noPassthrough/lookup_pushdown.js
@@ -18,10 +18,8 @@ const JoinAlgorithm = {
};
// Standalone cases.
-const conn = MongoRunner.runMongod({
- setParameter:
- {featureFlagSBELookupPushdown: true, featureFlagSbeFull: true, allowDiskUseByDefault: false}
-});
+const conn =
+ MongoRunner.runMongod({setParameter: {featureFlagSbeFull: true, allowDiskUseByDefault: false}});
assert.neq(null, conn, "mongod was unable to start up");
const name = "lookup_pushdown";
const foreignCollName = "foreign_lookup_pushdown";
@@ -118,7 +116,7 @@ function runTest(coll,
}
let db = conn.getDB(name);
-if (!checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
+if (!checkSBEEnabled(db)) {
jsTestLog("Skipping test because either the sbe lookup pushdown feature flag is disabled or" +
" sbe itself is disabled");
MongoRunner.stopMongod(conn);
@@ -742,8 +740,7 @@ MongoRunner.stopMongod(conn);
// Verify that pipeline stages get pushed down according to the subset of SBE that is enabled.
(function verifyPushdownLogicSbePartiallyEnabled() {
- const conn = MongoRunner.runMongod(
- {setParameter: {featureFlagSBELookupPushdown: true, allowDiskUseByDefault: false}});
+ const conn = MongoRunner.runMongod({setParameter: {allowDiskUseByDefault: false}});
const db = conn.getDB(name);
if (checkSBEEnabled(db, ["featureFlagSbeFull"])) {
jsTestLog("Skipping test case because SBE is fully enabled, but this test case assumes" +
@@ -826,8 +823,7 @@ MongoRunner.stopMongod(conn);
(function testHashJoinQueryKnobs() {
// Create a new scope and start a new mongod so that the mongod-wide global state changes do not
// affect subsequent tests if any.
- const conn = MongoRunner.runMongod(
- {setParameter: {featureFlagSBELookupPushdown: true, featureFlagSbeFull: true}});
+ const conn = MongoRunner.runMongod({setParameter: {featureFlagSbeFull: true}});
const db = conn.getDB(name);
const lcoll = db.query_knobs_local;
const fcoll = db.query_knobs_foreign;
@@ -1002,15 +998,7 @@ MongoRunner.stopMongod(conn);
const st = new ShardingTest({
shards: 2,
mongos: 1,
- other: {
- shardOptions: {
- setParameter: {
- featureFlagSBELookupPushdown: true,
- featureFlagSbeFull: true,
- allowDiskUseByDefault: false
- }
- }
- }
+ other: {shardOptions: {setParameter: {featureFlagSbeFull: true, allowDiskUseByDefault: false}}}
});
db = st.s.getDB(name);
diff --git a/jstests/noPassthrough/lookup_with_limit_sharded.js b/jstests/noPassthrough/lookup_with_limit_sharded.js
index cb4d6953d1e..6846db7f0f8 100644
--- a/jstests/noPassthrough/lookup_with_limit_sharded.js
+++ b/jstests/noPassthrough/lookup_with_limit_sharded.js
@@ -19,7 +19,7 @@ load("jstests/libs/sbe_util.js"); // For checkSBEEnabled.
const st = new ShardingTest({shards: 2, config: 1});
const db = st.s.getDB("test");
-if (!checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
+if (!checkSBEEnabled(db)) {
jsTestLog("Skipping test because SBE $lookup is not enabled.");
st.stop();
return;
diff --git a/jstests/noPassthrough/pid_testing_log.js b/jstests/noPassthrough/pid_testing_log.js
new file mode 100644
index 00000000000..91ff3d7612e
--- /dev/null
+++ b/jstests/noPassthrough/pid_testing_log.js
@@ -0,0 +1,51 @@
+load("jstests/libs/parallelTester.js");
+
+/**
+ * @tags: [requires_replication, requires_sharding]
+ *
+ * Test that servers set up in testing mode print the
+ * pid when they connect as a client to a mongod.
+ *
+ */
+
+(function() {
+"use strict";
+
+const rsMin = 10;
+const rsMax = 20;
+
+const baselineParameters = {
+ ShardingTaskExecutorPoolMinSize: rsMin,
+ ShardingTaskExecutorPoolMaxSize: rsMax,
+ ShardingTaskExecutorPoolMinSizeForConfigServers: 4,
+ ShardingTaskExecutorPoolMaxSizeForConfigServers: 6,
+};
+
+const mongosParameters = Object.assign(
+ {logComponentVerbosity: tojson({network: {connectionPool: 5}})}, baselineParameters);
+
+const st = new ShardingTest({
+ config: {nodes: 1},
+ shards: 1,
+ rs0: {nodes: 1},
+ mongos: [{setParameter: mongosParameters}],
+});
+const mongos = st.s0;
+
+const populateTestDb = () => {
+ const db = mongos.getDB('test');
+ const coll = db.test;
+ assert.commandWorked(coll.insert({x: 1}));
+};
+
+populateTestDb();
+
+let log = checkLog.getGlobalLog(mongos);
+let hits = log.map(line => JSON.parse(line))
+ .filter(o => o.msg == "client metadata")
+ .filter(o => o.attr.doc.application.pid !== null);
+
+assert(hits.length > 0);
+
+st.stop();
+})();
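For reference, a rough sketch of the client metadata log entries the filter above matches; only the fields the test reads are spelled out, everything else is elided:

    // {"msg": "client metadata",
    //  "attr": {"doc": {"application": {"pid": <pid>, ...}, ...}, ...}, ...}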
diff --git a/jstests/noPassthrough/plan_cache_group_lookup.js b/jstests/noPassthrough/plan_cache_group_lookup.js
new file mode 100644
index 00000000000..c23b7cfd566
--- /dev/null
+++ b/jstests/noPassthrough/plan_cache_group_lookup.js
@@ -0,0 +1,236 @@
+/**
+ * Test that plans with $group and $lookup lowered to SBE are cached and invalidated correctly.
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/profiler.js"); // For getLatestProfilerEntry.
+load("jstests/libs/sbe_util.js"); // For checkSBEEnabled.
+
+const conn = MongoRunner.runMongod();
+const db = conn.getDB("test");
+const coll = db.plan_cache_pipeline;
+const foreignColl = db.plan_cache_pipeline_foreign;
+
+if (!checkSBEEnabled(db)) {
+ jsTest.log("Skipping test because SBE is not enabled");
+ MongoRunner.stopMongod(conn);
+ return;
+}
+
+const sbeFullEnabled = checkSBEEnabled(db, ["featureFlagSbeFull"]);
+const sbePlanCacheEnabled = checkSBEEnabled(db, ["featureFlagSbePlanCache"]);
+
+assert.commandWorked(coll.insert({a: 1}));
+assert.commandWorked(coll.createIndex({a: 1, a1: 1}));
+assert.commandWorked(coll.createIndex({a: 1, a2: 1}));
+function setupForeignColl(index) {
+ foreignColl.drop();
+ assert.commandWorked(foreignColl.insert({b: 1}));
+ if (index) {
+ assert.commandWorked(foreignColl.createIndex(index));
+ }
+}
+assert.commandWorked(db.setProfilingLevel(2));
+
+/**
+ * Assert that the last aggregation command has a corresponding plan cache entry with the desired
+ * properties. 'version' is 1 if it's classic cache, 2 if it's SBE cache. 'isActive' is true if the
+ * cache entry is active. 'fromMultiPlanner' is true if the query part of aggregation has been
+ * multi-planned. 'forcesClassicEngine' is true if the query is forced to use classic engine.
+ */
+function assertCacheUsage({version, fromMultiPlanner, isActive, forcesClassicEngine = false}) {
+ const profileObj = getLatestProfilerEntry(
+ db, {op: "command", "command.pipeline": {$exists: true}, ns: coll.getFullName()});
+ assert.eq(fromMultiPlanner, !!profileObj.fromMultiPlanner, profileObj);
+
+ const entries = coll.getPlanCache().list();
+ assert.eq(entries.length, 1, entries);
+ const entry = entries[0];
+ assert.eq(entry.version, version, entry);
+ assert.eq(entry.isActive, isActive, entry);
+ assert.eq(entry.planCacheKey, profileObj.planCacheKey, entry);
+
+ const explain = coll.explain().aggregate(profileObj.command.pipeline);
+ const queryPlanner = explain.hasOwnProperty("queryPlanner")
+ ? explain.queryPlanner
+ : explain.stages[0].$cursor.queryPlanner;
+ if (!forcesClassicEngine) {
+ assert(queryPlanner.winningPlan.hasOwnProperty("slotBasedPlan"), explain);
+ }
+ assert.eq(queryPlanner.planCacheKey, entry.planCacheKey, explain);
+
+ return entry;
+}
+
+/**
+ * Run the pipeline three times, asserting that the plan cache entry of the given "version" behaves as follows:
+ * 1. The pipeline runs from the multi-planner, saving an inactive cache entry.
+ * 2. The pipeline runs from the multi-planner, activating the cache entry.
+ * 3. The pipeline runs from the cached solution planner, using the active cache entry.
+ */
+function testLoweredPipeline({pipeline, version, forcesClassicEngine = false}) {
+ let results = coll.aggregate(pipeline).toArray();
+ assert.eq(results.length, 1, results);
+ const entry = assertCacheUsage(
+ {version: version, fromMultiPlanner: true, isActive: false, forcesClassicEngine});
+
+ results = coll.aggregate(pipeline).toArray();
+ assert.eq(results.length, 1, results);
+ let nextEntry = assertCacheUsage(
+ {version: version, fromMultiPlanner: true, isActive: true, forcesClassicEngine});
+ assert.eq(entry.planCacheKey, nextEntry.planCacheKey, {entry, nextEntry});
+
+ results = coll.aggregate(pipeline).toArray();
+ assert.eq(results.length, 1, results);
+ nextEntry = assertCacheUsage(
+ {version: version, fromMultiPlanner: false, isActive: true, forcesClassicEngine});
+ assert.eq(entry.planCacheKey, nextEntry.planCacheKey, {entry, nextEntry});
+
+ return nextEntry;
+}
+
+const multiPlanningQueryStage = {
+ $match: {a: 1}
+};
+const lookupStage = {
+ $lookup: {from: foreignColl.getName(), localField: "a", foreignField: "b", as: "matched"}
+};
+const groupStage = {
+ $group: {_id: "$a", out: {"$sum": 1}}
+};
+
+(function testLoweredPipelineCombination() {
+ setupForeignColl();
+
+ coll.getPlanCache().clear();
+ testLoweredPipeline(
+ {pipeline: [multiPlanningQueryStage, lookupStage], version: sbePlanCacheEnabled ? 2 : 1});
+
+    // TODO SERVER-61507: Update tests on $group when it's integrated into the SBE plan cache.
+ coll.getPlanCache().clear();
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, groupStage], version: 1});
+
+ coll.getPlanCache().clear();
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, lookupStage, groupStage], version: 1});
+
+ coll.getPlanCache().clear();
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, groupStage, lookupStage], version: 1});
+})();
+
+(function testPartiallyLoweredPipeline() {
+ coll.getPlanCache().clear();
+ setupForeignColl();
+ testLoweredPipeline({
+ pipeline: [multiPlanningQueryStage, lookupStage, {$_internalInhibitOptimization: {}}],
+ version: sbePlanCacheEnabled ? 2 : 1
+ });
+})();
+
+(function testNonExistentForeignCollectionCache() {
+ if (!sbePlanCacheEnabled) {
+ jsTestLog(
+ "Skipping testNonExistentForeignCollectionCache when SBE plan cache is not enabled");
+ return;
+ }
+
+ coll.getPlanCache().clear();
+ foreignColl.drop();
+ const entryWithoutForeignColl =
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, lookupStage], version: 2});
+
+ coll.getPlanCache().clear();
+ setupForeignColl();
+ const entryWithForeignColl =
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, lookupStage], version: 2});
+
+ assert.neq(entryWithoutForeignColl.planCacheKey,
+ entryWithForeignColl.planCacheKey,
+ {entryWithoutForeignColl, entryWithForeignColl});
+ assert.eq(entryWithoutForeignColl.queryHash,
+ entryWithForeignColl.queryHash,
+ {entryWithoutForeignColl, entryWithForeignColl});
+})();
+
+(function testForeignCollectionDropCacheInvalidation() {
+ if (!sbePlanCacheEnabled) {
+ jsTestLog(
+ "Skipping testForeignCollectionDropCacheInvalidation when SBE plan cache is not enabled");
+ return;
+ }
+
+ coll.getPlanCache().clear();
+ setupForeignColl();
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, lookupStage], version: 2});
+
+ foreignColl.drop();
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, lookupStage], version: 2});
+})();
+
+(function testForeignIndexDropCacheInvalidation() {
+ if (!sbePlanCacheEnabled) {
+ jsTestLog(
+ "Skipping testForeignIndexDropCacheInvalidation when SBE plan cache is not enabled");
+ return;
+ }
+
+ coll.getPlanCache().clear();
+ setupForeignColl({b: 1} /* index */);
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, lookupStage], version: 2});
+
+ assert.commandWorked(foreignColl.dropIndex({b: 1}));
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, lookupStage], version: 2});
+})();
+
+(function testForeignIndexBuildCacheInvalidation() {
+ if (!sbePlanCacheEnabled) {
+ jsTestLog(
+ "Skipping testForeignIndexBuildCacheInvalidation when SBE plan cache is not enabled");
+ return;
+ }
+
+ coll.getPlanCache().clear();
+ setupForeignColl({b: 1} /* index */);
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, lookupStage], version: 2});
+
+ assert.commandWorked(foreignColl.createIndex({c: 1}));
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, lookupStage], version: 2});
+})();
+
+(function testLookupSbeAndClassicPlanCacheKey() {
+ if (!sbeFullEnabled || !sbePlanCacheEnabled) {
+ jsTestLog(
+ "Skipping testLookupWithClassicPlanCache when SBE full or SBE plan cache is not enabled");
+ return;
+ }
+
+ setupForeignColl({b: 1} /* index */);
+
+    // When using the SBE engine, the plan cache keys of $match vs. $match + $lookup should differ.
+ coll.getPlanCache().clear();
+ let matchEntry = testLoweredPipeline({pipeline: [multiPlanningQueryStage], version: 2});
+
+ coll.getPlanCache().clear();
+ let lookupEntry =
+ testLoweredPipeline({pipeline: [multiPlanningQueryStage, lookupStage], version: 2});
+ assert.neq(matchEntry.planCacheKey, lookupEntry.planCacheKey, {matchEntry, lookupEntry});
+
+    // When using the classic engine, the plan cache keys of $match vs. $match + $lookup should be
+    // the same.
+ assert.commandWorked(db.adminCommand({setParameter: 1, internalQueryForceClassicEngine: true}));
+
+ coll.getPlanCache().clear();
+ matchEntry = testLoweredPipeline(
+ {pipeline: [multiPlanningQueryStage], version: 1, forcesClassicEngine: true});
+
+ coll.getPlanCache().clear();
+ lookupEntry = testLoweredPipeline(
+ {pipeline: [multiPlanningQueryStage, lookupStage], version: 1, forcesClassicEngine: true});
+ assert.eq(matchEntry.planCacheKey, lookupEntry.planCacheKey, {matchEntry, lookupEntry});
+
+ assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryForceClassicEngine: false}));
+})();
+
+MongoRunner.stopMongod(conn);
+})();
diff --git a/jstests/noPassthrough/plan_cache_replan_group_lookup.js b/jstests/noPassthrough/plan_cache_replan_group_lookup.js
index 2f749227316..8b0dee3cb2c 100644
--- a/jstests/noPassthrough/plan_cache_replan_group_lookup.js
+++ b/jstests/noPassthrough/plan_cache_replan_group_lookup.js
@@ -19,32 +19,37 @@ const coll = db.plan_cache_replan_group_lookup;
const foreignCollName = "foreign";
coll.drop();
+const sbePlanCacheEnabled = checkSBEEnabled(db, ["featureFlagSbePlanCache"]);
+const sbeFullEnabled = checkSBEEnabled(db, ["featureFlagSbeFull"]);
+
function getPlansForCacheEntry(match) {
const matchingCacheEntries = coll.getPlanCache().list([{$match: match}]);
assert.eq(matchingCacheEntries.length, 1, coll.getPlanCache().list());
return matchingCacheEntries[0];
}
-function planHasIxScanStageForKey(planStats, keyPattern) {
+function planHasIxScanStageForIndex(planStats, indexName) {
const stage = getPlanStage(planStats, "IXSCAN");
if (stage === null) {
return false;
}
- return bsonWoCompare(keyPattern, stage.keyPattern) === 0;
+ return indexName === stage.indexName;
}
-function assertCacheUsage(
- multiPlanning, cacheEntryIsActive, cachedIndex, pipeline, aggOptions = {}) {
+function assertCacheUsage(multiPlanning,
+ cacheEntryVersion,
+ cacheEntryIsActive,
+ cachedIndexName,
+ pipeline,
+ aggOptions = {}) {
const profileObj = getLatestProfilerEntry(db, {op: "command", ns: coll.getFullName()});
const queryHash = profileObj.queryHash;
const planCacheKey = profileObj.planCacheKey;
assert.eq(multiPlanning, !!profileObj.fromMultiPlanner);
const entry = getPlansForCacheEntry({queryHash: queryHash});
- // TODO(SERVER-61507): Convert the assertion to SBE cache once lowered $lookup integrates
- // with SBE plan cache.
- assert.eq(entry.version, 1);
+ assert.eq(cacheEntryVersion, entry.version);
assert.eq(cacheEntryIsActive, entry.isActive);
// If the entry is active, we should have a plan cache key.
@@ -59,7 +64,11 @@ function assertCacheUsage(
: explain.stages[0].$cursor.queryPlanner.planCacheKey;
assert.eq(explainKey, entry.planCacheKey);
}
- assert.eq(planHasIxScanStageForKey(getCachedPlan(entry.cachedPlan), cachedIndex), true, entry);
+ if (cacheEntryVersion === 2) {
+ assert(entry.cachedPlan.stages.includes(cachedIndexName), entry);
+ } else {
+ assert(planHasIxScanStageForIndex(getCachedPlan(entry.cachedPlan), cachedIndexName), entry);
+ }
}
assert.commandWorked(db.setProfilingLevel(2));
@@ -79,22 +88,35 @@ for (let i = 1000; i < 1100; i++) {
assert.commandWorked(coll.createIndex({a: 1}));
assert.commandWorked(coll.createIndex({b: 1}));
-function setUpActiveCacheEntry(pipeline, cachedIndex) {
+function setUpActiveCacheEntry(pipeline, cacheEntryVersion, cachedIndexName) {
// For the first run, the query should go through multiplanning and create inactive cache entry.
assert.eq(2, coll.aggregate(pipeline).toArray()[0].n);
- assertCacheUsage(true /*multiPlanning*/, false /*cacheEntryIsActive*/, cachedIndex, pipeline);
+ assertCacheUsage(true /*multiPlanning*/,
+ cacheEntryVersion,
+ false /*cacheEntryIsActive*/,
+ cachedIndexName,
+ pipeline);
// After the second run, the inactive cache entry should be promoted to an active entry.
assert.eq(2, coll.aggregate(pipeline).toArray()[0].n);
- assertCacheUsage(true /*multiPlanning*/, true /*cacheEntryIsActive*/, cachedIndex, pipeline);
+ assertCacheUsage(true /*multiPlanning*/,
+ cacheEntryVersion,
+ true /*cacheEntryIsActive*/,
+ cachedIndexName,
+ pipeline);
// For the third run, the active cached query should be used.
assert.eq(2, coll.aggregate(pipeline).toArray()[0].n);
- assertCacheUsage(false /*multiPlanning*/, true /*cacheEntryIsActive*/, cachedIndex, pipeline);
+ assertCacheUsage(false /*multiPlanning*/,
+ cacheEntryVersion,
+ true /*cacheEntryIsActive*/,
+ cachedIndexName,
+ pipeline);
}
function testFn(aIndexPipeline,
bIndexPipeline,
+ cacheEntryVersion,
setUpFn = undefined,
tearDownFn = undefined,
explainFn = undefined) {
@@ -107,7 +129,7 @@ function testFn(aIndexPipeline,
explainFn(bIndexPipeline);
}
- setUpActiveCacheEntry(aIndexPipeline, {a: 1} /* cachedIndex */);
+ setUpActiveCacheEntry(aIndexPipeline, cacheEntryVersion, "a_1" /* cachedIndexName */);
// Now run the other pipeline, which has the same query shape but is faster with a different
// index. It should trigger re-planning of the query.
@@ -115,15 +137,17 @@ function testFn(aIndexPipeline,
// The other pipeline again, The cache should be used now.
assertCacheUsage(true /*multiPlanning*/,
+ cacheEntryVersion,
true /*cacheEntryIsActive*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
bIndexPipeline);
// Run it once again so that the cache entry is reused.
assert.eq(3, coll.aggregate(bIndexPipeline).toArray()[0].n);
assertCacheUsage(false /*multiPlanning*/,
+ cacheEntryVersion,
true /*cacheEntryIsActive*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
bIndexPipeline);
if (tearDownFn) {
@@ -144,7 +168,9 @@ const bIndexPredicate = [{$match: {a: 1, b: 1042}}];
// $group tests.
const groupSuffix = [{$group: {_id: "$c"}}, {$count: "n"}];
-testFn(aIndexPredicate.concat(groupSuffix), bIndexPredicate.concat(groupSuffix));
+testFn(aIndexPredicate.concat(groupSuffix),
+ bIndexPredicate.concat(groupSuffix),
+ 1 /* cacheEntryVersion */);
// $lookup tests.
const lookupStage =
@@ -163,9 +189,8 @@ function dropLookupForeignColl() {
assert(db[foreignCollName].drop());
}
-const lookupPushdownEnabled = checkSBEEnabled(db, ["featureFlagSBELookupPushdown"]);
-const lookupPushdownNLJEnabled =
- checkSBEEnabled(db, ["featureFlagSBELookupPushdown", "featureFlagSbeFull"]);
+const lookupPushdownEnabled = checkSBEEnabled(db);
+const lookupPushdownNLJEnabled = checkSBEEnabled(db, ["featureFlagSbeFull"]);
function verifyCorrectLookupAlgorithmUsed(targetJoinAlgorithm, pipeline, aggOptions = {}) {
if (!lookupPushdownEnabled) {
return;
@@ -189,9 +214,13 @@ function verifyCorrectLookupAlgorithmUsed(targetJoinAlgorithm, pipeline, aggOpti
}
}
+// TODO SERVER-61507: The following test cases are $lookup followed by $group. Update them when
+// $group is integrated with the SBE plan cache.
+//
// NLJ.
testFn(aLookup,
bLookup,
+ 1 /* cacheEntryVersion */,
createLookupForeignColl,
dropLookupForeignColl,
(pipeline) =>
@@ -200,6 +229,7 @@ testFn(aLookup,
// INLJ.
testFn(aLookup,
bLookup,
+ 1 /* cacheEntryVersion */,
() => {
createLookupForeignColl();
assert.commandWorked(db[foreignCollName].createIndex({foreignKey: 1}));
@@ -209,7 +239,7 @@ testFn(aLookup,
verifyCorrectLookupAlgorithmUsed("IndexedLoopJoin", pipeline, {allowDiskUse: false}));
// HJ.
-testFn(aLookup, bLookup, () => {
+testFn(aLookup, bLookup, 1 /* cacheEntryVersion */, () => {
createLookupForeignColl();
}, dropLookupForeignColl, (pipeline) => verifyCorrectLookupAlgorithmUsed("HashJoin", pipeline, {
allowDiskUse: true
@@ -222,29 +252,38 @@ testFn(aLookup, bLookup, () => {
createLookupForeignColl();
assert.commandWorked(db[foreignCollName].createIndex({foreignKey: 1}));
verifyCorrectLookupAlgorithmUsed("IndexedLoopJoin", aLookup, {allowDiskUse: true});
-setUpActiveCacheEntry(aLookup, {a: 1} /* cachedIndex */);
+setUpActiveCacheEntry(aLookup, 1 /* cacheEntryVersion */, "a_1" /* cachedIndexName */);
// Drop the index. This should result in using the active plan, but switching to HJ.
assert.commandWorked(db[foreignCollName].dropIndex({foreignKey: 1}));
verifyCorrectLookupAlgorithmUsed("HashJoin", aLookup, {allowDiskUse: true});
assert.eq(2, coll.aggregate(aLookup).toArray()[0].n);
-assertCacheUsage(
- false /*multiPlanning*/, true /*cacheEntryIsActive*/, {a: 1} /*cachedIndex*/, aLookup);
+assertCacheUsage(false /*multiPlanning*/,
+ 1 /* cacheEntryVersion */,
+ true /*cacheEntryIsActive*/,
+ "a_1" /*cachedIndexName*/,
+ aLookup);
// Set 'allowDiskUse' to 'false'. This should still result in using the active plan, but switching
// to NLJ.
verifyCorrectLookupAlgorithmUsed("NestedLoopJoin", aLookup, {allowDiskUse: false});
assert.eq(2, coll.aggregate(aLookup).toArray()[0].n);
-assertCacheUsage(
- false /*multiPlanning*/, true /*cacheEntryIsActive*/, {a: 1} /*cachedIndex*/, aLookup);
+assertCacheUsage(false /*multiPlanning*/,
+ 1 /* cacheEntryVersion */,
+ true /*cacheEntryIsActive*/,
+ "a_1" /*cachedIndexName*/,
+ aLookup);
// Drop the foreign collection. This should still result in using the active plan with a special
// empty collection plan.
dropLookupForeignColl();
verifyCorrectLookupAlgorithmUsed("NonExistentForeignCollection", aLookup, {allowDiskUse: true});
assert.eq(2, coll.aggregate(aLookup).toArray()[0].n);
-assertCacheUsage(
- false /*multiPlanning*/, true /*cacheEntryIsActive*/, {a: 1} /*cachedIndex*/, aLookup);
+assertCacheUsage(false /*multiPlanning*/,
+ 1 /* cacheEntryVersion */,
+ true /*cacheEntryIsActive*/,
+ "a_1" /*cachedIndexName*/,
+ aLookup);
// Verify that changing the plan for the right side does not trigger a replan.
const foreignColl = db[foreignCollName];
@@ -281,15 +320,17 @@ verifyCorrectLookupAlgorithmUsed(
runLookupQuery({allowDiskUse: false});
assertCacheUsage(true /*multiPlanning*/,
+ sbePlanCacheEnabled ? 2 : 1 /* cacheEntryVersion */,
false /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline,
{allowDiskUse: false});
runLookupQuery({allowDiskUse: false});
assertCacheUsage(true /*multiPlanning*/,
+ sbePlanCacheEnabled ? 2 : 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline,
{allowDiskUse: false});
@@ -301,17 +342,39 @@ assert.commandWorked(foreignColl.dropIndex({c: 1}));
verifyCorrectLookupAlgorithmUsed(
"NestedLoopJoin", avoidReplanLookupPipeline, {allowDiskUse: false});
+// If the SBE plan cache is enabled, dropping the index invalidates the cached $lookup plan, so we
+// need to rerun the multi-planner.
+if (sbePlanCacheEnabled) {
+ runLookupQuery({allowDiskUse: false});
+ assertCacheUsage(true /*multiPlanning*/,
+ sbeFullEnabled ? 2 : 1 /* cacheEntryVersion */,
+ false /*activeCacheEntry*/,
+ "b_1" /*cachedIndexName*/,
+ avoidReplanLookupPipeline,
+ {allowDiskUse: false});
+
+ runLookupQuery({allowDiskUse: false});
+ assertCacheUsage(true /*multiPlanning*/,
+ sbeFullEnabled ? 2 : 1 /* cacheEntryVersion */,
+ true /*activeCacheEntry*/,
+ "b_1" /*cachedIndexName*/,
+ avoidReplanLookupPipeline,
+ {allowDiskUse: false});
+}
+
runLookupQuery({allowDiskUse: false});
assertCacheUsage(false /*multiPlanning*/,
+ sbePlanCacheEnabled && sbeFullEnabled ? 2 : 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline,
{allowDiskUse: false});
runLookupQuery({allowDiskUse: false});
assertCacheUsage(false /*multiPlanning*/,
+ sbePlanCacheEnabled && sbeFullEnabled ? 2 : 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline,
{allowDiskUse: false});
@@ -319,16 +382,38 @@ assertCacheUsage(false /*multiPlanning*/,
// replanning the cached query.
verifyCorrectLookupAlgorithmUsed("HashJoin", avoidReplanLookupPipeline, {allowDiskUse: true});
+// If the SBE plan cache is enabled, using a different 'allowDiskUse' option results in a
+// different plan cache key.
+if (sbePlanCacheEnabled) {
+ runLookupQuery({allowDiskUse: true});
+ assertCacheUsage(true /*multiPlanning*/,
+ 2 /* cacheEntryVersion */,
+ false /*activeCacheEntry*/,
+ "b_1" /*cachedIndexName*/,
+ avoidReplanLookupPipeline,
+ {allowDiskUse: true});
+
+ runLookupQuery({allowDiskUse: true});
+ assertCacheUsage(true /*multiPlanning*/,
+ 2 /* cacheEntryVersion */,
+ true /*activeCacheEntry*/,
+ "b_1" /*cachedIndexName*/,
+ avoidReplanLookupPipeline,
+ {allowDiskUse: true});
+}
+
runLookupQuery({allowDiskUse: true});
assertCacheUsage(false /*multiPlanning*/,
+ sbePlanCacheEnabled ? 2 : 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline,
{allowDiskUse: true});
runLookupQuery({allowDiskUse: true});
assertCacheUsage(false /*multiPlanning*/,
+ sbePlanCacheEnabled ? 2 : 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline,
{allowDiskUse: true});
@@ -352,23 +437,27 @@ verifyCorrectLookupAlgorithmUsed("IndexedLoopJoin", avoidReplanLookupPipeline);
// Set up an active cache entry.
runLookupQuery();
assertCacheUsage(true /*multiPlanning*/,
+ sbePlanCacheEnabled ? 2 : 1 /* cacheEntryVersion */,
false /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline);
runLookupQuery();
assertCacheUsage(true /*multiPlanning*/,
+ sbePlanCacheEnabled ? 2 : 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline);
runLookupQuery();
assertCacheUsage(false /*multiPlanning*/,
+ sbePlanCacheEnabled ? 2 : 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline);
runLookupQuery();
assertCacheUsage(false /*multiPlanning*/,
+ sbePlanCacheEnabled ? 2 : 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline);
// Disable $lookup pushdown. This should not invalidate the cache entry, but it should prevent
@@ -381,7 +470,7 @@ let explain = coll.explain().aggregate(avoidReplanLookupPipeline);
const eqLookupNodes = getAggPlanStages(explain, "EQ_LOOKUP");
assert.eq(eqLookupNodes.length, 0, "expected no EQ_LOOKUP nodes; got " + tojson(explain));
-if (checkSBEEnabled(db, ["featureFlagSbePlanCache"])) {
+if (sbePlanCacheEnabled) {
runLookupQuery();
const profileObj = getLatestProfilerEntry(db, {op: "command", ns: coll.getFullName()});
const matchingCacheEntries =
@@ -391,13 +480,15 @@ if (checkSBEEnabled(db, ["featureFlagSbePlanCache"])) {
// When the SBE plan cache is disabled, we will be able to reuse the same cache entry.
runLookupQuery();
assertCacheUsage(false /*multiPlanning*/,
+ 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline);
runLookupQuery();
assertCacheUsage(false /*multiPlanning*/,
+ 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanLookupPipeline);
}
@@ -407,7 +498,7 @@ coll.getPlanCache().clear();
// Verify that $group gets pushed down, provided that SBE is enabled.
let groupNodes;
-if (checkSBEEnabled(db, ["featureFlagSBEGroupPushdown"])) {
+if (checkSBEEnabled(db)) {
explain = coll.explain().aggregate(avoidReplanGroupPipeline);
let groupNodes = getAggPlanStages(explain, "GROUP");
assert.eq(groupNodes.length, 1);
@@ -416,23 +507,27 @@ if (checkSBEEnabled(db, ["featureFlagSBEGroupPushdown"])) {
// Set up an active cache entry.
runGroupQuery();
assertCacheUsage(true /*multiPlanning*/,
+ 1 /* cacheEntryVersion */,
false /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanGroupPipeline);
runGroupQuery();
assertCacheUsage(true /*multiPlanning*/,
+ 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanGroupPipeline);
runGroupQuery();
assertCacheUsage(false /*multiPlanning*/,
+ 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanGroupPipeline);
runGroupQuery();
assertCacheUsage(false /*multiPlanning*/,
+ 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanGroupPipeline);
// Disable $group pushdown. This should not invalidate the cache entry, but it should prevent $group
@@ -444,7 +539,7 @@ explain = coll.explain().aggregate(avoidReplanLookupPipeline);
groupNodes = getAggPlanStages(explain, "GROUP");
assert.eq(groupNodes.length, 0);
-if (checkSBEEnabled(db, ["featureFlagSbePlanCache"])) {
+if (sbePlanCacheEnabled) {
runGroupQuery();
const profileObj = getLatestProfilerEntry(db, {op: "command", ns: coll.getFullName()});
const matchingCacheEntries =
@@ -454,13 +549,15 @@ if (checkSBEEnabled(db, ["featureFlagSbePlanCache"])) {
// When the SBE plan cache is disabled, we will be able to reuse the same cache entry.
runGroupQuery();
assertCacheUsage(false /*multiPlanning*/,
+ 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanGroupPipeline);
runGroupQuery();
assertCacheUsage(false /*multiPlanning*/,
+ 1 /* cacheEntryVersion */,
true /*activeCacheEntry*/,
- {b: 1} /*cachedIndex*/,
+ "b_1" /*cachedIndexName*/,
avoidReplanGroupPipeline);
}
diff --git a/jstests/noPassthrough/profile_operation_metrics.js b/jstests/noPassthrough/profile_operation_metrics.js
index 5cf86e36029..3dd372cad65 100644
--- a/jstests/noPassthrough/profile_operation_metrics.js
+++ b/jstests/noPassthrough/profile_operation_metrics.js
@@ -10,7 +10,8 @@
(function() {
"use strict";
-load("jstests/libs/fixture_helpers.js"); // For isReplSet().
+load("jstests/core/timeseries/libs/timeseries.js"); // For 'TimeseriesTest'.
+load("jstests/libs/fixture_helpers.js"); // For isReplSet().
const dbName = jsTestName();
const collName = 'coll';
@@ -23,10 +24,7 @@ const isGroupPushdownEnabled = (db) => {
const internalQueryForceClassicEngine =
assert.commandWorked(db.adminCommand({getParameter: 1, internalQueryForceClassicEngine: 1}))
.internalQueryForceClassicEngine;
- const featureFlagSBEGroupPushdown =
- assert.commandWorked(db.adminCommand({getParameter: 1, featureFlagSBEGroupPushdown: 1}))
- .featureFlagSBEGroupPushdown.value;
- return !internalQueryForceClassicEngine && featureFlagSBEGroupPushdown;
+ return !internalQueryForceClassicEngine;
};
const assertMetricsExist = (profilerEntry) => {
@@ -1308,7 +1306,11 @@ const operations = [
assert.eq(profileDoc.docUnitsRead, 0);
assert.eq(profileDoc.idxEntryBytesRead, 0);
assert.eq(profileDoc.idxEntryUnitsRead, 0);
- assert.eq(profileDoc.docBytesWritten, 207);
+ if (TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db)) {
+ assert.eq(profileDoc.docBytesWritten, 216);
+ } else {
+ assert.eq(profileDoc.docBytesWritten, 207);
+ }
assert.eq(profileDoc.docUnitsWritten, 2);
assert.eq(profileDoc.idxEntryBytesWritten, 0);
assert.eq(profileDoc.idxEntryUnitsWritten, 0);
@@ -1330,7 +1332,11 @@ const operations = [
assert.eq(profileDoc.docUnitsRead, 0);
assert.eq(profileDoc.idxEntryBytesRead, 0);
assert.eq(profileDoc.idxEntryUnitsRead, 0);
- assert.eq(profileDoc.docBytesWritten, 207);
+ if (TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db)) {
+ assert.eq(profileDoc.docBytesWritten, 216);
+ } else {
+ assert.eq(profileDoc.docBytesWritten, 207);
+ }
assert.eq(profileDoc.docUnitsWritten, 2);
assert.eq(profileDoc.idxEntryBytesWritten, 0);
assert.eq(profileDoc.idxEntryUnitsWritten, 0);
@@ -1348,11 +1354,16 @@ const operations = [
},
profileFilter: {op: 'insert', 'command.insert': 'ts', 'command.ordered': true},
profileAssert: (db, profileDoc) => {
- assert.eq(profileDoc.docBytesRead, 207);
+ if (TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db)) {
+ assert.eq(profileDoc.docBytesRead, 216);
+ assert.eq(profileDoc.docBytesWritten, 242);
+ } else {
+ assert.eq(profileDoc.docBytesRead, 207);
+ assert.eq(profileDoc.docBytesWritten, 233);
+ }
assert.eq(profileDoc.docUnitsRead, 2);
assert.eq(profileDoc.idxEntryBytesRead, 0);
assert.eq(profileDoc.idxEntryUnitsRead, 0);
- assert.eq(profileDoc.docBytesWritten, 233);
assert.eq(profileDoc.docUnitsWritten, 2);
assert.eq(profileDoc.idxEntryBytesWritten, 0);
assert.eq(profileDoc.idxEntryUnitsWritten, 0);
@@ -1370,11 +1381,16 @@ const operations = [
},
profileFilter: {op: 'insert', 'command.insert': 'ts', 'command.ordered': false},
profileAssert: (db, profileDoc) => {
- assert.eq(profileDoc.docBytesRead, 207);
+ if (TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db)) {
+ assert.eq(profileDoc.docBytesRead, 216);
+ assert.eq(profileDoc.docBytesWritten, 242);
+ } else {
+ assert.eq(profileDoc.docBytesRead, 207);
+ assert.eq(profileDoc.docBytesWritten, 233);
+ }
assert.eq(profileDoc.docUnitsRead, 2);
assert.eq(profileDoc.idxEntryBytesRead, 0);
assert.eq(profileDoc.idxEntryUnitsRead, 0);
- assert.eq(profileDoc.docBytesWritten, 233);
assert.eq(profileDoc.docUnitsWritten, 2);
assert.eq(profileDoc.idxEntryBytesWritten, 0);
assert.eq(profileDoc.idxEntryUnitsWritten, 0);
@@ -1390,7 +1406,11 @@ const operations = [
},
profileFilter: {op: 'query', 'command.find': 'ts'},
profileAssert: (db, profileDoc) => {
- assert.eq(profileDoc.docBytesRead, 466);
+ if (TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db)) {
+ assert.eq(profileDoc.docBytesRead, 484);
+ } else {
+ assert.eq(profileDoc.docBytesRead, 466);
+ }
assert.eq(profileDoc.docUnitsRead, 4);
assert.eq(profileDoc.idxEntryBytesRead, 0);
assert.eq(profileDoc.idxEntryUnitsRead, 0);
diff --git a/jstests/noPassthrough/read_write_concern_defaults_metrics.js b/jstests/noPassthrough/read_write_concern_defaults_metrics.js
index 6c2b8300bc1..699937687fa 100644
--- a/jstests/noPassthrough/read_write_concern_defaults_metrics.js
+++ b/jstests/noPassthrough/read_write_concern_defaults_metrics.js
@@ -128,7 +128,16 @@ function testFTDC(conn, ftdcDirPath, expectNothingOnRotation = false) {
// Read from the first non-interim file.
const firstFullFile =
ftdcFiles.filter(fileDesc => fileDesc.baseName.indexOf("interim") == -1)[0];
- const ftdcData = _readDumpFile(firstFullFile.name);
+ var ftdcData;
+ assert.soon(() => {
+ try {
+ ftdcData = _readDumpFile(firstFullFile.name);
+ return true;
+ } catch (error) {
+ jsTestLog(`Caught unexpected error: ${tojson(error)}`);
+ return false;
+ }
+ });
assert.hasFields(ftdcData[0], ["doc"], tojson(ftdcData));
// Look for the defaults in the first metadata object.
diff --git a/jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js b/jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js
index 45a139d3b75..8865544471c 100644
--- a/jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js
+++ b/jstests/noPassthrough/serverStatus_does_not_block_on_RSTL.js
@@ -18,7 +18,8 @@ load("jstests/libs/parallel_shell_helpers.js"); // startParallelShell
load("jstests/libs/wait_for_command.js"); // waitForCommand
// Use a sharding environment in order to exercise the sharding specific serverStatus sections.
-const st = new ShardingTest({mongos: 1, config: 1, shards: 1, rs: {nodes: 1}});
+const st = new ShardingTest(
+ {mongos: 1, config: 1, shards: 1, rs: {nodes: 1, setParameter: {watchdogPeriodSeconds: 60}}});
const testDB = st.rs0.getPrimary().getDB("test");
jsTestLog("Starting the sleep command in a parallel thread to take the RSTL MODE_X lock");
diff --git a/jstests/noPassthrough/set_cluster_parameter_fcv.js b/jstests/noPassthrough/set_cluster_parameter_fcv.js
deleted file mode 100644
index 3b9075e1d53..00000000000
--- a/jstests/noPassthrough/set_cluster_parameter_fcv.js
+++ /dev/null
@@ -1,109 +0,0 @@
-// Test setClusterParameter command against FCV.
-//
-// @tags: [
-// creates_and_authenticates_user,
-// requires_auth,
-// requires_fcv_60,
-// requires_non_retryable_commands,
-// requires_persistence,
-// requires_replication,
-// disabled_for_fcv_6_1_upgrade,
-// ]
-
-load("jstests/noPassthrough/libs/user_write_blocking.js");
-
-(function() {
-'use strict';
-
-const {
- WriteBlockState,
- ShardingFixture,
- ReplicaFixture,
- bypassUser,
- noBypassUser,
- password,
- keyfile
-} = UserWriteBlockHelpers;
-
-function mapToClusterParamsColl(db) {
- return db.getSiblingDB('config').clusterParameters;
-}
-
-function runTest(fixture) {
- // When the cluster is started at FCV 6.0, it is possible to run setClusterParameter.
- fixture.asAdmin(({admin}) => assert.commandWorked(admin.runCommand(
- {setClusterParameter: {testIntClusterParameter: {intData: 102}}})));
-
- // Check that the config.clusterParameters collection has been created with a document for the
- // parameter.
- fixture.asAdmin(
- ({db}) => assert.eq(1, mapToClusterParamsColl(db).count({_id: "testIntClusterParameter"})));
-
- // When the cluster is at FCV 6.0 without an ongoing setClusterParameter operation in progress,
- // it should be possible to downgrade the cluster.
- fixture.asAdmin(({admin}) => assert.commandWorked(
- admin.runCommand({setFeatureCompatibilityVersion: "5.0"})));
-
- // After downgrade, config.clusterParameters should not exist.
- fixture.asAdmin(({db}) => assert.isnull(mapToClusterParamsColl(db).exists()));
- fixture.asAdmin(
- ({db}) => assert.eq(
- 0, mapToClusterParamsColl(db).count({_id: "testIntClusterParameter", intData: 102})));
-
- // While the cluster is downgraded, it should not be possible to run setClusterParameter.
- fixture.asAdmin(({admin}) => assert.commandFailed(admin.runCommand(
- {setClusterParameter: {testIntClusterParameter: {intData: 102}}})));
-
- // Upgrading the cluster back to 6.0 should permit setClusterParameter to work again.
- fixture.asAdmin(({admin}) => assert.commandWorked(
- admin.runCommand({setFeatureCompatibilityVersion: "6.0"})));
- fixture.asAdmin(({admin}) => assert.commandWorked(admin.runCommand(
- {setClusterParameter: {testIntClusterParameter: {intData: 103}}})));
-
- // Set a failpoint to make setClusterParameter hang on a sharded cluster. FCV downgrade should
- // fail while setClusterParameter is in progress.
- if (fixture.hangTransition) {
- let hangWaiter =
- fixture.hangTransition({setClusterParameter: {testIntClusterParameter: {intData: 105}}},
- 'hangInShardsvrSetClusterParameter');
-
- fixture.asAdmin(({admin}) => assert.commandFailedWithCode(
- admin.runCommand({setFeatureCompatibilityVersion: "5.0"}),
- ErrorCodes.CannotDowngrade));
-
- // Restart the config server primary and verify that FCV downgrade still fails.
- fixture.restartConfigPrimary();
- fixture.asAdmin(({admin}) => assert.commandFailedWithCode(
- admin.runCommand({setFeatureCompatibilityVersion: "5.0"}),
- ErrorCodes.CannotDowngrade));
-
- // Turn off the failpoint and wait for the hung setClusterParameter operation to drain.
- hangWaiter.failpoint.off();
- hangWaiter.waiter();
-
- // Verify that the updated value was successfully updated and is visible despite the restart
- // and failed FCV downgrade attempts.
- fixture.asAdmin(({admin}) => assert.eq(
- 105,
- admin.runCommand({getClusterParameter: "testIntClusterParameter"})
- .clusterParameters[0]
- .intData));
-
- // Verify that FCV downgrade succeeds after the setClusterParameter operation has drained.
- fixture.asAdmin(({admin}) => assert.commandWorked(
- admin.runCommand({setFeatureCompatibilityVersion: "5.0"})));
- }
-}
-
-{
- const rst = new ReplicaFixture();
- runTest(rst);
- rst.stop();
-}
-
-{
- const st = new ShardingFixture();
- runTest(st);
- st.stop();
-}
-}());
diff --git a/jstests/noPassthrough/spill_to_disk_secondary_read.js b/jstests/noPassthrough/spill_to_disk_secondary_read.js
index f4c2ef20104..242620b987b 100644
--- a/jstests/noPassthrough/spill_to_disk_secondary_read.js
+++ b/jstests/noPassthrough/spill_to_disk_secondary_read.js
@@ -35,7 +35,7 @@ const readColl = secondary.getDB("test").foo;
* Test spilling of $group, when explicitly run on a secondary.
*/
(function testGroupSpilling() {
- if (!checkSBEEnabled(secondary.getDB("test"), ["featureFlagSBEGroupPushdown"])) {
+ if (!checkSBEEnabled(secondary.getDB("test"))) {
jsTestLog("Skipping test for HashAgg stage: $group lowering into SBE isn't enabled");
return;
}
@@ -100,7 +100,7 @@ const readColl = secondary.getDB("test").foo;
* Test spilling of $lookup when explicitly run on a secondary.
*/
(function testLookupSpillingInSbe() {
- if (!checkSBEEnabled(secondary.getDB("test"), ["featureFlagSBELookupPushdown"])) {
+ if (!checkSBEEnabled(secondary.getDB("test"))) {
jsTestLog("Skipping test for HashLookup stage: $lookup lowering into SBE isn't enabled");
return;
}
diff --git a/jstests/core/timeseries/timeseries_bucket_limit_size.js b/jstests/noPassthrough/timeseries_bucket_limit_size.js
index 830cf9014d5..1553de495d8 100644
--- a/jstests/core/timeseries/timeseries_bucket_limit_size.js
+++ b/jstests/noPassthrough/timeseries_bucket_limit_size.js
@@ -3,8 +3,8 @@
* @tags: [
* does_not_support_stepdowns,
* does_not_support_transactions,
- * requires_getmore,
- * requires_fcv_52,
+ * tenant_migration_incompatible,
+ * requires_fcv_61,
* ]
*/
(function() {
@@ -12,9 +12,16 @@
load("jstests/core/timeseries/libs/timeseries.js"); // For 'TimeseriesTest'.
+const conn = MongoRunner.runMongod({setParameter: {timeseriesBucketMinCount: 1}});
+
+const dbName = jsTestName();
+const db = conn.getDB(dbName);
+
TimeseriesTest.run((insert) => {
const isTimeseriesBucketCompressionEnabled =
TimeseriesTest.timeseriesBucketCompressionEnabled(db);
+ const areTimeseriesScalabilityImprovementsEnabled =
+ TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db);
const collNamePrefix = 'timeseries_bucket_limit_size_';
@@ -25,9 +32,9 @@ TimeseriesTest.run((insert) => {
const numDocs = 3;
// The measurement data should not take up all of the 'bucketMaxSizeKB' limit because we need to
- // leave a little room for the _id and the time fields. We need to fit two measurements within
- // this limit to trigger compression if enabled.
- const largeValue = 'x'.repeat(((bucketMaxSizeKB - 1) / 2) * 1024);
+    // leave room for the control.min and control.max summaries (two measurements' worth of data). We
+ // need to fit two measurements within this limit to trigger compression if enabled.
+ const largeValue = 'x'.repeat(((bucketMaxSizeKB - 1) / 4) * 1024);
const runTest = function(numDocsPerInsert) {
const coll = db.getCollection(collNamePrefix + numDocsPerInsert);
@@ -80,6 +87,15 @@ TimeseriesTest.run((insert) => {
bucketDocs[0].control.version,
'unexpected control.version in first bucket: ' + tojson(bucketDocs));
+ if (areTimeseriesScalabilityImprovementsEnabled) {
+ assert.eq(true,
+ bucketDocs[0].control.closed,
+ 'unexpected control.closed in first bucket: ' + tojson(bucketDocs));
+ } else {
+ assert(!bucketDocs[0].control.hasOwnProperty("closed"),
+ 'unexpected control.closed in first bucket: ' + tojson(bucketDocs));
+ }
+
// Second bucket should contain the remaining document.
assert.eq(numDocs - 1,
bucketDocs[1].control.min._id,
@@ -95,10 +111,21 @@ TimeseriesTest.run((insert) => {
'invalid control.max for x in second bucket: ' + tojson(bucketDocs[1].control));
assert.eq(1,
bucketDocs[1].control.version,
- 'unexpected control.version in first bucket: ' + tojson(bucketDocs));
+ 'unexpected control.version in second bucket: ' + tojson(bucketDocs));
+
+ if (areTimeseriesScalabilityImprovementsEnabled) {
+ assert.eq(false,
+ bucketDocs[1].control.closed,
+ 'unexpected control.closed in second bucket: ' + tojson(bucketDocs));
+ } else {
+ assert(!bucketDocs[1].control.hasOwnProperty("closed"),
+ 'unexpected control.closed in second bucket: ' + tojson(bucketDocs));
+ }
};
runTest(1);
runTest(numDocs);
});
+
+MongoRunner.stopMongod(conn);
})();
diff --git a/jstests/noPassthrough/timeseries_collStats.js b/jstests/noPassthrough/timeseries_collStats.js
index 8b9c47502e5..00bffbf4d53 100644
--- a/jstests/noPassthrough/timeseries_collStats.js
+++ b/jstests/noPassthrough/timeseries_collStats.js
@@ -14,14 +14,18 @@ load("jstests/core/timeseries/libs/timeseries.js");
const kIdleBucketExpiryMemoryUsageThreshold = 1024 * 1024 * 10;
const conn = MongoRunner.runMongod({
- setParameter:
- {timeseriesIdleBucketExpiryMemoryUsageThreshold: kIdleBucketExpiryMemoryUsageThreshold}
+ setParameter: {
+ timeseriesIdleBucketExpiryMemoryUsageThreshold: kIdleBucketExpiryMemoryUsageThreshold,
+ timeseriesBucketMinCount: 1
+ }
});
const dbName = jsTestName();
const testDB = conn.getDB(dbName);
const isTimeseriesBucketCompressionEnabled =
TimeseriesTest.timeseriesBucketCompressionEnabled(testDB);
+const isTimeseriesScalabilityImprovementsEnabled =
+ TimeseriesTest.timeseriesScalabilityImprovementsEnabled(testDB);
assert.commandWorked(testDB.dropDatabase());
@@ -50,12 +54,22 @@ const clearCollection = function() {
expectedStats.numBucketsClosedDueToTimeForward = 0;
expectedStats.numBucketsClosedDueToTimeBackward = 0;
expectedStats.numBucketsClosedDueToMemoryThreshold = 0;
+ if (isTimeseriesScalabilityImprovementsEnabled) {
+ expectedStats.numBucketsArchivedDueToTimeForward = 0;
+ expectedStats.numBucketsArchivedDueToTimeBackward = 0;
+ expectedStats.numBucketsArchivedDueToMemoryThreshold = 0;
+ }
expectedStats.numCommits = 0;
expectedStats.numWaits = 0;
expectedStats.numMeasurementsCommitted = 0;
expectedStats.numCompressedBuckets = 0;
expectedStats.numUncompressedBuckets = 0;
expectedStats.numSubObjCompressionRestart = 0;
+
+ if (TimeseriesTest.timeseriesScalabilityImprovementsEnabled(testDB)) {
+ expectedStats.numBucketsReopened = 0;
+ expectedStats.numBucketsKeptOpenDueToLargeMeasurements = 0;
+ }
};
clearCollection();
@@ -65,7 +79,8 @@ const checkCollStats = function(empty = false) {
assert.eq(coll.getFullName(), stats.ns);
for (let [stat, value] of Object.entries(expectedStats)) {
- if (stat === 'numBucketsClosedDueToMemoryThreshold') {
+ if (stat === 'numBucketsClosedDueToMemoryThreshold' ||
+ stat === 'numBucketsArchivedDueToMemoryThreshold') {
// Idle bucket expiration behavior will be non-deterministic since buckets are hashed
// into shards within the catalog based on metadata, and expiration is done on a
// per-shard basis. We just want to make sure that if we are expecting the number to be
@@ -149,9 +164,13 @@ assert.commandWorked(coll.insert(
expectedStats.bucketCount++;
expectedStats.numBucketInserts++;
expectedStats.numCommits++;
-expectedStats.numBucketsClosedDueToTimeBackward++;
+if (isTimeseriesScalabilityImprovementsEnabled) {
+ expectedStats.numBucketsArchivedDueToTimeBackward++;
+} else {
+ expectedStats.numBucketsClosedDueToTimeBackward++;
+}
expectedStats.numMeasurementsCommitted++;
-if (isTimeseriesBucketCompressionEnabled) {
+if (isTimeseriesBucketCompressionEnabled && !isTimeseriesScalabilityImprovementsEnabled) {
expectedStats.numCompressedBuckets++;
}
checkCollStats();
@@ -232,7 +251,11 @@ assert.commandWorked(coll.insert(docs, {ordered: false}));
expectedStats.bucketCount += numDocs;
expectedStats.numBucketInserts += numDocs;
expectedStats.numBucketsOpenedDueToMetadata++;
-expectedStats.numBucketsClosedDueToTimeForward++;
+if (isTimeseriesScalabilityImprovementsEnabled) {
+ expectedStats.numBucketsArchivedDueToTimeForward++;
+} else {
+ expectedStats.numBucketsClosedDueToTimeForward++;
+}
expectedStats.numCommits += numDocs;
expectedStats.numMeasurementsCommitted += numDocs;
expectedStats.avgNumMeasurementsPerCommit =
@@ -255,7 +278,11 @@ const testIdleBucketExpiry = function(docFn) {
expectedStats.numBucketInserts++;
expectedStats.numBucketsOpenedDueToMetadata++;
if (shouldExpire) {
- expectedStats.numBucketsClosedDueToMemoryThreshold++;
+ if (isTimeseriesScalabilityImprovementsEnabled) {
+ expectedStats.numBucketsArchivedDueToMemoryThreshold++;
+ } else {
+ expectedStats.numBucketsClosedDueToMemoryThreshold++;
+ }
}
expectedStats.numCommits++;
expectedStats.numMeasurementsCommitted++;
diff --git a/jstests/noPassthrough/timeseries_expires_with_partial_index.js b/jstests/noPassthrough/timeseries_expires_with_partial_index.js
new file mode 100644
index 00000000000..c5bd39b2387
--- /dev/null
+++ b/jstests/noPassthrough/timeseries_expires_with_partial_index.js
@@ -0,0 +1,119 @@
+/**
+ * Tests that a time-series collection created with the 'expireAfterSeconds' option will remove
+ * buckets older than 'expireAfterSeconds' based on the bucket creation time, while also respecting
+ * the partial filter on the metaField.
+ *
+ * @tags: [
+ * does_not_support_stepdowns,
+ * does_not_support_transactions,
+ * featureFlagTimeseriesScalabilityImprovements
+ * ]
+ */
+(function() {
+"use strict";
+
+load('jstests/libs/fixture_helpers.js'); // For 'FixtureHelpers'
+load("jstests/libs/clustered_collections/clustered_collection_util.js");
+load("jstests/core/timeseries/libs/timeseries.js");
+
+const conn = MongoRunner.runMongod({setParameter: 'ttlMonitorSleepSecs=1'});
+const testDB = conn.getDB(jsTestName());
+assert.commandWorked(testDB.dropDatabase());
+
+TimeseriesTest.run((insert) => {
+ const coll = testDB.timeseries_expires_with_partial_index;
+ const bucketsColl = testDB.getCollection('system.buckets.' + coll.getName());
+
+ const timeFieldName = 'tm';
+ const metaFieldName = "mm";
+ const indexName = "partialTTLIndex";
+ const timeSpec = {[timeFieldName]: 1};
+ const expireAfterSecond = NumberLong(1);
+ const expireAfterSeconds = NumberLong(10000);
+
+ const startDate = new Date();
+ const expiredDate = new Date(startDate - ((expireAfterSeconds / 2) * 1000));
+ const collectionTTLExpiredDate = new Date(startDate - ((expireAfterSeconds * 2) * 1000));
+ const futureDate = new Date(startDate.getTime() + (10000 * 10));
+
+ assert.lt(expiredDate, startDate);
+ assert.gt(futureDate, startDate);
+
+ const expiredDoc = {_id: 0, [timeFieldName]: expiredDate, [metaFieldName]: 8, x: 0};
+ const expiredDocLowMeta = {_id: 1, [timeFieldName]: expiredDate, [metaFieldName]: 0, x: 1};
+ const collectionTTLExpiredDocLowMeta =
+ {_id: 2, [timeFieldName]: collectionTTLExpiredDate, [metaFieldName]: 0, x: 2};
+ const futureDoc = {_id: 3, [timeFieldName]: futureDate, [metaFieldName]: 10, x: 3};
+
+ const partialIndexOptions = {
+ name: indexName,
+ partialFilterExpression: {[metaFieldName]: {$gt: 5}},
+ expireAfterSeconds: expireAfterSecond
+ };
+
+ const checkInsertion = function(coll, doc, expectDeletion) {
+ jsTestLog("Inserting doc into collection.");
+ const prevCount = bucketsColl.find().itcount();
+ assert.commandWorked(insert(coll, doc), 'failed to insert doc: ' + tojson(doc));
+
+ // Wait for the TTL monitor to process the indexes.
+ jsTestLog("Waiting for TTL monitor to process...");
+ ClusteredCollectionUtil.waitForTTL(testDB);
+
+ // Check the number of bucket documents.
+ const expectedCount = (expectDeletion) ? prevCount : prevCount + 1;
+ const bucketDocs = bucketsColl.find().sort({'control.min._id': 1}).toArray();
+
+ assert.eq(expectedCount, bucketDocs.length, bucketDocs);
+ jsTestLog("Doc deleted: " + expectDeletion + ".");
+ };
+
+ const testTTLIndex = function(coll) {
+ // Inserts a measurement with a time in the past to ensure the measurement will be removed
+ // immediately.
+ checkInsertion(coll, expiredDoc, true);
+
+ // Inserts a measurement that does not meet the partialFilterExpression to ensure it will
+ // not be removed (even though it is 'expired').
+ checkInsertion(coll, expiredDocLowMeta, false);
+
+ // Inserts a measurement with a time in the future to ensure the measurement is not removed.
+ checkInsertion(coll, futureDoc, false);
+ };
+
+ {
+ coll.drop();
+ assert.commandWorked(testDB.createCollection(
+ coll.getName(), {timeseries: {timeField: timeFieldName, metaField: metaFieldName}}));
+ assert.contains(bucketsColl.getName(), testDB.getCollectionNames());
+
+ // Create a TTL index on time, with a partial filter expression on the metaField.
+ assert.commandWorked(coll.createIndex(timeSpec, partialIndexOptions));
+ }
+
+    // Test the TTL deleter on a time-series collection with a TTL index that has a partial filter.
+ testTTLIndex(coll);
+
+ {
+ coll.drop();
+ assert.commandWorked(testDB.createCollection(coll.getName(), {
+ timeseries: {timeField: timeFieldName, metaField: metaFieldName},
+ expireAfterSeconds: expireAfterSeconds
+ }));
+ assert.contains(bucketsColl.getName(), testDB.getCollectionNames());
+
+ // Create a secondary TTL index on time, with a partial filter expression on the metaField.
+ assert.commandWorked(coll.createIndex(timeSpec, partialIndexOptions));
+ }
+
+    // Test the TTL deleter on a time-series collection that has both a partial TTL index and a
+    // pre-existing collection-level TTL index.
+ testTTLIndex(coll);
+
+    // As a sanity check, verify that the TTL deleter removes a document that does not match the
+    // partial filter but is expired with respect to the collection-level TTL index.
+ checkInsertion(coll, collectionTTLExpiredDocLowMeta, true);
+});
+
+MongoRunner.stopMongod(conn);
+})();
diff --git a/jstests/noPassthrough/timeseries_sort.js b/jstests/noPassthrough/timeseries_sort.js
new file mode 100644
index 00000000000..53842e20c1c
--- /dev/null
+++ b/jstests/noPassthrough/timeseries_sort.js
@@ -0,0 +1,131 @@
+/**
+ * Test that we correctly use the index created when a time-series collection is sharded.
+ *
+ * @tags: [
+ * requires_fcv_51,
+ * requires_find_command,
+ * requires_sharding,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/core/timeseries/libs/timeseries.js");
+load("jstests/libs/analyze_plan.js"); // For getAggPlanStage
+
+Random.setRandomSeed();
+
+const dbName = 'testDB';
+const collName = 'timeseries_sort';
+const timeField = 't';
+const metaField = 'm';
+
+const bucketsCollName = `system.buckets.${collName}`;
+const fullBucketsCollName = `${dbName}.system.buckets.${collName}`;
+
+const st = new ShardingTest({shards: 2});
+const sDB = st.s.getDB(dbName);
+assert.commandWorked(sDB.adminCommand({enableSharding: dbName}));
+
+if (!TimeseriesTest.shardedtimeseriesCollectionsEnabled(st.shard0)) {
+ jsTestLog("Skipping test because the sharded time-series collection feature flag is disabled");
+ st.stop();
+ return;
+}
+
+st.ensurePrimaryShard(dbName, st.shard0.shardName);
+
+// Shard time-series collection.
+const shardKey = {
+ [timeField]: 1
+};
+assert.commandWorked(sDB.adminCommand({
+ shardCollection: `${dbName}.${collName}`,
+ key: shardKey,
+ timeseries: {timeField, metaField, granularity: "hours"}
+}));
+
+// Split the chunks.
+const splitPoint = {
+ [`control.min.${timeField}`]: new Date(50 * 1000)
+};
+assert.commandWorked(sDB.adminCommand({split: fullBucketsCollName, middle: splitPoint}));
+
+// Move one of the chunks into the second shard.
+const primaryShard = st.getPrimaryShard(dbName);
+const otherShard = st.getOther(primaryShard);
+assert.commandWorked(sDB.adminCommand(
+ {movechunk: fullBucketsCollName, find: splitPoint, to: otherShard.name, _waitForDelete: true}));
+
+const coll = sDB.getCollection(collName);
+const bucketsColl = sDB.getCollection(bucketsCollName);
+
+const hasInternalBoundedSort = (explain) => {
+ for (const shardName in explain.shards) {
+ const pipeline = explain.shards[shardName].stages;
+ if (!pipeline.some((stage) => stage.hasOwnProperty("$_internalBoundedSort"))) {
+ return false;
+ }
+ }
+ return true;
+};
+
+const assertAccessPath = (pipeline, hint, accessPath, direction) => {
+ const options = (hint) ? {hint: hint} : {};
+ const explain = coll.explain().aggregate(pipeline, options);
+ assert(hasInternalBoundedSort(explain));
+
+ const paths = getAggPlanStages(explain, accessPath);
+ for (const path of paths) {
+ assert.eq(path.stage, accessPath);
+ assert.eq(path.direction, direction > 0 ? "forward" : "backward");
+ }
+};
+
+const assertNoRewrite = (pipeline) => {
+ const explain = coll.explain().aggregate(pipeline);
+ assert(!hasInternalBoundedSort(explain));
+};
+
+for (let i = 0; i < 100; i++) {
+ assert.commandWorked(
+ sDB.getCollection(collName).insert({t: new Date(i * 1000), m: i % 4, k: i}));
+}
+
+// Ensure that each shard owns one chunk.
+const counts = st.chunkCounts(bucketsCollName, dbName);
+assert.eq(1, counts[primaryShard.shardName], counts);
+assert.eq(1, counts[otherShard.shardName], counts);
+
+assert.eq(coll.count(), 100);
+assert.eq(bucketsColl.count(), 4);
+
+assert.eq(coll.getIndexes().length, 1);
+assert.eq(coll.getIndexes()[0].name, "control.min.t_1");
+
+const forwardSort = {
+ $sort: {t: 1}
+};
+const backwardSort = {
+ $sort: {t: -1}
+};
+// One match before the split, one after the split.
+for (const matchDate of [new Date(25 * 1000), new Date(75 * 1000)]) {
+ const match = {$match: {t: matchDate}};
+ assertAccessPath([match, forwardSort], null, "IXSCAN", 1);
+ assertAccessPath([match, backwardSort], null, "IXSCAN", -1);
+ assertNoRewrite([match, {$sort: {t: -1, m: 1}}]);
+ assertNoRewrite([match, {$sort: {t: 1, m: 1}}]);
+}
+const kMatch = {
+ $match: {k: 1}
+};
+assertAccessPath([forwardSort], null, "COLLSCAN", 1);
+assertAccessPath([backwardSort], null, "COLLSCAN", -1);
+assertAccessPath([kMatch, forwardSort], null, "COLLSCAN", 1);
+assertAccessPath([kMatch, backwardSort], null, "COLLSCAN", -1);
+assertAccessPath([forwardSort], {$natural: -1}, "COLLSCAN", 1);
+assertAccessPath([backwardSort], {$natural: 1}, "COLLSCAN", -1);
+
+st.stop();
+})();
diff --git a/jstests/noPassthroughWithMongod/group_pushdown.js b/jstests/noPassthroughWithMongod/group_pushdown.js
index a8b2df80620..8cf292ec3a9 100644
--- a/jstests/noPassthroughWithMongod/group_pushdown.js
+++ b/jstests/noPassthroughWithMongod/group_pushdown.js
@@ -7,7 +7,7 @@
load("jstests/libs/analyze_plan.js");
load("jstests/libs/sbe_util.js"); // For checkSBEEnabled.
-if (!checkSBEEnabled(db, ["featureFlagSBEGroupPushdown"])) {
+if (!checkSBEEnabled(db)) {
jsTestLog("Skipping test because the sbe group pushdown feature flag is disabled");
return;
}
diff --git a/jstests/noPassthroughWithMongod/lookup_with_limit.js b/jstests/noPassthroughWithMongod/lookup_with_limit.js
index 28dbb4f4702..ba5c3ae9529 100644
--- a/jstests/noPassthroughWithMongod/lookup_with_limit.js
+++ b/jstests/noPassthroughWithMongod/lookup_with_limit.js
@@ -7,7 +7,7 @@
load('jstests/libs/analyze_plan.js'); // For getWinningPlan().
load("jstests/libs/sbe_util.js"); // For checkSBEEnabled.
-if (!checkSBEEnabled(db, ["featureFlagSBELookupPushdown"])) {
+if (!checkSBEEnabled(db)) {
jsTestLog("Skipping test because SBE $lookup is not enabled.");
return;
}
diff --git a/jstests/noPassthroughWithMongod/timeseries_large_measurements_max_size.js b/jstests/noPassthroughWithMongod/timeseries_large_measurements_max_size.js
new file mode 100644
index 00000000000..34039e5f281
--- /dev/null
+++ b/jstests/noPassthroughWithMongod/timeseries_large_measurements_max_size.js
@@ -0,0 +1,80 @@
+/**
+ * Tests that buckets which are kept open until the number of measurements reaches the threshold
+ * (timeseriesBucketMinCount) are closed when the bucket is close to the max BSON size limit.
+ *
+ * @tags: [
+ * requires_collstats,
+ * requires_fcv_61,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/core/timeseries/libs/timeseries.js"); // For 'TimeseriesTest'.
+
+const coll = db.getCollection(jsTestName());
+const bucketColl = db.getCollection("system.buckets." + jsTestName());
+
+const timeFieldName = "time";
+const resetCollection = (() => {
+ coll.drop();
+ assert.commandWorked(
+ db.createCollection(jsTestName(), {timeseries: {timeField: timeFieldName}}));
+});
+
+const areTimeseriesScalabilityImprovementsEnabled =
+ TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db);
+
+const numMeasurements = 4;
+const checkBucketSize = (() => {
+ const timeseriesStats = assert.commandWorked(coll.stats()).timeseries;
+
+ if (areTimeseriesScalabilityImprovementsEnabled) {
+        // Buckets with large measurements are kept open after exceeding timeseriesBucketMaxSize
+        // until they contain 10 measurements. However, if the bucket size exceeds 12MB, the bucket
+        // gets closed regardless.
+ const bucketDocs = bucketColl.find().sort({'control.min._id': 1}).toArray();
+ assert.eq(2, bucketDocs.length, bucketDocs);
+
+        // First bucket should be full, containing the first three documents.
+ assert.eq(0, bucketDocs[0].control.min._id);
+ assert.eq(2, bucketDocs[0].control.max._id);
+
+ // Second bucket should contain the remaining document.
+ assert.eq(numMeasurements - 1, bucketDocs[1].control.min._id);
+ assert.eq(numMeasurements - 1, bucketDocs[1].control.max._id);
+
+ assert.eq(1, timeseriesStats.numBucketsClosedDueToSize);
+ assert.eq(1, timeseriesStats.numBucketsKeptOpenDueToLargeMeasurements);
+ } else {
+ // Only one measurement per bucket without time-series scalability improvements.
+ const bucketDocs = bucketColl.find().sort({'control.min._id': 1}).toArray();
+ assert.eq(numMeasurements, bucketDocs.length, bucketDocs);
+
+ assert(!timeseriesStats.hasOwnProperty("numBucketsKeptOpenDueToLargeMeasurements"));
+ }
+});
+
+const measurementValueLength = 2 * 1024 * 1024;
+
+jsTestLog("Testing single inserts");
+resetCollection();
+
+for (let i = 0; i < numMeasurements; i++) {
+ const doc = {_id: i, [timeFieldName]: ISODate(), value: "a".repeat(measurementValueLength)};
+ assert.commandWorked(coll.insert(doc));
+}
+checkBucketSize();
+
+jsTestLog("Testing batched inserts");
+resetCollection();
+
+let batch = [];
+for (let i = 0; i < numMeasurements; i++) {
+ const doc = {_id: i, [timeFieldName]: ISODate(), value: "a".repeat(measurementValueLength)};
+ batch.push(doc);
+}
+assert.commandWorked(coll.insertMany(batch));
+
+checkBucketSize();
+}());
diff --git a/jstests/noPassthroughWithMongod/timeseries_server_status_measurements.js b/jstests/noPassthroughWithMongod/timeseries_server_status_measurements.js
new file mode 100644
index 00000000000..2d666ebf609
--- /dev/null
+++ b/jstests/noPassthroughWithMongod/timeseries_server_status_measurements.js
@@ -0,0 +1,69 @@
+/**
+ * Tests that buckets which need to be closed due to size (timeseriesBucketMaxSize) are kept open
+ * until the number of measurements reaches the threshold (timeseriesBucketMinCount).
+ *
+ * @tags: [
+ * requires_collstats,
+ * requires_fcv_61,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/core/timeseries/libs/timeseries.js"); // For 'TimeseriesTest'.
+
+const coll = db.getCollection(jsTestName());
+
+const timeFieldName = "localTime";
+const metaFieldName = "host";
+const resetCollection = (() => {
+ coll.drop();
+ assert.commandWorked(db.createCollection(
+ jsTestName(), {timeseries: {timeField: timeFieldName, metaField: metaFieldName}}));
+});
+
+const areTimeseriesScalabilityImprovementsEnabled =
+ TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db);
+
+const numMeasurements = 50;
+const checkBucketSize = (() => {
+ const timeseriesStats = assert.commandWorked(coll.stats()).timeseries;
+
+ if (areTimeseriesScalabilityImprovementsEnabled) {
+ // Need at least 10 measurements before closing buckets exceeding timeseriesBucketMaxSize.
+ assert.eq(numMeasurements / 10, timeseriesStats.bucketCount);
+
+ assert(timeseriesStats.hasOwnProperty("numBucketsKeptOpenDueToLargeMeasurements"));
+ assert.eq(numMeasurements / 10, timeseriesStats.numBucketsKeptOpenDueToLargeMeasurements);
+ } else {
+ // After accounting for the control.min and control.max summaries, a single serverStatus
+ // measurement exceeds the bucket max size, which means we'll only have one measurement per
+ // bucket.
+ assert.eq(numMeasurements, timeseriesStats.bucketCount);
+
+ assert(!timeseriesStats.hasOwnProperty("numBucketsKeptOpenDueToLargeMeasurements"));
+ }
+});
+
+jsTestLog("Testing single inserts");
+resetCollection();
+
+for (let i = 0; i < numMeasurements; i++) {
+ const doc = assert.commandWorked(db.runCommand({serverStatus: 1}));
+ assert.commandWorked(coll.insert(doc));
+}
+
+checkBucketSize();
+
+jsTestLog("Testing batched inserts");
+resetCollection();
+
+let batch = [];
+for (let i = 0; i < numMeasurements; i++) {
+ const doc = assert.commandWorked(db.runCommand({serverStatus: 1}));
+ batch.push(doc);
+}
+assert.commandWorked(coll.insertMany(batch));
+
+checkBucketSize();
+}());
diff --git a/jstests/replsets/cluster_server_parameter_commands_replset.js b/jstests/replsets/cluster_server_parameter_commands_replset.js
index 7c780307561..79e1356f47c 100644
--- a/jstests/replsets/cluster_server_parameter_commands_replset.js
+++ b/jstests/replsets/cluster_server_parameter_commands_replset.js
@@ -2,11 +2,9 @@
* Checks that set/getClusterParameter runs as expected on replica set nodes.
*
* @tags: [
- * # Requires all nodes to be running the latest binary.
- * requires_fcv_60,
- * featureFlagClusterWideConfig,
* does_not_support_stepdowns,
- * requires_replication
+ * requires_replication,
+ * multiversion_incompatible
* ]
*/
(function() {
diff --git a/jstests/replsets/db_reads_while_recovering_all_commands.js b/jstests/replsets/db_reads_while_recovering_all_commands.js
index 02fc2486ca4..26fe57fdc7d 100644
--- a/jstests/replsets/db_reads_while_recovering_all_commands.js
+++ b/jstests/replsets/db_reads_while_recovering_all_commands.js
@@ -217,7 +217,6 @@ const allCommands = {
fsync: {skip: isNotAUserDataRead},
fsyncUnlock: {skip: isNotAUserDataRead},
getAuditConfig: {skip: isNotAUserDataRead},
- getChangeStreamOptions: {skip: isNotAUserDataRead}, // TODO SERVER-65353 remove in 6.1.
getClusterParameter: {skip: isNotAUserDataRead},
getCmdLineOpts: {skip: isNotAUserDataRead},
getDatabaseVersion: {skip: isNotAUserDataRead},
@@ -333,7 +332,6 @@ const allCommands = {
saslStart: {skip: isPrimaryOnly},
serverStatus: {skip: isNotAUserDataRead},
setAuditConfig: {skip: isNotAUserDataRead},
- setChangeStreamOptions: {skip: isPrimaryOnly}, // TODO SERVER-65353 remove in 6.1.
setCommittedSnapshot: {skip: isNotAUserDataRead},
setDefaultRWConcern: {skip: isPrimaryOnly},
setIndexCommitQuorum: {skip: isPrimaryOnly},
diff --git a/jstests/replsets/reconfig_only_counts_voters_for_config_commitment.js b/jstests/replsets/reconfig_only_counts_voters_for_config_commitment.js
index 6f2a92ce060..e6fff47893c 100644
--- a/jstests/replsets/reconfig_only_counts_voters_for_config_commitment.js
+++ b/jstests/replsets/reconfig_only_counts_voters_for_config_commitment.js
@@ -46,7 +46,7 @@ assert.soon(() => isConfigCommitted(primary));
// Subsequent reconfig should now succeed.
config.version++;
assert.commandWorked(primary.getDB("admin").runCommand({replSetReconfig: config}));
-assert(isConfigCommitted(primary));
+assert.soon(() => isConfigCommitted(primary));
replTest.stopSet();
}()); \ No newline at end of file
diff --git a/jstests/replsets/set_cluster_parameter_replset.js b/jstests/replsets/set_cluster_parameter_replset.js
index a9469c0aa06..65beea7ad70 100644
--- a/jstests/replsets/set_cluster_parameter_replset.js
+++ b/jstests/replsets/set_cluster_parameter_replset.js
@@ -3,9 +3,6 @@
*
* @tags: [
* does_not_support_stepdowns,
- * featureFlagClusterWideConfig,
- * # Requires all nodes to be running the latest binary.
- * requires_fcv_60,
* # Restarts all replica set member nodes mid-test.
* requires_persistence,
* ]
diff --git a/jstests/replsets/tenant_migration_cloner_stats_with_failover.js b/jstests/replsets/tenant_migration_cloner_stats_with_failover.js
index 3fc4e996968..a47b816a65e 100644
--- a/jstests/replsets/tenant_migration_cloner_stats_with_failover.js
+++ b/jstests/replsets/tenant_migration_cloner_stats_with_failover.js
@@ -123,8 +123,10 @@ jsTestLog("Bytes copied after first batch of second database: " + bytesCopiedInc
// original primary to the new primary. Then, step up the new primary.
const fpAfterCreatingCollectionOfSecondDB =
configureFailPoint(newRecipientPrimary, "tenantCollectionClonerHangAfterCreateCollection");
-tenantMigrationTest.getRecipientRst().awaitReplication();
-newRecipientPrimary.adminCommand({replSetStepUp: 1});
+assert.soon(() => {
+ tenantMigrationTest.getRecipientRst().awaitReplication();
+ return newRecipientPrimary.adminCommand({replSetStepUp: 1}).ok;
+});
fpAfterBatchOfSecondDB.off();
jsTestLog("Wait until the new primary creates collection of second database.");
diff --git a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
index 40d7c00f63c..8ced7f53232 100644
--- a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
+++ b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
@@ -3,16 +3,12 @@
*
* Incompatible with shard merge, which can't handle restart.
*
- * TODO SERVER-66034: Remove requires_fcv_61 tag.
- *
* @tags: [
* incompatible_with_macos,
* incompatible_with_shard_merge,
* incompatible_with_windows_tls,
* requires_majority_read_concern,
* requires_persistence,
- * # failpoint pauseTenantMigrationAfterUpdatingToCommittedState only exists on the latest branch.
- * requires_fcv_61,
* # Tenant migrations are only used in serverless.
* serverless,
* ]
diff --git a/jstests/replsets/tenant_migration_recipient_rollback_recovery.js b/jstests/replsets/tenant_migration_recipient_rollback_recovery.js
index 1837baf2cc6..32c562c4779 100644
--- a/jstests/replsets/tenant_migration_recipient_rollback_recovery.js
+++ b/jstests/replsets/tenant_migration_recipient_rollback_recovery.js
@@ -1,15 +1,12 @@
/**
* Tests that tenant migrations that go through recipient rollback are recovered correctly.
*
- * TODO SERVER-66034: Remove requires_fcv_61 tag.
- *
* @tags: [
* incompatible_with_macos,
* incompatible_with_shard_merge,
* incompatible_with_windows_tls,
* requires_majority_read_concern,
* requires_persistence,
- * requires_fcv_61,
* serverless,
* ]
*/
diff --git a/jstests/replsets/write_change_stream_change_collection.js b/jstests/replsets/write_change_stream_change_collection.js
index 32fce968ecc..d4e1a75d04b 100644
--- a/jstests/replsets/write_change_stream_change_collection.js
+++ b/jstests/replsets/write_change_stream_change_collection.js
@@ -8,18 +8,20 @@
(function() {
"use strict";
-const replSetTest = new ReplSetTest({nodes: 1});
+const replSetTest = new ReplSetTest({nodes: 2});
replSetTest.startSet({setParameter: "multitenancySupport=true"});
replSetTest.initiate();
const primary = replSetTest.getPrimary();
-const oplogColl = primary.getDB("local").oplog.rs;
-const changeColl = primary.getDB("config").system.change_collection;
+const secondary = replSetTest.getSecondary();
const testDb = primary.getDB("test");
// Verifies that the oplog and change collection entries are the same for the specified start and
// end duration of the oplog timestamp.
-function verifyChangeCollectionEntries(startOplogTimestamp, endOplogTimestamp) {
+function verifyChangeCollectionEntries(connection, startOplogTimestamp, endOplogTimestamp) {
+ const oplogColl = connection.getDB("local").oplog.rs;
+ const changeColl = connection.getDB("config").system.change_collection;
+
// Fetch all oplog and change collection entries for the duration: [startOplogTimestamp,
// endOplogTimestamp].
const oplogEntries =
@@ -69,6 +71,7 @@ function performWrites(coll) {
// Test the change collection entries with the oplog by performing some basic writes.
(function testBasicWritesInChangeCollection() {
+ const oplogColl = primary.getDB("local").oplog.rs;
const startOplogTimestamp = oplogColl.find().toArray().at(-1).ts;
assert(startOplogTimestamp != undefined);
@@ -79,11 +82,18 @@ function performWrites(coll) {
assert(endOplogTimestamp !== undefined);
assert(timestampCmp(endOplogTimestamp, startOplogTimestamp) > 0);
- verifyChangeCollectionEntries(startOplogTimestamp, endOplogTimestamp);
+ // Wait for the replication to finish.
+ replSetTest.awaitReplication();
+
+ // Verify that the change collection entries are the same as the oplog in the primary and the
+ // secondary node.
+ verifyChangeCollectionEntries(primary, startOplogTimestamp, endOplogTimestamp);
+ verifyChangeCollectionEntries(secondary, startOplogTimestamp, endOplogTimestamp);
})();
// Test the change collection entries with the oplog by performing writes in a transaction.
(function testWritesinChangeCollectionWithTrasactions() {
+ const oplogColl = primary.getDB("local").oplog.rs;
const startOplogTimestamp = oplogColl.find().toArray().at(-1).ts;
assert(startOplogTimestamp != undefined);
@@ -97,7 +107,13 @@ function performWrites(coll) {
assert(endOplogTimestamp != undefined);
assert(timestampCmp(endOplogTimestamp, startOplogTimestamp) > 0);
- verifyChangeCollectionEntries(startOplogTimestamp, endOplogTimestamp);
+ // Wait for the replication to finish.
+ replSetTest.awaitReplication();
+
+ // Verify that the change collection entries are the same as the oplog in the primary and the
+ // secondary node for the applyOps.
+ verifyChangeCollectionEntries(primary, startOplogTimestamp, endOplogTimestamp);
+ verifyChangeCollectionEntries(secondary, startOplogTimestamp, endOplogTimestamp);
})();
replSetTest.stopSet();
diff --git a/jstests/serverless/libs/basic_serverless_test.js b/jstests/serverless/libs/basic_serverless_test.js
index 79c71f845fb..131aa394c46 100644
--- a/jstests/serverless/libs/basic_serverless_test.js
+++ b/jstests/serverless/libs/basic_serverless_test.js
@@ -7,13 +7,9 @@ const runForgetShardSplitAsync = function(primaryHost, migrationIdString) {
return primary.adminCommand({forgetShardSplit: 1, migrationId: UUID(migrationIdString)});
};
-const runAbortShardSplitAsync = function(rstArgs, migrationIdString) {
- load("jstests/replsets/rslib.js");
-
- const donorRst = createRst(rstArgs, true);
- const admin = donorRst.getPrimary().getDB("admin");
-
- return admin.runCommand({abortShardSplit: 1, migrationId: UUID(migrationIdString)});
+const runAbortShardSplitAsync = function(primaryHost, migrationIdString) {
+ const primary = new Mongo(primaryHost);
+ return primary.adminCommand({abortShardSplit: 1, migrationId: UUID(migrationIdString)});
};
const runCommitShardSplitAsync = function(rstArgs,
@@ -147,9 +143,12 @@ class ShardSplitOperation {
jsTestLog("Running forgetShardSplit command");
this.basicServerlessTest.removeRecipientNodesFromDonor();
+ const donorRstArgs = createRstArgs(this.donorSet);
+ this.basicServerlessTest.removeRecipientsFromRstArgs(donorRstArgs);
+ const donorSet = createRst(donorRstArgs, true);
const cmdObj = {forgetShardSplit: 1, migrationId: this.migrationId};
- assert.commandWorked(runShardSplitCommand(this.donorSet,
+ assert.commandWorked(runShardSplitCommand(donorSet,
cmdObj,
true /* retryableOnErrors */,
false /*enableDonorStartMigrationFsync*/));
@@ -175,11 +174,11 @@ class ShardSplitOperation {
*/
abortAsync() {
jsTestLog("Running abortShardSplit command asynchronously");
- const donorRstArgs = createRstArgs(this.donorSet);
+ const primary = this.basicServerlessTest.getDonorPrimary();
const migrationIdString = extractUUIDFromObject(this.migrationId);
const abortShardSplitThread =
- new Thread(runAbortShardSplitAsync, donorRstArgs, migrationIdString);
+ new Thread(runAbortShardSplitAsync, primary.host, migrationIdString);
abortShardSplitThread.start();
@@ -191,8 +190,8 @@ class ShardSplitOperation {
*/
abort() {
jsTestLog("Running abort command");
-
- const admin = this.donorSet.getPrimary().getDB("admin");
+ const primary = this.basicServerlessTest.getDonorPrimary();
+ const admin = primary.getDB("admin");
return admin.runCommand({abortShardSplit: 1, migrationId: this.migrationId});
}
@@ -348,7 +347,7 @@ class BasicServerlessTest {
/*
* Wait for state document garbage collection by polling for when the document has been removed
- * from the tenantSplitDonors namespace, and all access blockers have been removed.
+ * from the 'shardSplitDonors' namespace, and all access blockers have been removed.
* @param {migrationId} id that was used for the commitShardSplit command.
* @param {tenantIds} tenant ids of the shard split.
*/
@@ -529,6 +528,33 @@ class BasicServerlessTest {
}
/**
+ * Asserts that the TenantMigrationAccessBlocker for the given tenant on the given node has the
+ * expected statistics.
+ */
+ static checkShardSplitAccessBlocker(node, tenantId, {
+ numBlockedWrites = 0,
+ numBlockedReads = 0,
+ numTenantMigrationCommittedErrors = 0,
+ numTenantMigrationAbortedErrors = 0
+ }) {
+ const mtab = BasicServerlessTest.getTenantMigrationAccessBlocker({node, tenantId}).donor;
+ if (!mtab) {
+ assert.eq(0, numBlockedWrites);
+ assert.eq(0, numTenantMigrationCommittedErrors);
+ assert.eq(0, numTenantMigrationAbortedErrors);
+ return;
+ }
+
+ assert.eq(mtab.numBlockedReads, numBlockedReads, tojson(mtab));
+ assert.eq(mtab.numBlockedWrites, numBlockedWrites, tojson(mtab));
+ assert.eq(mtab.numTenantMigrationCommittedErrors,
+ numTenantMigrationCommittedErrors,
+ tojson(mtab));
+ assert.eq(
+ mtab.numTenantMigrationAbortedErrors, numTenantMigrationAbortedErrors, tojson(mtab));
+ }
+
+ /**
* Get the current donor primary by ignoring all the recipient nodes from the current donor set.
*/
getDonorPrimary() {
@@ -539,7 +565,7 @@ class BasicServerlessTest {
}
}
-BasicServerlessTest.kConfigSplitDonorsNS = "config.tenantSplitDonors";
+BasicServerlessTest.kConfigSplitDonorsNS = "config.shardSplitDonors";
BasicServerlessTest.DonorState = {
kUninitialized: "uninitialized",
kBlocking: "blocking",
diff --git a/jstests/serverless/shard_split_abort_while_committing.js b/jstests/serverless/shard_split_abort_while_committing.js
new file mode 100644
index 00000000000..a72e0e78144
--- /dev/null
+++ b/jstests/serverless/shard_split_abort_while_committing.js
@@ -0,0 +1,56 @@
+/*
+ * Tests that a well-timed abortShardSplit command does not abort an already committed split.
+ *
+ * @tags: [requires_fcv_52, featureFlagShardSplit, serverless]
+ */
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+
+const failpoints = [
+ "pauseShardSplitBeforeSendingStepUpToRecipients",
+ "pauseShardSplitAfterUpdatingToCommittedState"
+];
+
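+// The test runs once per failpoint; both pause the split after the commit decision has effectively
+// been made, so a concurrent abortShardSplit should not be able to override it.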
+function testAbortAfterSplitIsAppliedStillsCommits(failpoint) {
+ "use strict";
+
+ const tenantIds = ["tenant1", "tenant2"];
+
+ const test = new BasicServerlessTest({
+ recipientTagName: "recipientNode",
+ recipientSetName: "recipient",
+ quickGarbageCollection: true
+ });
+ test.addRecipientNodes();
+
+ const donorPrimary = test.getDonorPrimary();
+ const operation = test.createSplitOperation(tenantIds);
+
+ let blockFp = configureFailPoint(donorPrimary, failpoint);
+ let splitThread = operation.commitAsync();
+ blockFp.wait();
+
+ // The abort command expects the decisionPromise to be fulfilled, but `failpoint` is still
+ // blocking the split and keeps the promise from being fulfilled.
+ let ranAbortFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterReceivingAbortCmd");
+ let abortThread = operation.abortAsync();
+ ranAbortFp.wait();
+
+ blockFp.off();
+
+ assert.commandWorked(splitThread.returnData());
+
+ // Now that the decisionPromise is fulfilled, we can remove the failpoint.
+ ranAbortFp.off();
+ assert.commandFailed(abortThread.returnData());
+ assertMigrationState(donorPrimary, operation.migrationId, "committed");
+
+ operation.forget();
+ test.waitForGarbageCollection(operation.migrationId, tenantIds);
+ test.stop();
+}
+
+failpoints.forEach(fp => {
+ testAbortAfterSplitIsAppliedStillsCommits(fp);
+});
diff --git a/jstests/serverless/shard_split_apply_splitconfig.js b/jstests/serverless/shard_split_apply_splitconfig.js
index 6ff940828a8..5527fb61321 100644
--- a/jstests/serverless/shard_split_apply_splitconfig.js
+++ b/jstests/serverless/shard_split_apply_splitconfig.js
@@ -51,20 +51,15 @@ function runReconfigToSplitConfig() {
return status.set === kRecipientSetName;
}, "waiting for split config to take", 30000, 2000);
- jsTestLog("Waiting for recipient to elect a primary");
- assert.soon(() => {
- const recipientNode = test.recipientNodes[0];
- const status =
- assert.commandWorked(recipientNode.getDB('admin').runCommand({replSetGetStatus: 1}));
- return status.members.some(member => member.stateStr === 'PRIMARY');
- }, "waiting for recipient to elect primary", 30000, 2000);
-
jsTestLog("Confirming we can write to recipient");
-
- const recipientPrimary = test.recipientNodes.filter(node => {
- const n = node.getDB('admin')._helloOrLegacyHello();
- return n.isWritablePrimary || n.ismaster;
- })[0];
+ let recipientPrimary = undefined;
+ assert.soon(function() {
+ recipientPrimary = test.recipientNodes.find(node => {
+ const n = node.adminCommand('hello');
+ return n.isWritablePrimary || n.ismaster;
+ });
+ return recipientPrimary != undefined;
+ }, "waiting for primary to be available", 30000, 1000);
assert(recipientPrimary);
assert.commandWorked(recipientPrimary.getDB('foo').bar.insert({fake: 'document'}));
diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_aborted.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_aborted.js
new file mode 100644
index 00000000000..1c5af907e79
--- /dev/null
+++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_aborted.js
@@ -0,0 +1,108 @@
+/**
+ * Tests that the donor
+ * - does not rejects reads with atClusterTime/afterClusterTime >= blockTimestamp reads and
+ * linearizable reads after the split aborts.
+ *
+ * @tags: [
+ * incompatible_with_eft,
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallelTester.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+load("jstests/serverless/shard_split_concurrent_reads_on_donor_util.js");
+
+const kCollName = "testColl";
+const kTenantDefinedDbName = "0";
+
+/**
+ * Tests that after the split aborts, the donor does not reject linearizable reads or reads with
+ * atClusterTime/afterClusterTime >= blockTimestamp.
+ */
+function testDoNotRejectReadsAfterMigrationAborted(testCase, dbName, collName) {
+ const tenantId = dbName.split('_')[0];
+ const donorDoc = findSplitOperation(donorPrimary, operation.migrationId);
+ const nodes = testCase.isSupportedOnSecondaries ? donorRst.nodes : [donorPrimary];
+ nodes.forEach(node => {
+ const db = node.getDB(dbName);
+ if (testCase.requiresReadTimestamp) {
+ runCommandForConcurrentReadTest(db,
+ testCase.command(collName, donorDoc.blockTimestamp),
+ null,
+ testCase.isTransaction);
+ runCommandForConcurrentReadTest(
+ db,
+ testCase.command(collName, donorDoc.commitOrAbortOpTime.ts),
+ null,
+ testCase.isTransaction);
+ BasicServerlessTest.checkShardSplitAccessBlocker(
+ node, tenantId, {numTenantMigrationAbortedErrors: 0});
+ } else {
+ runCommandForConcurrentReadTest(
+ db, testCase.command(collName), null, testCase.isTransaction);
+ BasicServerlessTest.checkShardSplitAccessBlocker(
+ node, tenantId, {numTenantMigrationAbortedErrors: 0});
+ }
+ });
+}
+
+const testCases = shardSplitConcurrentReadTestCases;
+
+const test = new BasicServerlessTest({
+ recipientTagName: "recipientTag",
+ recipientSetName: "recipientSet",
+ quickGarbageCollection: true
+});
+test.addRecipientNodes();
+
+const tenantId = "tenantId";
+
+const donorRst = test.donor;
+const donorPrimary = test.getDonorPrimary();
+
+// Force the donor to preserve all snapshot history to ensure that transactional reads do not
+// fail with TransientTransactionError "Read timestamp is older than the oldest available
+// timestamp".
+donorRst.nodes.forEach(node => {
+ configureFailPoint(node, "WTPreserveSnapshotHistoryIndefinitely");
+});
+
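+// Pause the split once it enters the blocking state so that it can be aborted before it commits.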
+let blockFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking");
+
+const operation = test.createSplitOperation([tenantId]);
+const splitThread = operation.commitAsync();
+
+blockFp.wait();
+operation.abort();
+
+blockFp.off();
+
+splitThread.join();
+assert.commandFailed(splitThread.returnData());
+assertMigrationState(donorPrimary, operation.migrationId, "aborted");
+
+// Wait for the last oplog entry on the primary to be visible in the committed snapshot view of
+// the oplog on all the secondaries. This is to ensure that snapshot reads on secondaries with
+// unspecified atClusterTime have read timestamp >= abortTimestamp.
+donorRst.awaitLastOpCommitted();
+
+for (const [testCaseName, testCase] of Object.entries(testCases)) {
+ jsTest.log(`Testing inAborted with testCase ${testCaseName}`);
+ const dbName = `${tenantId}_${testCaseName}-inAborted-${kTenantDefinedDbName}`;
+ testDoNotRejectReadsAfterMigrationAborted(testCase, dbName, kCollName);
+}
+
+test.stop();
+})();
diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js
new file mode 100644
index 00000000000..d3bb1d3e85b
--- /dev/null
+++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js
@@ -0,0 +1,112 @@
+/**
+ * Tests that the donor
+ * - blocks reads with atClusterTime/afterClusterTime >= blockTimestamp that are executed while the
+ * split is in the blocking state but does not block linearizable reads.
+ *
+ * @tags: [
+ * incompatible_with_eft,
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallelTester.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+load("jstests/serverless/shard_split_concurrent_reads_on_donor_util.js");
+
+const kCollName = "testColl";
+const kTenantDefinedDbName = "0";
+
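+// Reads that are expected to block are given a short maxTimeMS so they fail with MaxTimeMSExpired
+// instead of hanging for the duration of the test.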
+const kMaxTimeMS = 1 * 1000;
+
+/**
+ * Tests that in the blocking state, the donor blocks reads with atClusterTime/afterClusterTime >=
+ * blockTimestamp but does not block linearizable reads.
+ */
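+// The following counters track how many reads are expected to block on the primary and on the
+// secondaries; they are compared against the access blocker statistics once all test cases have run.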
+let countBlockedReadsPrimary = 0;
+let countBlockedReadsSecondaries = 0;
+function testBlockReadsAfterMigrationEnteredBlocking(testCase, primary, dbName, collName) {
+ const donorDoc = findSplitOperation(primary, operation.migrationId);
+ const command = testCase.requiresReadTimestamp
+ ? testCase.command(collName, donorDoc.blockTimestamp)
+ : testCase.command(collName);
+ const shouldBlock = !testCase.isLinearizableRead;
+ if (shouldBlock) {
+ command.maxTimeMS = kMaxTimeMS;
+ countBlockedReadsPrimary += 1;
+ }
+ let nodes = [primary];
+ if (testCase.isSupportedOnSecondaries) {
+ nodes = donorRst.nodes;
+
+ if (shouldBlock) {
+ countBlockedReadsSecondaries += 1;
+ }
+ }
+ nodes.forEach(node => {
+ const db = node.getDB(dbName);
+ runCommandForConcurrentReadTest(
+ db, command, shouldBlock ? ErrorCodes.MaxTimeMSExpired : null, testCase.isTransaction);
+ });
+}
+
+const testCases = shardSplitConcurrentReadTestCases;
+
+const tenantId = "tenantId";
+const test = new BasicServerlessTest({
+ recipientTagName: "recipientTag",
+ recipientSetName: "recipientSet",
+ quickGarbageCollection: true
+});
+test.addRecipientNodes();
+
+const donorRst = test.donor;
+const donorPrimary = donorRst.getPrimary();
+
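+// Pause the split once it enters the blocking state so the reads below run while it is blocking.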
+let blockingFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking");
+
+const operation = test.createSplitOperation([tenantId]);
+const splitThread = operation.commitAsync();
+
+// Wait for the split to enter the blocking state.
+blockingFp.wait();
+
+// Wait for the last oplog entry on the primary to be visible in the committed snapshot view of
+// the oplog on all secondaries to ensure that snapshot reads on the secondaries with
+// unspecified atClusterTime have read timestamp >= blockTimestamp.
+donorRst.awaitLastOpCommitted();
+
+for (const [testCaseName, testCase] of Object.entries(testCases)) {
+ jsTest.log(`Testing inBlocking with testCase ${testCaseName}`);
+ const dbName = `${tenantId}_${testCaseName}-inBlocking-${kTenantDefinedDbName}`;
+ testBlockReadsAfterMigrationEnteredBlocking(testCase, donorPrimary, dbName, kCollName);
+}
+
+// Check the access blocker statistics on the primary.
+BasicServerlessTest.checkShardSplitAccessBlocker(
+ donorPrimary, tenantId, {numBlockedReads: countBlockedReadsPrimary});
+
+// Check the access blocker statistics on the secondaries.
+const secondaries = donorRst.getSecondaries();
+secondaries.forEach(node => {
+ BasicServerlessTest.checkShardSplitAccessBlocker(
+ node, tenantId, {numBlockedReads: countBlockedReadsSecondaries});
+});
+
+blockingFp.off();
+
+splitThread.join();
+assert.commandWorked(splitThread.returnData());
+
+test.stop();
+})();
diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_aborted.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_aborted.js
new file mode 100644
index 00000000000..bb76d0aa4aa
--- /dev/null
+++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_aborted.js
@@ -0,0 +1,133 @@
+/**
+ * Tests that the donor
+ * - blocks reads with atClusterTime/afterClusterTime >= blockTimestamp that are executed while the
+ * split is in the blocking state but does not block linearizable reads.
+ * - does not reject reads with atClusterTime/afterClusterTime >= blockTimestamp and linearizable
+ * reads after the split aborts.
+ *
+ * @tags: [
+ * incompatible_with_eft,
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallelTester.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+load("jstests/serverless/shard_split_concurrent_reads_on_donor_util.js");
+
+const kCollName = "testColl";
+const kTenantDefinedDbName = "0";
+
+function getTenantId(dbName) {
+ return dbName.split('_')[0];
+}
+
+/**
+ * To be used to resume a split that is paused after entering the blocking state. Waits for the
+ * number of blocked reads to reach 'targetNumBlockedReads' and unpauses the split.
+ */
+function resumeMigrationAfterBlockingRead(host, tenantId, targetNumBlockedReads) {
+ load("jstests/libs/fail_point_util.js");
+ load("jstests/serverless/libs/basic_serverless_test.js");
+ const primary = new Mongo(host);
+
+ assert.soon(() => BasicServerlessTest.getNumBlockedReads(primary, tenantId) ==
+ targetNumBlockedReads);
+
+ assert.commandWorked(
+ primary.adminCommand({configureFailPoint: "pauseShardSplitAfterBlocking", mode: "off"}));
+}
+
+/**
+ * Tests that the donor unblocks blocked reads (reads with atClusterTime/afterClusterTime >=
+ * blockingTimestamp) once the split aborts.
+ */
+function testUnblockBlockedReadsAfterMigrationAborted(testCase, dbName, collName) {
+ if (testCase.isLinearizableRead) {
+ // Linearizable reads are not blocked.
+ return;
+ }
+
+ const tenantId = getTenantId(dbName);
+ const test = new BasicServerlessTest({
+ recipientTagName: "recipientTag",
+ recipientSetName: "recipientSet",
+ quickGarbageCollection: true
+ });
+ test.addRecipientNodes();
+
+ const donorRst = test.donor;
+ const donorPrimary = test.getDonorPrimary();
+
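+ // The split pauses in the blocking state and aborts once it is resumed, so the blocked read
+ // below should be unblocked rather than rejected.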
+ let blockingFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking");
+ let abortFp = configureFailPoint(donorPrimary, "abortShardSplitBeforeLeavingBlockingState");
+ const operation = test.createSplitOperation([tenantId]);
+
+ // Run the commands after the split enters the blocking state.
+ const splitThread = operation.commitAsync();
+
+ let resumeMigrationThread =
+ new Thread(resumeMigrationAfterBlockingRead, donorPrimary.host, tenantId, 1);
+
+ // Start the resume thread and wait for the split to enter the blocking state.
+ resumeMigrationThread.start();
+ blockingFp.wait();
+
+ // Wait for the last oplog entry on the primary to be visible in the committed snapshot view of
+ // the oplog on all secondaries to ensure that snapshot reads on the secondaries with
+ // unspecified atClusterTime have read timestamp >= blockTimestamp.
+ donorRst.awaitLastOpCommitted();
+
+ const donorDoc = findSplitOperation(donorPrimary, operation.migrationId);
+ const command = testCase.requiresReadTimestamp
+ ? testCase.command(collName, donorDoc.blockTimestamp)
+ : testCase.command(collName);
+
+ // The split should unpause and abort after the read is blocked. Verify that the read
+ // unblocks.
+ const db = donorPrimary.getDB(dbName);
+ runCommandForConcurrentReadTest(db, command, null, testCase.isTransaction);
+ if (testCase.isSupportedOnSecondaries) {
+ const primaryPort = String(donorPrimary).split(":")[1];
+ const secondaries = donorRst.nodes.filter(node => node.port != primaryPort);
+ secondaries.forEach(node => {
+ const db = node.getDB(dbName);
+ runCommandForConcurrentReadTest(db, command, null, testCase.isTransaction);
+ });
+ }
+
+ const shouldBlock = !testCase.isLinearizableRead;
+ BasicServerlessTest.checkShardSplitAccessBlocker(donorPrimary, tenantId, {
+ numBlockedReads: shouldBlock ? 1 : 0,
+ // Reads just get unblocked if the split aborts.
+ numTenantMigrationAbortedErrors: 0
+ });
+
+ jsTestLog("Joining");
+ splitThread.join();
+ assert.commandFailed(splitThread.returnData());
+
+ resumeMigrationThread.join();
+ abortFp.off();
+ test.stop();
+}
+
+const testCases = shardSplitConcurrentReadTestCases;
+
+for (const [testCaseName, testCase] of Object.entries(testCases)) {
+ jsTest.log(`Testing inBlockingThenAborted with testCase ${testCaseName}`);
+ const dbName = `${testCaseName}-inBlockingThenAborted_${kTenantDefinedDbName}`;
+ testUnblockBlockedReadsAfterMigrationAborted(testCase, dbName, kCollName);
+}
+})();
diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_committed.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_committed.js
new file mode 100644
index 00000000000..6505439c7cb
--- /dev/null
+++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking_then_committed.js
@@ -0,0 +1,127 @@
+/**
+ * Tests that the donor
+ * - rejects reads with atClusterTime/afterClusterTime >= blockTimestamp and linearizable
+ * reads after the split commits.
+ *
+ * @tags: [
+ * incompatible_with_eft,
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallelTester.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+load("jstests/serverless/shard_split_concurrent_reads_on_donor_util.js");
+
+const kCollName = "testColl";
+const kTenantDefinedDbName = "0";
+
+function getTenantId(dbName) {
+ return dbName.split('_')[0];
+}
+
+/**
+ * To be used to resume a split that is paused after entering the blocking state. Waits for the
+ * number of blocked reads to reach 'targetNumBlockedReads' and unpauses the split.
+ */
+function resumeMigrationAfterBlockingRead(host, tenantId, targetNumBlockedReads) {
+ load("jstests/libs/fail_point_util.js");
+ load("jstests/serverless/libs/basic_serverless_test.js");
+ const primary = new Mongo(host);
+
+ assert.soon(() => BasicServerlessTest.getNumBlockedReads(primary, tenantId) ==
+ targetNumBlockedReads);
+
+ assert.commandWorked(
+ primary.adminCommand({configureFailPoint: "pauseShardSplitAfterBlocking", mode: "off"}));
+}
+
+/**
+ * Tests that the donor rejects the blocked reads (reads with atClusterTime/afterClusterTime >=
+ * blockingTimestamp) once the split commits.
+ */
+function testRejectBlockedReadsAfterMigrationCommitted(testCase, dbName, collName) {
+ if (testCase.isLinearizableRead) {
+ // Linearizable reads are not blocked.
+ return;
+ }
+
+ const tenantId = getTenantId(dbName);
+ const test = new BasicServerlessTest({
+ recipientTagName: "recipientTag",
+ recipientSetName: "recipientSet",
+ quickGarbageCollection: true
+ });
+ test.addRecipientNodes();
+
+ const donorRst = test.donor;
+ const donorPrimary = test.getDonorPrimary();
+
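+ // The split pauses in the blocking state; the resume thread unpauses it once the read below is
+ // blocked, after which the split commits and the read is rejected.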
+ let blockingFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking");
+
+ const operation = test.createSplitOperation([tenantId]);
+
+ let resumeMigrationThread =
+ new Thread(resumeMigrationAfterBlockingRead, donorPrimary.host, tenantId, 1);
+ resumeMigrationThread.start();
+
+ // Run the commands after the split enters the blocking state.
+ const splitThread = operation.commitAsync();
+ blockingFp.wait();
+
+ // Wait for the last oplog entry on the primary to be visible in the committed snapshot view of
+ // the oplog on all secondaries to ensure that snapshot reads on the secondaries with
+ // unspecified atClusterTime have read timestamp >= blockTimestamp.
+ donorRst.awaitLastOpCommitted();
+
+ const donorDoc = findSplitOperation(donorPrimary, operation.migrationId);
+ const command = testCase.requiresReadTimestamp
+ ? testCase.command(collName, donorDoc.blockTimestamp)
+ : testCase.command(collName);
+
+ // The split should unpause and commit after the read is blocked. Verify that the read
+ // is rejected on donor nodes.
+ const db = donorPrimary.getDB(dbName);
+ runCommandForConcurrentReadTest(
+ db, command, ErrorCodes.TenantMigrationCommitted, testCase.isTransaction);
+ if (testCase.isSupportedOnSecondaries) {
+ const primaryPort = String(donorPrimary).split(":")[1];
+ const secondaries = donorRst.nodes.filter(node => node.port != primaryPort);
+ secondaries.filter(node => !test.recipientNodes.includes(node)).forEach(node => {
+ const db = node.getDB(dbName);
+ runCommandForConcurrentReadTest(
+ db, command, ErrorCodes.TenantMigrationCommitted, testCase.isTransaction);
+ });
+ }
+
+ BasicServerlessTest.checkShardSplitAccessBlocker(
+ donorPrimary, tenantId, {numBlockedReads: 1, numTenantMigrationCommittedErrors: 1});
+
+ resumeMigrationThread.join();
+ // Verify that the split succeeded.
+ splitThread.join();
+ assert.commandWorked(splitThread.returnData());
+ test.removeAndStopRecipientNodes();
+
+ test.stop();
+}
+
+const testCases = shardSplitConcurrentReadTestCases;
+
+for (const [testCaseName, testCase] of Object.entries(testCases)) {
+ jsTest.log(`Testing inBlockingThenCommitted with testCase ${testCaseName}`);
+ const dbName = `${testCaseName}-inBlockingThenCommitted_${kTenantDefinedDbName}`;
+ testRejectBlockedReadsAfterMigrationCommitted(testCase, dbName, kCollName);
+}
+})();
diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_committed.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_committed.js
new file mode 100644
index 00000000000..16e16799fe6
--- /dev/null
+++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_committed.js
@@ -0,0 +1,128 @@
+/**
+ * Tests that the donor
+ * - rejects reads with atClusterTime/afterClusterTime >= blockTimestamp and linearizable
+ * reads after the split commits.
+ *
+ * @tags: [
+ * incompatible_with_eft,
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallelTester.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+load("jstests/serverless/shard_split_concurrent_reads_on_donor_util.js");
+
+const kCollName = "testColl";
+const kTenantDefinedDbName = "0";
+
+/**
+ * Tests that after the split commits, the donor rejects linearizable reads and reads with
+ * atClusterTime/afterClusterTime >= blockTimestamp.
+ */
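+// The following counters track how many reads are expected to fail with TenantMigrationCommitted on
+// the primary and on the secondaries; they are compared against the access blocker statistics after
+// all test cases have run.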
+let countTenantMigrationCommittedErrorsPrimary = 0;
+let countTenantMigrationCommittedErrorsSecondaries = 0;
+function testRejectReadsAfterMigrationCommitted(testCase, primary, dbName, collName, migrationId) {
+ const donorDoc = findSplitOperation(primary, migrationId);
+
+ let nodes = [primary];
+ if (testCase.isSupportedOnSecondaries) {
+ nodes = donorRst.nodes;
+
+ if (testCase.requiresReadTimestamp) {
+ countTenantMigrationCommittedErrorsSecondaries += 2;
+ } else {
+ countTenantMigrationCommittedErrorsSecondaries += 1;
+ }
+ }
+
+ if (testCase.requiresReadTimestamp) {
+ countTenantMigrationCommittedErrorsPrimary += 2;
+ } else {
+ countTenantMigrationCommittedErrorsPrimary += 1;
+ }
+
+ nodes.forEach(node => {
+ const db = node.getDB(dbName);
+ if (testCase.requiresReadTimestamp) {
+ runCommandForConcurrentReadTest(db,
+ testCase.command(collName, donorDoc.blockTimestamp),
+ ErrorCodes.TenantMigrationCommitted,
+ testCase.isTransaction);
+ runCommandForConcurrentReadTest(
+ db,
+ testCase.command(collName, donorDoc.commitOrAbortOpTime.ts),
+ ErrorCodes.TenantMigrationCommitted,
+ testCase.isTransaction);
+ } else {
+ runCommandForConcurrentReadTest(db,
+ testCase.command(collName),
+ ErrorCodes.TenantMigrationCommitted,
+ testCase.isTransaction);
+ }
+ });
+}
+
+const testCases = shardSplitConcurrentReadTestCases;
+
+const test = new BasicServerlessTest({
+ recipientTagName: "recipientTag",
+ recipientSetName: "recipientSet",
+ quickGarbageCollection: true
+});
+test.addRecipientNodes();
+
+const tenantId = "tenantId";
+
+let donorRst = test.donor;
+const donorPrimary = test.getDonorPrimary();
+
+// Force the donor to preserve all snapshot history to ensure that transactional reads do not
+// fail with TransientTransactionError "Read timestamp is older than the oldest available
+// timestamp".
+donorRst.nodes.forEach(node => {
+ configureFailPoint(node, "WTPreserveSnapshotHistoryIndefinitely");
+});
+
+const operation = test.createSplitOperation([tenantId]);
+assert.commandWorked(operation.commit());
+
+test.removeRecipientNodesFromDonor();
+
+// Wait for the last oplog entry on the primary to be visible in the committed snapshot view of
+// the oplog on all the secondaries. This is to ensure that snapshot reads on secondaries with
+// unspecified atClusterTime have read timestamp >= commitTimestamp.
+donorRst.awaitLastOpCommitted();
+
+for (const [testCaseName, testCase] of Object.entries(testCases)) {
+ jsTest.log(`Testing inCommitted with testCase ${testCaseName}`);
+ const dbName = `${tenantId}_${testCaseName}-inCommitted-${kTenantDefinedDbName}`;
+ testRejectReadsAfterMigrationCommitted(
+ testCase, donorPrimary, dbName, kCollName, operation.migrationId);
+}
+
+// Check the access blocker statistics on the primary.
+BasicServerlessTest.checkShardSplitAccessBlocker(donorPrimary, tenantId, {
+ numTenantMigrationCommittedErrors: countTenantMigrationCommittedErrorsPrimary
+});
+let secondaries = donorRst.getSecondaries();
+// Check the access blocker statistics on the secondaries.
+secondaries.forEach(node => {
+ BasicServerlessTest.checkShardSplitAccessBlocker(node, tenantId, {
+ numTenantMigrationCommittedErrors: countTenantMigrationCommittedErrorsSecondaries
+ });
+});
+
+test.stop();
+})();
diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_util.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_util.js
new file mode 100644
index 00000000000..2403d21aecd
--- /dev/null
+++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_util.js
@@ -0,0 +1,126 @@
+/**
+ * This utility file is used to list the different test cases needed for the
+ * shard_split_concurrent_reads_on_donor*tests.
+ */
+
+'use strict';
+
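+// Runs 'cmd' against 'db'. When 'expectedError' is set, the command must fail with that code, and
+// transactional TenantMigrationAborted/Committed failures must also carry the
+// 'TransientTransactionError' label; otherwise the command must succeed. Any session used by the
+// command is killed afterwards.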
+function runCommandForConcurrentReadTest(db, cmd, expectedError, isTransaction) {
+ const res = db.runCommand(cmd);
+
+ if (expectedError) {
+ assert.commandFailedWithCode(res, expectedError, tojson(cmd));
+ // The 'TransientTransactionError' label is attached only in a scope of a transaction.
+ if (isTransaction &&
+ (expectedError == ErrorCodes.TenantMigrationAborted ||
+ expectedError == ErrorCodes.TenantMigrationCommitted)) {
+ assert(res["errorLabels"] != null, "Error labels are absent from " + tojson(res));
+ const expectedErrorLabels = ['TransientTransactionError'];
+ assert.sameMembers(res["errorLabels"],
+ expectedErrorLabels,
+ "Error labels " + tojson(res["errorLabels"]) +
+ " are different from expected " + expectedErrorLabels);
+ }
+ } else {
+ assert.commandWorked(res);
+ }
+
+ if (cmd.lsid) {
+ assert.commandWorked(db.runCommand({killSessions: [cmd.lsid]}));
+ }
+}
+
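+// Table of read test cases shared by the shard_split_concurrent_reads_on_donor* tests. Each entry
+// records whether the read is supported on secondaries, whether it requires an explicit read
+// timestamp, whether it runs inside a transaction, and how to build the command object.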
+const shardSplitConcurrentReadTestCases = {
+ snapshotReadWithAtClusterTime: {
+ isSupportedOnSecondaries: true,
+ requiresReadTimestamp: true,
+ command: function(collName, readTimestamp) {
+ return {
+ find: collName,
+ readConcern: {
+ level: "snapshot",
+ atClusterTime: readTimestamp,
+ }
+ };
+ },
+ },
+ snapshotReadWithoutAtClusterTime: {
+ isSupportedOnSecondaries: true,
+ command: function(collName) {
+ return {
+ find: collName,
+ readConcern: {
+ level: "snapshot",
+ }
+ };
+ },
+ },
+ snapshotReadWithAtClusterTimeInTxn: {
+ isSupportedOnSecondaries: false,
+ requiresReadTimestamp: true,
+ isTransaction: true,
+ command: function(collName, readTimestamp) {
+ return {
+ find: collName,
+ lsid: {id: UUID()},
+ txnNumber: NumberLong(0),
+ startTransaction: true,
+ autocommit: false,
+ readConcern: {level: "snapshot", atClusterTime: readTimestamp}
+ };
+ }
+ },
+ snapshotReadWithoutAtClusterTimeInTxn: {
+ isSupportedOnSecondaries: false,
+ isTransaction: true,
+ command: function(collName) {
+ return {
+ find: collName,
+ lsid: {id: UUID()},
+ txnNumber: NumberLong(0),
+ startTransaction: true,
+ autocommit: false,
+ readConcern: {level: "snapshot"}
+ };
+ }
+ },
+ readWithAfterClusterTime: {
+ isSupportedOnSecondaries: true,
+ requiresReadTimestamp: true,
+ command: function(collName, readTimestamp) {
+ return {
+ find: collName,
+ readConcern: {
+ afterClusterTime: readTimestamp,
+ }
+ };
+ },
+ },
+ readWithAfterClusterTimeInTxn: {
+ isSupportedOnSecondaries: false,
+ requiresReadTimestamp: true,
+ isTransaction: true,
+ command: function(collName, readTimestamp) {
+ return {
+ find: collName,
+ lsid: {id: UUID()},
+ txnNumber: NumberLong(0),
+ startTransaction: true,
+ autocommit: false,
+ readConcern: {
+ afterClusterTime: readTimestamp,
+ }
+ };
+ },
+ },
+ linearizableRead: {
+ isSupportedOnSecondaries: false,
+ isLinearizableRead: true,
+ command: function(collName) {
+ return {
+ find: collName,
+ readConcern: {level: "linearizable"},
+ };
+ }
+ }
+};
diff --git a/jstests/serverless/shard_split_concurrent_writes_on_donor_aborted.js b/jstests/serverless/shard_split_concurrent_writes_on_donor_aborted.js
new file mode 100644
index 00000000000..b7b59387df7
--- /dev/null
+++ b/jstests/serverless/shard_split_concurrent_writes_on_donor_aborted.js
@@ -0,0 +1,154 @@
+/**
+ * Tests that the donor accepts writes after the shard split aborts.
+ *
+ * @tags: [
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallelTester.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/replsets/libs/tenant_migration_test.js");
+load("jstests/replsets/tenant_migration_concurrent_writes_on_donor_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+
+TestData.skipCheckDBHashes = true;
+const recipientTagName = "recipientNode";
+const recipientSetName = "recipient";
+const tenantMigrationTest = new BasicServerlessTest({
+ recipientTagName,
+ recipientSetName,
+ quickGarbageCollection: true,
+ allowStaleReadsOnDonor: true,
+ initiateWithShortElectionTimeout: true
+});
+
+const donorPrimary = tenantMigrationTest.getDonorPrimary();
+
+const kCollName = "testColl";
+const kTenantDefinedDbName = "0";
+
+const testCases = TenantMigrationConcurrentWriteUtil.testCases;
+const kTenantID = "tenantId";
+
+function setupTest(testCase, collName, testOpts) {
+ if (testCase.explicitlyCreateCollection) {
+ createCollectionAndInsertDocsForConcurrentWritesTest(
+ testOpts.primaryDB, collName, testCase.isCapped);
+ }
+
+ if (testCase.setUp) {
+ testCase.setUp(testOpts.primaryDB, collName, testOpts.testInTransaction);
+ }
+}
+
+/**
+ * Tests that the donor does not reject writes after the migration aborts.
+ */
+function testDoNotRejectWritesAfterMigrationAborted(testCase, testOpts) {
+ const tenantId = testOpts.dbName.split('_')[0];
+
+ // Wait until the in-memory migration state is updated after the migration has majority
+ // committed the abort decision. Otherwise, the command below is expected to block and then get
+ // rejected.
+ assert.soon(() => {
+ const mtab = BasicServerlessTest.getTenantMigrationAccessBlocker(
+ {node: testOpts.primaryDB, tenantId});
+ return mtab.donor.state === TenantMigrationTest.DonorAccessState.kAborted;
+ });
+
+ runCommandForConcurrentWritesTest(testOpts);
+ testCase.assertCommandSucceeded(testOpts.primaryDB, testOpts.dbName, testOpts.collName);
+ BasicServerlessTest.checkShardSplitAccessBlocker(
+ testOpts.primaryDB, tenantId, {numTenantMigrationAbortedErrors: 0});
+}
+
+const testOptsMap = {};
+
+/**
+ * Run the setup for each test case before the migration starts.
+ */
+function setupTestsBeforeMigration() {
+ for (const [commandName, testCase] of Object.entries(testCases)) {
+ let baseDbName = kTenantID + "_" + commandName + "-inCommitted0";
+
+ if (testCase.skip) {
+ print("Skipping " + commandName + ": " + testCase.skip);
+ continue;
+ }
+
+ let basicFullDb = baseDbName + "Basic-" + kTenantDefinedDbName;
+ const basicTestOpts = makeTestOptionsForConcurrentWritesTest(
+ donorPrimary, testCase, basicFullDb, kCollName, false, false);
+ testOptsMap[basicFullDb] = basicTestOpts;
+ setupTest(testCase, kCollName, basicTestOpts);
+
+ if (testCase.testInTransaction) {
+ let TxnFullDb = baseDbName + "Txn-" + kTenantDefinedDbName;
+ const txnTestOpts = makeTestOptionsForConcurrentWritesTest(
+ donorPrimary, testCase, TxnFullDb, kCollName, true, false);
+ testOptsMap[TxnFullDb] = txnTestOpts;
+ setupTest(testCase, kCollName, txnTestOpts);
+ }
+
+ if (testCase.testAsRetryableWrite) {
+ let retryableFullDb = baseDbName + "Retryable-" + kTenantDefinedDbName;
+ const retryableTestOpts = makeTestOptionsForConcurrentWritesTest(
+ donorPrimary, testCase, retryableFullDb, kCollName, false, true);
+ testOptsMap[retryableFullDb] = retryableTestOpts;
+ setupTest(testCase, kCollName, retryableTestOpts);
+ }
+ }
+}
+
+/**
+ * Run the test cases after the migration has aborted.
+ */
+function runTestsAfterMigration() {
+ for (const [commandName, testCase] of Object.entries(testCases)) {
+ let baseDbName = kTenantID + "_" + commandName + "-inCommitted0";
+ if (testCase.skip) {
+ continue;
+ }
+
+ const basicTesTOpts = testOptsMap[baseDbName + "Basic-" + kTenantDefinedDbName];
+ testDoNotRejectWritesAfterMigrationAborted(testCase, basicTesTOpts);
+
+ if (testCase.testInTransaction) {
+ const txnTesTOpts = testOptsMap[baseDbName + "Txn-" + kTenantDefinedDbName];
+ testDoNotRejectWritesAfterMigrationAborted(testCase, txnTesTOpts);
+ }
+
+ if (testCase.testAsRetryableWrite) {
+ const retryableTestOpts = testOptsMap[baseDbName + "Retryable-" + kTenantDefinedDbName];
+ testDoNotRejectWritesAfterMigrationAborted(testCase, retryableTestOpts);
+ }
+ }
+}
+
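+// Force the split to abort before it leaves the blocking state, so the writes exercised below run
+// against an aborted split.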
+const abortFp = configureFailPoint(donorPrimary, "abortShardSplitBeforeLeavingBlockingState");
+
+tenantMigrationTest.addRecipientNodes();
+const tenantIds = [kTenantID];
+const operation = tenantMigrationTest.createSplitOperation(tenantIds);
+
+setupTestsBeforeMigration();
+
+operation.commit({retryOnRetryableErrors: false}, {enableDonorStartMigrationFsync: true});
+assertMigrationState(tenantMigrationTest.getDonorPrimary(), operation.migrationId, "aborted");
+
+abortFp.off();
+
+runTestsAfterMigration();
+
+tenantMigrationTest.stop();
+})();
diff --git a/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js b/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js
new file mode 100644
index 00000000000..18d22487df2
--- /dev/null
+++ b/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js
@@ -0,0 +1,194 @@
+/**
+ * Tests that the donor blocks writes that are executed while the shard split is in the blocking
+ * state, then rejects those writes once the migration completes.
+ *
+ * @tags: [
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallelTester.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/replsets/libs/tenant_migration_test.js");
+load("jstests/replsets/tenant_migration_concurrent_writes_on_donor_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+
+TestData.skipCheckDBHashes = true;
+const recipientTagName = "recipientNode";
+const recipientSetName = "recipient";
+const tenantMigrationTest = new BasicServerlessTest({
+ recipientTagName,
+ recipientSetName,
+ quickGarbageCollection: true,
+ allowStaleReadsOnDonor: true,
+ initiateWithShortElectionTimeout: true
+});
+
+const donorPrimary = tenantMigrationTest.getDonorPrimary();
+
+const kCollName = "testColl";
+const kTenantDefinedDbName = "0";
+
+const testCases = TenantMigrationConcurrentWriteUtil.testCases;
+const kTenantID = "tenantId";
+
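+// Writes that are expected to block are given a short maxTimeMS so they fail with MaxTimeMSExpired
+// instead of hanging for the duration of the test.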
+const kMaxTimeMS = 1 * 1000;
+
+let countBlockedWrites = 0;
+
+/**
+ * Tests that the donor blocks writes that are executed while the split is in the blocking state;
+ * the caller increments countBlockedWrites for each blocked write.
+ */
+function testBlockWritesAfterMigrationEnteredBlocking(testOpts) {
+ testOpts.command.maxTimeMS = kMaxTimeMS;
+ runCommandForConcurrentWritesTest(testOpts, ErrorCodes.MaxTimeMSExpired);
+}
+
+function setupTest(testCase, collName, testOpts) {
+ if (testCase.explicitlyCreateCollection) {
+ createCollectionAndInsertDocsForConcurrentWritesTest(
+ testOpts.primaryDB, collName, testCase.isCapped);
+ }
+
+ if (testCase.setUp) {
+ testCase.setUp(testOpts.primaryDB, collName, testOpts.testInTransaction);
+ }
+}
+
+const testOptsMap = {};
+
+/**
+ * Run the setup for each test case before the migration starts.
+ */
+function setupTestsBeforeMigration() {
+ for (const [commandName, testCase] of Object.entries(testCases)) {
+ let baseDbName = kTenantID + "_" + commandName + "-inCommitted0";
+
+ if (testCase.skip) {
+ print("Skipping " + commandName + ": " + testCase.skip);
+ continue;
+ }
+
+ let basicFullDb = baseDbName + "Basic-" + kTenantDefinedDbName;
+ const basicTestOpts = makeTestOptionsForConcurrentWritesTest(
+ donorPrimary, testCase, basicFullDb, kCollName, false, false);
+ testOptsMap[basicFullDb] = basicTestOpts;
+ setupTest(testCase, kCollName, basicTestOpts);
+
+ if (testCase.testInTransaction) {
+ let TxnFullDb = baseDbName + "Txn-" + kTenantDefinedDbName;
+ const txnTestOpts = makeTestOptionsForConcurrentWritesTest(
+ donorPrimary, testCase, TxnFullDb, kCollName, true, false);
+ testOptsMap[TxnFullDb] = txnTestOpts;
+ setupTest(testCase, kCollName, txnTestOpts);
+ }
+
+ if (testCase.testAsRetryableWrite) {
+ let retryableFullDb = baseDbName + "Retryable-" + kTenantDefinedDbName;
+ const retryableTestOpts = makeTestOptionsForConcurrentWritesTest(
+ donorPrimary, testCase, retryableFullDb, kCollName, false, true);
+ testOptsMap[retryableFullDb] = retryableTestOpts;
+ setupTest(testCase, kCollName, retryableTestOpts);
+ }
+ }
+}
+
+/**
+ * Run the test cases while the migration is in the blocking state.
+ */
+function runTestsWhileBlocking() {
+ for (const [commandName, testCase] of Object.entries(testCases)) {
+ let baseDbName = kTenantID + "_" + commandName + "-inCommitted0";
+ if (testCase.skip) {
+ continue;
+ }
+
+ testBlockWritesAfterMigrationEnteredBlocking(
+ testOptsMap[baseDbName + "Basic-" + kTenantDefinedDbName]);
+ countBlockedWrites += 1;
+
+ if (testCase.testInTransaction) {
+ testBlockWritesAfterMigrationEnteredBlocking(
+ testOptsMap[baseDbName + "Txn-" + kTenantDefinedDbName]);
+ countBlockedWrites += 1;
+ }
+
+ if (testCase.testAsRetryableWrite) {
+ testBlockWritesAfterMigrationEnteredBlocking(
+ testOptsMap[baseDbName + "Retryable-" + kTenantDefinedDbName]);
+ countBlockedWrites += 1;
+ }
+ }
+}
+
+/**
+ * Run the test cases after the migration has committed
+ */
+function runTestsAfterMigrationCommitted() {
+ for (const [commandName, testCase] of Object.entries(testCases)) {
+ let baseDbName = kTenantID + "_" + commandName + "-inCommitted0";
+ if (testCase.skip) {
+ continue;
+ }
+
+ const basicTesTOpts = testOptsMap[baseDbName + "Basic-" + kTenantDefinedDbName];
+ testCase.assertCommandFailed(
+ basicTesTOpts.primaryDB, basicTesTOpts.dbName, basicTesTOpts.collName);
+
+ if (testCase.testInTransaction) {
+ const txnTesTOpts = testOptsMap[baseDbName + "Txn-" + kTenantDefinedDbName];
+ testCase.assertCommandFailed(
+ txnTesTOpts.primaryDB, txnTesTOpts.dbName, txnTesTOpts.collName);
+ }
+
+ if (testCase.testAsRetryableWrite) {
+ const retryableTestOpts = testOptsMap[baseDbName + "Retryable-" + kTenantDefinedDbName];
+ testCase.assertCommandFailed(
+ retryableTestOpts.primaryDB, retryableTestOpts.dbName, retryableTestOpts.collName);
+ }
+ }
+}
+
+tenantMigrationTest.addRecipientNodes();
+const tenantIds = [kTenantID];
+const operation = tenantMigrationTest.createSplitOperation(tenantIds);
+
+setupTestsBeforeMigration();
+
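+// Pause the split once it enters the blocking state so the writes below are blocked.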
+let blockFp = configureFailPoint(donorPrimary, "pauseShardSplitAfterBlocking");
+
+// Start a shard split operation asynchronously.
+const splitThread = operation.commitAsync();
+
+// Run the command after the migration enters the blocking state.
+blockFp.wait();
+
+// Run test cases while the migration is in blocking state.
+runTestsWhileBlocking();
+
+// Allow the migration to complete.
+blockFp.off();
+splitThread.join();
+
+const data = splitThread.returnData();
+assert.commandWorked(data);
+assert.eq(data.state, "committed");
+
+// Run the tests after blocking is over and the migration has committed.
+runTestsAfterMigrationCommitted();
+
+BasicServerlessTest.checkShardSplitAccessBlocker(
+ donorPrimary, kTenantID, {numBlockedWrites: countBlockedWrites});
+
+tenantMigrationTest.stop();
+})();
diff --git a/jstests/serverless/shard_split_concurrent_writes_on_donor_committed.js b/jstests/serverless/shard_split_concurrent_writes_on_donor_committed.js
new file mode 100644
index 00000000000..19db6591205
--- /dev/null
+++ b/jstests/serverless/shard_split_concurrent_writes_on_donor_committed.js
@@ -0,0 +1,145 @@
+/**
+ * Tests that writes executed after the shard split has reached the committed state are rejected
+ * by the donor.
+ * @tags: [
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallelTester.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/replsets/libs/tenant_migration_test.js");
+load("jstests/replsets/tenant_migration_concurrent_writes_on_donor_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+
+TestData.skipCheckDBHashes = true;
+const recipientTagName = "recipientNode";
+const recipientSetName = "recipient";
+const tenantMigrationTest = new BasicServerlessTest({
+ recipientTagName,
+ recipientSetName,
+ quickGarbageCollection: true,
+ allowStaleReadsOnDonor: true,
+ initiateWithShortElectionTimeout: true
+});
+
+const donorPrimary = tenantMigrationTest.getDonorPrimary();
+
+const kCollName = "testColl";
+const kTenantDefinedDbName = "0";
+
+const testCases = TenantMigrationConcurrentWriteUtil.testCases;
+const kTenantID = "tenantId";
+
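+// Tracks the number of writes expected to fail with TenantMigrationCommitted; compared against the
+// access blocker statistics after the test cases have run.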
+let countTenantMigrationCommittedErrors = 0;
+
+function setupTest(testCase, collName, testOpts) {
+ if (testCase.explicitlyCreateCollection) {
+ createCollectionAndInsertDocsForConcurrentWritesTest(
+ testOpts.primaryDB, collName, testCase.isCapped);
+ }
+
+ if (testCase.setUp) {
+ testCase.setUp(testOpts.primaryDB, collName, testOpts.testInTransaction);
+ }
+}
+
+/**
+ * Tests that the donor rejects writes after a migration has already committed.
+ */
+function testRejectWritesAfterMigrationCommitted(testCase, testOpts) {
+ runCommandForConcurrentWritesTest(testOpts, ErrorCodes.TenantMigrationCommitted);
+ testCase.assertCommandFailed(testOpts.primaryDB, testOpts.dbName, testOpts.collName);
+}
+
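+// Maps each full test database name (e.g. "<tenantId>_<command>-inCommitted0Basic-0") to the
+// options created for it during setup, so the same options are reused after the split commits.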
+const testOptsMap = {};
+
+/**
+ * Runs the setup for each test case before the migration starts.
+ */
+function setupTestsBeforeMigration() {
+ for (const [commandName, testCase] of Object.entries(testCases)) {
+ let baseDbName = kTenantID + "_" + commandName + "-inCommitted0";
+
+ if (testCase.skip) {
+ print("Skipping " + commandName + ": " + testCase.skip);
+ continue;
+ }
+
+ let basicFullDb = baseDbName + "Basic-" + kTenantDefinedDbName;
+ const basicTestOpts = makeTestOptionsForConcurrentWritesTest(
+ donorPrimary, testCase, basicFullDb, kCollName, false, false);
+ testOptsMap[basicFullDb] = basicTestOpts;
+ setupTest(testCase, kCollName, basicTestOpts);
+
+ if (testCase.testInTransaction) {
+            let txnFullDb = baseDbName + "Txn-" + kTenantDefinedDbName;
+            const txnTestOpts = makeTestOptionsForConcurrentWritesTest(
+                donorPrimary, testCase, txnFullDb, kCollName, true, false);
+            testOptsMap[txnFullDb] = txnTestOpts;
+ setupTest(testCase, kCollName, txnTestOpts);
+ }
+
+ if (testCase.testAsRetryableWrite) {
+ let retryableFullDb = baseDbName + "Retryable-" + kTenantDefinedDbName;
+ const retryableTestOpts = makeTestOptionsForConcurrentWritesTest(
+ donorPrimary, testCase, retryableFullDb, kCollName, false, true);
+ testOptsMap[retryableFullDb] = retryableTestOpts;
+ setupTest(testCase, kCollName, retryableTestOpts);
+ }
+ }
+}
+
+/**
+ * Runs the test cases after the migration has committed.
+ */
+function runTestsAfterMigration() {
+ for (const [commandName, testCase] of Object.entries(testCases)) {
+ let baseDbName = kTenantID + "_" + commandName + "-inCommitted0";
+ if (testCase.skip) {
+ continue;
+ }
+
+        const basicTestOpts = testOptsMap[baseDbName + "Basic-" + kTenantDefinedDbName];
+        testRejectWritesAfterMigrationCommitted(testCase, basicTestOpts);
+ countTenantMigrationCommittedErrors += 1;
+
+ if (testCase.testInTransaction) {
+            const txnTestOpts = testOptsMap[baseDbName + "Txn-" + kTenantDefinedDbName];
+            testRejectWritesAfterMigrationCommitted(testCase, txnTestOpts);
+ countTenantMigrationCommittedErrors += 1;
+ }
+
+ if (testCase.testAsRetryableWrite) {
+ const retryableTestOpts = testOptsMap[baseDbName + "Retryable-" + kTenantDefinedDbName];
+ testRejectWritesAfterMigrationCommitted(testCase, retryableTestOpts);
+ countTenantMigrationCommittedErrors += 1;
+ }
+ }
+}
+
+tenantMigrationTest.addRecipientNodes();
+const tenantIds = [kTenantID];
+const operation = tenantMigrationTest.createSplitOperation(tenantIds);
+
+setupTestsBeforeMigration();
+
+assert.commandWorked(
+ operation.commit({retryOnRetryableErrors: false}, {enableDonorStartMigrationFsync: true}));
+
+runTestsAfterMigration();
+BasicServerlessTest.checkShardSplitAccessBlocker(donorPrimary, kTenantID, {
+ numTenantMigrationCommittedErrors: countTenantMigrationCommittedErrors
+});
+
+tenantMigrationTest.stop();
+})();
diff --git a/jstests/serverless/shard_split_drop_state_doc_collection_aborted.js b/jstests/serverless/shard_split_drop_state_doc_collection_aborted.js
new file mode 100644
index 00000000000..7f23f2840d0
--- /dev/null
+++ b/jstests/serverless/shard_split_drop_state_doc_collection_aborted.js
@@ -0,0 +1,114 @@
+/**
+ * Tests dropping the donor state doc collections after the shard split has aborted.
+ *
+ * @tags: [
+ * incompatible_with_eft,
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+
+const recipientTagName = "recipientNode";
+const recipientSetName = "recipient";
+
+TestData.skipCheckDBHashes = true;
+
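+// Pauses a shard split at 'fpName', optionally drops the donor state doc collection and forces a
+// stepdown so the donor service rebuilds, then resumes the split and verifies that a follow-up
+// split operation (with the same or a different migration id) still commits.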
+function testDroppingStateDocCollections(
+ test,
+ fpName,
+ {dropDonorsCollection = false, retryWithDifferentMigrationId = false, expectedAbortReason}) {
+ jsTest.log(`Testing with failpoint: ${fpName} dropDonorsCollection: ${
+ dropDonorsCollection}, retryWithDifferentMigrationId: ${retryWithDifferentMigrationId}`);
+
+ test.addRecipientNodes();
+ let donorPrimary = test.donor.getPrimary();
+
+ const tenantIds = ["tenant1", "tenant2"];
+
+ const operation = test.createSplitOperation(tenantIds);
+ let migrationId = operation.migrationId;
+
+ let fp = configureFailPoint(donorPrimary.getDB("admin"), fpName);
+ let commitShardSplitThread = operation.commitAsync();
+ fp.wait();
+
+ if (dropDonorsCollection) {
+ assert(donorPrimary.getCollection(BasicServerlessTest.kConfigSplitDonorsNS).drop());
+ let donorDoc = findSplitOperation(donorPrimary, migrationId);
+ assert.eq(donorDoc, null);
+
+ const currOpDonor = assert.commandWorked(
+ donorPrimary.adminCommand({currentOp: true, desc: "shard split operation"}));
+ assert.eq(currOpDonor.inprog.length, 0);
+
+ // Trigger stepup to allow the donor service to rebuild.
+ assert.commandWorked(donorPrimary.adminCommand({replSetStepDown: 30, force: true}));
+ }
+
+ fp.off();
+ commitShardSplitThread.join();
+ if (expectedAbortReason) {
+ const data = commitShardSplitThread.returnData();
+ assert.commandFailedWithCode(data, expectedAbortReason);
+ assert.eq(data.code, expectedAbortReason);
+ }
+ test.removeRecipientNodesFromDonor();
+ if (!dropDonorsCollection) {
+ operation.forget();
+ test.waitForGarbageCollection(migrationId, tenantIds);
+ }
+ test.removeAndStopRecipientNodes();
+ test.reconfigDonorSetAfterSplit();
+
+ test.addRecipientNodes();
+
+ const operation2 =
+ retryWithDifferentMigrationId ? test.createSplitOperation(tenantIds) : operation;
+ migrationId = operation2.migrationId;
+ const runMigrationRes = operation2.commit();
+
+ assert.commandWorked(runMigrationRes);
+ assert.eq(runMigrationRes.state, "committed");
+
+ operation2.forget();
+
+ test.cleanupSuccesfulCommitted(migrationId, tenantIds);
+}
+
+jsTest.log("Test dropping donor and recipient state doc collections during a shard split.");
+const test = new BasicServerlessTest({
+ recipientTagName,
+ recipientSetName,
+ quickGarbageCollection: true,
+ initiateWithShortElectionTimeout: true
+});
+
+const fpName = "abortShardSplitBeforeLeavingBlockingState";
+testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: true});
+
+testDroppingStateDocCollections(
+ test, fpName, {dropDonorsCollection: true, retryWithDifferentMigrationId: true});
+
+testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: false});
+
+testDroppingStateDocCollections(test, fpName, {
+ dropDonorsCollection: false,
+ expectedAbortReason: (fpName == "abortShardSplitBeforeLeavingBlockingState")
+ ? ErrorCodes.TenantMigrationAborted
+ : null
+});
+
+test.stop();
+})();
diff --git a/jstests/serverless/shard_split_drop_state_doc_collection_blocking.js b/jstests/serverless/shard_split_drop_state_doc_collection_blocking.js
new file mode 100644
index 00000000000..91a6a8df296
--- /dev/null
+++ b/jstests/serverless/shard_split_drop_state_doc_collection_blocking.js
@@ -0,0 +1,118 @@
+/**
+ * Tests dropping the donor state doc collections while a shard split is in the blocking state.
+ *
+ * @tags: [
+ * incompatible_with_eft,
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+
+const recipientTagName = "recipientNode";
+const recipientSetName = "recipient";
+
+TestData.skipCheckDBHashes = true;
+
+function testDroppingStateDocCollections(
+ test,
+ fpName,
+ {dropDonorsCollection = false, retryWithDifferentMigrationId = false, expectedAbortReason}) {
+ jsTest.log(`Testing with failpoint: ${fpName} dropDonorsCollection: ${
+ dropDonorsCollection}, retryWithDifferentMigrationId: ${retryWithDifferentMigrationId}`);
+
+ test.addRecipientNodes();
+ let donorPrimary = test.donor.getPrimary();
+
+ const tenantIds = ["tenant1", "tenant2"];
+
+ const operation = test.createSplitOperation(tenantIds);
+ let migrationId = operation.migrationId;
+
+ let fp = configureFailPoint(donorPrimary.getDB("admin"), fpName);
+ let commitShardSplitThread = operation.commitAsync();
+ fp.wait();
+
+ if (dropDonorsCollection) {
+ assert(donorPrimary.getCollection(BasicServerlessTest.kConfigSplitDonorsNS).drop());
+ let donorDoc = findSplitOperation(donorPrimary, migrationId);
+ assert.eq(donorDoc, null);
+
+ const currOpDonor = assert.commandWorked(
+ donorPrimary.adminCommand({currentOp: true, desc: "shard split operation"}));
+ assert.eq(currOpDonor.inprog.length, 0);
+
+ // Trigger stepup to allow the donor service to rebuild.
+ assert.commandWorked(donorPrimary.adminCommand({replSetStepDown: 30, force: true}));
+ }
+
+ fp.off();
+ commitShardSplitThread.join();
+ if (expectedAbortReason) {
+ const data = commitShardSplitThread.returnData();
+ assert.commandFailedWithCode(data, expectedAbortReason);
+ assert.eq(data.code, expectedAbortReason);
+ }
+ test.removeRecipientNodesFromDonor();
+ if (!dropDonorsCollection) {
+ operation.forget();
+ test.waitForGarbageCollection(migrationId, tenantIds);
+ }
+ test.removeAndStopRecipientNodes();
+ test.reconfigDonorSetAfterSplit();
+
+ test.addRecipientNodes();
+
+ const operation2 =
+ retryWithDifferentMigrationId ? test.createSplitOperation(tenantIds) : operation;
+ migrationId = operation2.migrationId;
+ const runMigrationRes = operation2.commit();
+
+ assert.commandWorked(runMigrationRes);
+ assert.eq(runMigrationRes.state, "committed");
+
+ operation2.forget();
+
+ test.cleanupSuccesfulCommitted(migrationId, tenantIds);
+}
+
+jsTest.log("Test dropping donor and recipient state doc collections during a shard split.");
+const test = new BasicServerlessTest({
+ recipientTagName,
+ recipientSetName,
+ quickGarbageCollection: true,
+ initiateWithShortElectionTimeout: true
+});
+
+const fpName = "pauseShardSplitAfterBlocking";
+testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: true});
+
+testDroppingStateDocCollections(
+ test, fpName, {dropDonorsCollection: true, retryWithDifferentMigrationId: true});
+
+if (fpName) {
+    // If there is no failpoint and the donor state doc collection is not dropped, there is no
+    // need to run the test.
+ testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: false});
+
+ testDroppingStateDocCollections(test, fpName, {
+ dropDonorsCollection: false,
+ expectedAbortReason: (fpName == "abortShardSplitBeforeLeavingBlockingState")
+ ? ErrorCodes.TenantMigrationAborted
+ : null
+ });
+}
+
+test.stop();
+})();
diff --git a/jstests/serverless/shard_split_drop_state_doc_collection_committed.js b/jstests/serverless/shard_split_drop_state_doc_collection_committed.js
new file mode 100644
index 00000000000..ba9c5c5bc65
--- /dev/null
+++ b/jstests/serverless/shard_split_drop_state_doc_collection_committed.js
@@ -0,0 +1,91 @@
+/**
+ * Tests dropping the donor state doc collections after the shard split has committed.
+ *
+ * @tags: [
+ * incompatible_with_eft,
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+
+const recipientTagName = "recipientNode";
+const recipientSetName = "recipient";
+
+TestData.skipCheckDBHashes = true;
+
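+// Commits a shard split, optionally drops the donor state doc collection and forces a stepdown,
+// then verifies that a follow-up split operation (with the same or a different migration id)
+// still commits.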
+function testDroppingStateDocCollections(
+ test,
+ fpName,
+ {dropDonorsCollection = false, retryWithDifferentMigrationId = false, expectedAbortReason}) {
+ jsTest.log(`Testing with failpoint: ${fpName} dropDonorsCollection: ${
+ dropDonorsCollection}, retryWithDifferentMigrationId: ${retryWithDifferentMigrationId}`);
+
+ test.addRecipientNodes();
+ let donorPrimary = test.donor.getPrimary();
+
+ const tenantIds = ["tenant1", "tenant2"];
+
+ const operation = test.createSplitOperation(tenantIds);
+ let migrationId = operation.migrationId;
+
+ assert.commandWorked(operation.commit());
+ operation.forget();
+
+ test.cleanupSuccesfulCommitted(migrationId, tenantIds);
+
+ if (dropDonorsCollection) {
+ assert(donorPrimary.getCollection(BasicServerlessTest.kConfigSplitDonorsNS).drop());
+ let donorDoc = findSplitOperation(donorPrimary, migrationId);
+ assert.eq(donorDoc, null);
+
+ const currOpDonor = assert.commandWorked(
+ donorPrimary.adminCommand({currentOp: true, desc: "shard split operation"}));
+ assert.eq(currOpDonor.inprog.length, 0);
+
+ // Trigger stepup to allow the donor service to rebuild.
+ assert.commandWorked(donorPrimary.adminCommand({replSetStepDown: 30, force: true}));
+ }
+
+ test.addRecipientNodes();
+
+ const operation2 =
+ retryWithDifferentMigrationId ? test.createSplitOperation(tenantIds) : operation;
+ migrationId = operation2.migrationId;
+ const runMigrationRes = operation2.commit();
+
+ assert.commandWorked(runMigrationRes);
+ assert.eq(runMigrationRes.state, "committed");
+
+ operation2.forget();
+
+ test.cleanupSuccesfulCommitted(migrationId, tenantIds);
+}
+
+jsTest.log("Test dropping donor and recipient state doc collections during a shard split.");
+const test = new BasicServerlessTest({
+ recipientTagName,
+ recipientSetName,
+ quickGarbageCollection: true,
+ initiateWithShortElectionTimeout: true
+});
+
+const fpName = undefined;
+testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: true});
+
+testDroppingStateDocCollections(
+ test, fpName, {dropDonorsCollection: true, retryWithDifferentMigrationId: true});
+
+test.stop();
+})();
diff --git a/jstests/serverless/shard_split_drop_state_doc_collection_decision_fullfilled.js b/jstests/serverless/shard_split_drop_state_doc_collection_decision_fullfilled.js
new file mode 100644
index 00000000000..1c5120d3e95
--- /dev/null
+++ b/jstests/serverless/shard_split_drop_state_doc_collection_decision_fullfilled.js
@@ -0,0 +1,121 @@
+/**
+ * Tests dropping the donor state doc collections after the shard split decision promise is
+ * fulfilled.
+ *
+ * @tags: [
+ * incompatible_with_eft,
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * requires_fcv_52,
+ * featureFlagShardSplit
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/serverless/libs/basic_serverless_test.js");
+
+const recipientTagName = "recipientNode";
+const recipientSetName = "recipient";
+
+TestData.skipCheckDBHashes = true;
+
+function testDroppingStateDocCollections(
+ test,
+ fpName,
+ {dropDonorsCollection = false, retryWithDifferentMigrationId = false, expectedAbortReason}) {
+ jsTest.log(`Testing with failpoint: ${fpName} dropDonorsCollection: ${
+ dropDonorsCollection}, retryWithDifferentMigrationId: ${retryWithDifferentMigrationId}`);
+
+ test.addRecipientNodes();
+ let donorPrimary = test.donor.getPrimary();
+
+ const tenantIds = ["tenant1", "tenant2"];
+
+ const operation = test.createSplitOperation(tenantIds);
+ let migrationId = operation.migrationId;
+
+    let fp = configureFailPoint(donorPrimary.getDB("admin"), fpName);
+    let commitShardSplitThread = operation.commitAsync();
+ fp.wait();
+
+ if (dropDonorsCollection) {
+ assert(donorPrimary.getCollection(BasicServerlessTest.kConfigSplitDonorsNS).drop());
+ let donorDoc = findSplitOperation(donorPrimary, migrationId);
+ assert.eq(donorDoc, null);
+
+ const currOpDonor = assert.commandWorked(
+ donorPrimary.adminCommand({currentOp: true, desc: "shard split operation"}));
+ assert.eq(currOpDonor.inprog.length, 0);
+
+ // Trigger stepup to allow the donor service to rebuild.
+ assert.commandWorked(donorPrimary.adminCommand({replSetStepDown: 30, force: true}));
+ }
+
+ fp.off();
+ commitShardSplitThread.join();
+ if (expectedAbortReason) {
+ const data = commitShardSplitThread.returnData();
+ assert.commandFailedWithCode(data, expectedAbortReason);
+ assert.eq(data.code, expectedAbortReason);
+ }
+ test.removeRecipientNodesFromDonor();
+ if (!dropDonorsCollection) {
+ operation.forget();
+ test.waitForGarbageCollection(migrationId, tenantIds);
+ }
+ test.removeAndStopRecipientNodes();
+ test.reconfigDonorSetAfterSplit();
+
+ test.addRecipientNodes();
+
+ const operation2 =
+ retryWithDifferentMigrationId ? test.createSplitOperation(tenantIds) : operation;
+ migrationId = operation2.migrationId;
+ const runMigrationRes = operation2.commit();
+
+ assert.commandWorked(runMigrationRes);
+ assert.eq(runMigrationRes.state, "committed");
+
+ operation2.forget();
+
+ test.cleanupSuccesfulCommitted(migrationId, tenantIds);
+}
+
+jsTest.log("Test dropping donor and recipient state doc collections during a shard split.");
+const test = new BasicServerlessTest({
+ recipientTagName,
+ recipientSetName,
+ quickGarbageCollection: true,
+ initiateWithShortElectionTimeout: true
+});
+
+const fpName = "pauseShardSplitAfterDecision";
+testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: true});
+
+testDroppingStateDocCollections(
+ test, fpName, {dropDonorsCollection: true, retryWithDifferentMigrationId: true});
+
+if (fpName) {
+    // If there is no failpoint and the donor state doc collection is not dropped, there is no
+    // need to run the test.
+ testDroppingStateDocCollections(test, fpName, {dropDonorsCollection: false});
+
+ testDroppingStateDocCollections(test, fpName, {
+ dropDonorsCollection: false,
+ expectedAbortReason: (fpName == "abortShardSplitBeforeLeavingBlockingState")
+ ? ErrorCodes.TenantMigrationAborted
+ : null
+ });
+}
+
+test.stop();
+})();
diff --git a/jstests/serverless/shard_split_performance_test.js b/jstests/serverless/shard_split_performance_test.js
index cb5d0dd20cb..2633d804dec 100644
--- a/jstests/serverless/shard_split_performance_test.js
+++ b/jstests/serverless/shard_split_performance_test.js
@@ -8,7 +8,6 @@ load("jstests/serverless/libs/basic_serverless_test.js");
load("jstests/replsets/rslib.js");
const kBlockStart = "Entering 'blocking' state.";
-const kAbortingIndex = "Aborting index build for shard split.";
const kReconfig = "Applying the split config";
const kWaitForRecipients = "Waiting for recipient to accept the split.";
const kEndMsg = "Shard split decision reached";
@@ -82,18 +81,15 @@ function runOneSplit() {
assertMigrationState(test.donor.getPrimary(), operation.migrationId, "committed");
const blockTS = extractTs(checkLog.getLogMessage(primary, kBlockStart));
- const abortingTS = extractTs(checkLog.getLogMessage(primary, kAbortingIndex));
const reconfigTS = extractTs(checkLog.getLogMessage(primary, kReconfig));
const waitForRecipientsTS = extractTs(checkLog.getLogMessage(primary, kWaitForRecipients));
const endTS = extractTs(checkLog.getLogMessage(primary, kEndMsg));
const blockDurationMs = endTS - blockTS;
- const abortingIndexDurationMs = endTS - abortingTS;
const waitForRecipientsDurationMs = endTS - waitForRecipientsTS;
const reconfigDurationMs = endTS - reconfigTS;
- const splitResult =
- {blockDurationMs, abortingIndexDurationMs, reconfigDurationMs, waitForRecipientsDurationMs};
+ const splitResult = {blockDurationMs, reconfigDurationMs, waitForRecipientsDurationMs};
jsTestLog(`Performance result of shard split: ${tojson(splitResult)}`);
const maximumReconfigDuration = 500;
diff --git a/jstests/serverless/shard_split_unblock_reads_and_writes_on_completion.js b/jstests/serverless/shard_split_unblock_reads_and_writes_on_completion.js
index ac41ae32af2..bf5dd5be8f6 100644
--- a/jstests/serverless/shard_split_unblock_reads_and_writes_on_completion.js
+++ b/jstests/serverless/shard_split_unblock_reads_and_writes_on_completion.js
@@ -194,7 +194,7 @@ const kCollName = "testColl";
// Cannot mark the state doc as garbage collectable before the migration commits or aborts.
assert.commandFailedWithCode(donorsColl.update({recipientSetName: operation.recipientSetName},
{$set: {expireAt: new Date()}}),
- ErrorCodes.IllegalOperation);
+ ErrorCodes.BadValue);
// Can drop the state doc collection but this will not cause all blocked reads and writes to
// hang.
diff --git a/jstests/serverless/shard_split_write_during_split_stepdown.js b/jstests/serverless/shard_split_write_during_split_stepdown.js
index d147f09412b..1670b5b57b3 100644
--- a/jstests/serverless/shard_split_write_during_split_stepdown.js
+++ b/jstests/serverless/shard_split_write_during_split_stepdown.js
@@ -38,12 +38,9 @@ tenantIds.forEach(id => {
[{_id: 0, x: 0}, {_id: 1, x: 1}, {_id: 2, x: 2}], {writeConcern: {w: "majority"}}));
});
-const operation = test.createSplitOperation(tenantIds);
-
const blockingFP = configureFailPoint(donorPrimary.getDB("admin"), "pauseShardSplitAfterBlocking");
-
+const operation = test.createSplitOperation(tenantIds);
const splitThread = operation.commitAsync();
-
blockingFP.wait();
const donorRst = createRstArgs(test.donor);
diff --git a/jstests/sharding/auth.js b/jstests/sharding/auth.js
index 3bf15b56716..2191e6f2759 100644
--- a/jstests/sharding/auth.js
+++ b/jstests/sharding/auth.js
@@ -187,7 +187,7 @@ assert.commandWorked(bulk.execute());
s.startBalancer(60000);
const balanceAccordingToDataSize =
- FeatureFlagUtil.isEnabled(s.getDB('admin'), "BalanceAccordingToDataSize");
+ FeatureFlagUtil.isEnabled(s.configRS.getPrimary().getDB('admin'), "BalanceAccordingToDataSize");
if (!balanceAccordingToDataSize) {
assert.soon(function() {
var d1Chunks =
diff --git a/jstests/sharding/autosplit_configure_collection.js b/jstests/sharding/autosplit_configure_collection.js
index 83acde55afd..a9d62fe4e0f 100644
--- a/jstests/sharding/autosplit_configure_collection.js
+++ b/jstests/sharding/autosplit_configure_collection.js
@@ -1,12 +1,8 @@
/**
* This test confirms that chunks get split according to a collection specific setting as they grow
* due to data insertion.
- *
- * @tags: [
- * requires_fcv_53,
- * featureFlagPerCollBalancingSettings,
- * ]
*/
+
(function() {
'use strict';
load('jstests/sharding/autosplit_include.js');
diff --git a/jstests/sharding/balancer_defragmentation_merge_chunks.js b/jstests/sharding/balancer_defragmentation_merge_chunks.js
index 8bb0cb601d2..0d967135ede 100644
--- a/jstests/sharding/balancer_defragmentation_merge_chunks.js
+++ b/jstests/sharding/balancer_defragmentation_merge_chunks.js
@@ -1,10 +1,9 @@
/**
* Test the configureCollectionBalancing command and balancerCollectionStatus command
*
- * // TODO (SERVER-63036): remove the 'does_not_support_stepdowns' tag
* @tags: [
- * requires_fcv_53,
- * featureFlagPerCollBalancingSettings,
+ * # This test does not support stepdowns of CSRS because of how it uses failpoints
+ *   # to control phase transitions.
* does_not_support_stepdowns,
* ]
*/
@@ -97,67 +96,6 @@ function clearFailPointOnConfigNodes(failpoint) {
});
}
-function waitForFailpointOnConfigNodes(failpoint, timesEntered) {
- jsTest.log("Waiting for failpoint " + failpoint + ", times entered " + timesEntered);
- assert.soon(function() {
- let hitFailpoint = false;
- let csrs_nodes = [st.configRS.getPrimary()];
- csrs_nodes.concat(st.configRS.getSecondaries());
-
- csrs_nodes.forEach((config) => {
- let res = assert.commandWorkedOrFailedWithCode(config.adminCommand({
- waitForFailPoint: failpoint,
- timesEntered: timesEntered + 1,
- maxTimeMS: kDefaultWaitForFailPointTimeout / 10
- }),
- ErrorCodes.MaxTimeMSExpired);
- hitFailpoint = hitFailpoint || res["ok"] === 1;
- });
- return hitFailpoint;
- });
- jsTest.log("Failpoint " + failpoint + " hit " + timesEntered + " times");
-}
-
-jsTest.log("Split chunks while defragmenting");
-{
- st.stopBalancer();
- const coll = getNewColl();
- const nss = coll.getFullName();
- assert.commandWorked(st.s.adminCommand({shardCollection: nss, key: {skey: 1}}));
-
- const chunks = findChunksUtil.findChunksByNs(st.config, nss).toArray();
- assert.eq(1, chunks.length);
- assert.commandWorked(st.s.adminCommand({split: nss, middle: {skey: 0}}));
-
- const primaryShard = st.getPrimaryShard(coll.getDB().getName());
- assert.eq(st.normalize(primaryShard.name), st.normalize(chunks[0]['shard']));
- assert.commandWorked(
- st.s.adminCommand({moveChunk: nss, find: {skey: 0}, to: st.getOther(primaryShard).name}));
-
- // Pause defragmentation after initialization but before phase 1 runs
- setFailPointOnConfigNodes("afterBuildingNextDefragmentationPhase", {skip: 1});
- assert.commandWorked(st.s.adminCommand({
- configureCollectionBalancing: nss,
- defragmentCollection: true,
- chunkSize: targetChunkSizeMB,
- }));
- st.startBalancer();
-
- waitForFailpointOnConfigNodes("afterBuildingNextDefragmentationPhase", 0);
-
- assert.eq('moveAndMergeChunks',
- st.config.collections.findOne({_id: nss})['defragmentationPhase']);
- assert.eq(2, findChunksUtil.countChunksForNs(st.config, nss));
- assert.commandWorked(st.s.adminCommand({split: nss, middle: {skey: -10}}));
- assert.commandWorked(st.s.adminCommand({split: nss, middle: {skey: 10}}));
- assert.eq(4, findChunksUtil.countChunksForNs(st.config, nss));
-
- clearFailPointOnConfigNodes("afterBuildingNextDefragmentationPhase");
- defragmentationUtil.waitForEndOfDefragmentation(st.s, nss);
- // Ensure the defragmentation succeeded
- assert.eq(1, findChunksUtil.countChunksForNs(st.config, nss));
-}
-
// Setup collection for first tests
const coll1 = setupCollection();
const coll1Name = coll1.getFullName();
diff --git a/jstests/sharding/check_sharding_index_versioned.js b/jstests/sharding/check_sharding_index_versioned.js
index 2fd0e9bf2d7..c47f2485dc9 100644
--- a/jstests/sharding/check_sharding_index_versioned.js
+++ b/jstests/sharding/check_sharding_index_versioned.js
@@ -21,7 +21,7 @@ assert.throwsWithCode(() => {
st.rs0.getPrimary().getDB(dbName).runCommand({
checkShardingIndex: ns,
keyPattern: {x: 1},
- shardVersion: [Timestamp(99, 10101), ObjectId(), Timestamp(1, 1)],
+ shardVersion: {e: ObjectId(), t: Timestamp(1, 1), v: Timestamp(99, 10101)},
});
}, ErrorCodes.StaleConfig);
diff --git a/jstests/sharding/clear_jumbo.js b/jstests/sharding/clear_jumbo.js
index ee12cf70cc5..30190abac8d 100644
--- a/jstests/sharding/clear_jumbo.js
+++ b/jstests/sharding/clear_jumbo.js
@@ -1,105 +1,136 @@
+// requires_fcv_61 since the balancer in v6.0 still balances based on the number of chunks,
+// so it is not triggered here and the chunk is not marked as jumbo.
+// @tags: [requires_fcv_61]
+
(function() {
"use strict";
load("jstests/sharding/libs/find_chunks_util.js");
-let st = new ShardingTest({shards: 2});
+let st = new ShardingTest({shards: 2, other: {chunkSize: 1}});
const mongosSession = st.s.startSession({retryWrites: true});
-const sessionAdminDB = mongosSession.getDatabase('admin');
-const sessionConfigDB = mongosSession.getDatabase('config');
+const adminDB = mongosSession.getDatabase('admin');
+const configDB = mongosSession.getDatabase('config');
+const testDB = mongosSession.getDatabase('test');
+const testColl = testDB.getCollection('range');
+const hashedTestColl = testDB.getCollection('hashed');
+
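+// Starts the balancer, waits until 'coll' is balanced across the shards, then stops it again.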
+function runBalancer(coll) {
+ st.startBalancer();
+
+ // Let the balancer run until balanced.
+ st.printShardingStatus(true);
+ st.awaitBalance(coll.getName(), coll.getDB());
+ st.printShardingStatus(true);
+
+ st.stopBalancer();
+}
+
+function createJumboChunk(coll, keyValue) {
+ const largeString = 'X'.repeat(1024 * 1024); // 1 MB
+
+    // Insert enough documents to form a jumbo chunk, using the same shard key value in all of
+    // them so that the chunk cannot be split.
+ let bulk = coll.initializeUnorderedBulkOp();
+ for (let i = 0; i < 10; i++) {
+ bulk.insert({x: keyValue, big: largeString, i: i});
+ }
+ assert.commandWorked(bulk.execute());
+ runBalancer(coll);
+}
-assert.commandWorked(sessionAdminDB.runCommand({enableSharding: 'test'}));
-st.ensurePrimaryShard('test', st.shard0.shardName);
+function validateJumboFlag(ns, query) {
+ let jumboChunk = findChunksUtil.findOneChunkByNs(configDB, ns, query);
+ assert.eq(jumboChunk.jumbo, true);
+}
+
+// Initialize the test database.
assert.commandWorked(
- sessionAdminDB.runCommand({addShardToZone: st.shard1.shardName, zone: 'finalDestination'}));
+ adminDB.runCommand({enableSharding: 'test', primaryShard: st.shard0.shardName}));
+assert.commandWorked(adminDB.runCommand({addShardToZone: st.shard1.shardName, zone: 'ZoneShard1'}));
////////////////////////////////////////////////////////////////////////////
// Ranged shard key
-assert.commandWorked(sessionAdminDB.runCommand({shardCollection: 'test.range', key: {x: 1}}));
-assert.commandWorked(sessionAdminDB.runCommand({split: 'test.range', middle: {x: 0}}));
+let testNs = testColl.getFullName();
-let chunkColl = sessionConfigDB.chunks;
+assert.commandWorked(adminDB.runCommand({shardCollection: testNs, key: {x: 1}}));
+assert.commandWorked(adminDB.runCommand({split: testNs, middle: {x: 0}}));
-let testRangeColl = sessionConfigDB.collections.findOne({_id: 'test.range'});
-if (testRangeColl.timestamp) {
- assert.commandWorked(
- chunkColl.update({uuid: testRangeColl.uuid, min: {x: 0}}, {$set: {jumbo: true}}));
-} else {
- assert.commandWorked(chunkColl.update({ns: 'test.range', min: {x: 0}}, {$set: {jumbo: true}}));
-}
-
-let jumboChunk = findChunksUtil.findOneChunkByNs(sessionConfigDB, 'test.range', {min: {x: 0}});
-assert(jumboChunk.jumbo, tojson(jumboChunk));
+createJumboChunk(testColl, 0);
+validateJumboFlag(testNs, {min: {x: 0}});
+let jumboChunk = findChunksUtil.findOneChunkByNs(configDB, testNs, {min: {x: 0}});
let jumboMajorVersionBefore = jumboChunk.lastmod.getTime();
// Target non-jumbo chunk should not affect real jumbo chunk.
-assert.commandWorked(sessionAdminDB.runCommand({clearJumboFlag: 'test.range', find: {x: -1}}));
-jumboChunk = findChunksUtil.findOneChunkByNs(sessionConfigDB, 'test.range', {min: {x: 0}});
+assert.commandWorked(adminDB.runCommand({clearJumboFlag: testNs, find: {x: -1}}));
+jumboChunk = findChunksUtil.findOneChunkByNs(configDB, testNs, {min: {x: 0}});
assert(jumboChunk.jumbo, tojson(jumboChunk));
assert.eq(jumboMajorVersionBefore, jumboChunk.lastmod.getTime());
// Target real jumbo chunk should bump version.
-assert.commandWorked(sessionAdminDB.runCommand({clearJumboFlag: 'test.range', find: {x: 1}}));
-jumboChunk = findChunksUtil.findOneChunkByNs(sessionConfigDB, 'test.range', {min: {x: 0}});
+assert.commandWorked(adminDB.runCommand({clearJumboFlag: testNs, find: {x: 1}}));
+jumboChunk = findChunksUtil.findOneChunkByNs(configDB, testNs, {min: {x: 0}});
assert(!jumboChunk.jumbo, tojson(jumboChunk));
assert.lt(jumboMajorVersionBefore, jumboChunk.lastmod.getTime());
+// Delete all documents
+assert.commandWorked(testColl.deleteMany({x: 0}));
+let docCount = assert.commandWorked(adminDB.runCommand({count: testNs}));
+assert.eq(docCount.n, 0);
+
////////////////////////////////////////////////////////////////////////////
// Hashed shard key
-assert.commandWorked(sessionAdminDB.runCommand(
- {shardCollection: 'test.hashed', key: {x: 'hashed'}, numInitialChunks: 2}));
-
-let testHashedColl = sessionConfigDB.collections.findOne({_id: 'test.hashed'});
-if (testHashedColl.timestamp) {
- assert.commandWorked(
- chunkColl.update({uuid: testHashedColl.uuid, min: {x: 0}}, {$set: {jumbo: true}}));
-} else {
- assert.commandWorked(chunkColl.update({ns: 'test.hashed', min: {x: 0}}, {$set: {jumbo: true}}));
-}
-jumboChunk = findChunksUtil.findOneChunkByNs(sessionConfigDB, 'test.hashed', {min: {x: 0}});
-assert(jumboChunk.jumbo, tojson(jumboChunk));
+testNs = hashedTestColl.getFullName();
+
+assert.commandWorked(
+ adminDB.runCommand({shardCollection: testNs, key: {x: 'hashed'}, numInitialChunks: 2}));
+
+createJumboChunk(hashedTestColl, 0);
+validateJumboFlag(testNs, {min: {x: 0}});
+
+jumboChunk = findChunksUtil.findOneChunkByNs(configDB, testNs, {min: {x: 0}});
jumboMajorVersionBefore = jumboChunk.lastmod.getTime();
// Target non-jumbo chunk should not affect real jumbo chunk.
-let unrelatedChunk =
- findChunksUtil.findOneChunkByNs(sessionConfigDB, 'test.hashed', {min: {x: MinKey}});
-assert.commandWorked(sessionAdminDB.runCommand(
- {clearJumboFlag: 'test.hashed', bounds: [unrelatedChunk.min, unrelatedChunk.max]}));
-jumboChunk = findChunksUtil.findOneChunkByNs(sessionConfigDB, 'test.hashed', {min: {x: 0}});
+let unrelatedChunk = findChunksUtil.findOneChunkByNs(configDB, testNs, {min: {x: MinKey}});
+assert.commandWorked(
+ adminDB.runCommand({clearJumboFlag: testNs, bounds: [unrelatedChunk.min, unrelatedChunk.max]}));
+jumboChunk = findChunksUtil.findOneChunkByNs(configDB, testNs, {min: {x: 0}});
assert(jumboChunk.jumbo, tojson(jumboChunk));
assert.eq(jumboMajorVersionBefore, jumboChunk.lastmod.getTime());
// Target real jumbo chunk should bump version.
-assert.commandWorked(sessionAdminDB.runCommand(
- {clearJumboFlag: 'test.hashed', bounds: [jumboChunk.min, jumboChunk.max]}));
-jumboChunk = findChunksUtil.findOneChunkByNs(sessionConfigDB, 'test.hashed', {min: {x: 0}});
+assert.commandWorked(
+ adminDB.runCommand({clearJumboFlag: testNs, bounds: [jumboChunk.min, jumboChunk.max]}));
+jumboChunk = findChunksUtil.findOneChunkByNs(configDB, testNs, {min: {x: 0}});
assert(!jumboChunk.jumbo, tojson(jumboChunk));
assert.lt(jumboMajorVersionBefore, jumboChunk.lastmod.getTime());
-////////////////////////////////////////////////////////////////////////////
// Ensure clear jumbo flag stores the correct chunk version
-
assert.eq(undefined, jumboChunk.lastmodEpoch);
assert.eq(undefined, jumboChunk.lastmodTimestamp);
+// Delete all documents
+assert.commandWorked(hashedTestColl.deleteMany({x: 0}));
+docCount = assert.commandWorked(adminDB.runCommand({count: testNs}));
+assert.eq(docCount.n, 0);
+
////////////////////////////////////////////////////////////////////////////
// Balancer with jumbo chunks behavior
// Forces a jumbo chunk to be on a wrong zone but balancer shouldn't be able to move it until
// jumbo flag is cleared.
+testNs = testColl.getFullName();
st.stopBalancer();
-if (testRangeColl.timestamp) {
- assert.commandWorked(
- chunkColl.update({uuid: testRangeColl.uuid, min: {x: 0}}, {$set: {jumbo: true}}));
-} else {
- assert.commandWorked(chunkColl.update({ns: 'test.range', min: {x: 0}}, {$set: {jumbo: true}}));
-}
-assert.commandWorked(sessionAdminDB.runCommand(
- {updateZoneKeyRange: 'test.range', min: {x: 0}, max: {x: MaxKey}, zone: 'finalDestination'}));
+assert.commandWorked(adminDB.runCommand(
+ {updateZoneKeyRange: testNs, min: {x: 0}, max: {x: MaxKey}, zone: 'ZoneShard1'}));
-let chunk = findChunksUtil.findOneChunkByNs(sessionConfigDB, 'test.range', {min: {x: 0}});
+createJumboChunk(testColl, 0);
+validateJumboFlag(testNs, {min: {x: 0}});
+
+let chunk = findChunksUtil.findOneChunkByNs(configDB, testNs, {min: {x: 0}});
assert(chunk.jumbo, tojson(chunk));
assert.eq(st.shard0.shardName, chunk.shard);
@@ -111,29 +142,26 @@ st.forEachConfigServer((conn) => {
});
});
-let waitForBalancerToRun = function() {
- let lastRoundNumber =
- assert.commandWorked(sessionAdminDB.runCommand({balancerStatus: 1})).numBalancerRounds;
- st.startBalancer();
+runBalancer(testColl);
- assert.soon(function() {
- let res = assert.commandWorked(sessionAdminDB.runCommand({balancerStatus: 1}));
- return res.mode == "full" && res.numBalancerRounds - lastRoundNumber > 1;
- });
-
- st.stopBalancer();
-};
-
-waitForBalancerToRun();
-
-chunk = findChunksUtil.findOneChunkByNs(sessionConfigDB, 'test.range', {min: {x: 0}});
+// Verify chunk stays in shard0
+chunk = findChunksUtil.findOneChunkByNs(configDB, testNs, {min: {x: 0}});
assert.eq(st.shard0.shardName, chunk.shard);
-assert.commandWorked(sessionAdminDB.runCommand({clearJumboFlag: 'test.range', find: {x: 0}}));
+// Delete all documents
+assert.commandWorked(testColl.deleteMany({x: 0}));
+docCount = assert.commandWorked(adminDB.runCommand({count: testNs}));
+assert.eq(docCount.n, 0);
+
+// Clear jumbo flag
+assert.commandWorked(adminDB.runCommand({clearJumboFlag: testNs, find: {x: 0}}));
+chunk = findChunksUtil.findOneChunkByNs(configDB, testNs, {min: {x: 0}});
+assert(!chunk.jumbo, tojson(chunk));
-waitForBalancerToRun();
+runBalancer(testColl);
-chunk = findChunksUtil.findOneChunkByNs(sessionConfigDB, 'test.range', {min: {x: 0}});
+// Verify chunk is moved to shard1
+chunk = findChunksUtil.findOneChunkByNs(configDB, testNs, {min: {x: 0}});
assert.eq(st.shard1.shardName, chunk.shard);
st.stop();
diff --git a/jstests/sharding/cluster_server_parameter_commands_sharded.js b/jstests/sharding/cluster_server_parameter_commands_sharded.js
index 4b8486befc9..c1547d0c3ac 100644
--- a/jstests/sharding/cluster_server_parameter_commands_sharded.js
+++ b/jstests/sharding/cluster_server_parameter_commands_sharded.js
@@ -2,12 +2,10 @@
* Checks that set/getClusterParameter runs as expected on sharded clusters.
*
* @tags: [
- * # Requires all nodes to be running the latest binary.
- * requires_fcv_60,
- * featureFlagClusterWideConfig,
* does_not_support_stepdowns,
* requires_replication,
- * requires_sharding
+ * requires_sharding,
+ * multiversion_incompatible
* ]
*/
(function() {
diff --git a/jstests/sharding/compound_hashed_shard_key_sharding_cmds.js b/jstests/sharding/compound_hashed_shard_key_sharding_cmds.js
index 4be26f1c18d..09885c4b3d7 100644
--- a/jstests/sharding/compound_hashed_shard_key_sharding_cmds.js
+++ b/jstests/sharding/compound_hashed_shard_key_sharding_cmds.js
@@ -133,9 +133,11 @@ function testMoveChunk(shardKey) {
// Error if either of the bounds is not a valid shard key.
assert.commandFailedWithCode(
- st.s0.adminCommand({moveChunk: ns, bounds: [NaN, aChunk.max], to: shard1}), 10065);
+ st.s0.adminCommand({moveChunk: ns, bounds: [NaN, aChunk.max], to: shard1}),
+ ErrorCodes.TypeMismatch);
assert.commandFailedWithCode(
- st.s0.adminCommand({moveChunk: ns, bounds: [aChunk.min, NaN], to: shard1}), 10065);
+ st.s0.adminCommand({moveChunk: ns, bounds: [aChunk.min, NaN], to: shard1}),
+ ErrorCodes.TypeMismatch);
assert.commandWorked(
st.s0.adminCommand({moveChunk: ns, bounds: [aChunk.min, aChunk.max], to: shard1}));
diff --git a/jstests/sharding/configure_collection_balancing_setFCV.js b/jstests/sharding/configure_collection_balancing_setFCV.js
deleted file mode 100644
index 7be5a8726fb..00000000000
--- a/jstests/sharding/configure_collection_balancing_setFCV.js
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Test setFCV interactions with per-collection balancing settings
- *
- * @tags: [
- * requires_fcv_53,
- * featureFlagPerCollBalancingSettings,
- * ]
- */
-// TODO SERVER-62693 get rid of this file once 6.0 branches out
-
-'use strict';
-
-const st = new ShardingTest({mongos: 1, shards: 1, other: {enableBalancer: false}});
-
-const database = st.getDB('test');
-assert.commandWorked(st.s.adminCommand({enableSharding: 'test'}));
-const collName = 'coll';
-const coll = database[collName];
-const fullNs = coll.getFullName();
-
-assert.commandWorked(st.s.adminCommand({shardCollection: fullNs, key: {x: 1}}));
-
-const downgradeVersion = lastLTSFCV;
-assert.commandWorked(st.s.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
-
-/* Test that
- * - downgrade can be performed while a collection is undergoing defragmentation
- * - at the end of the process, per-collection balancing fields are removed upon setFCV < 5.3
- */
-{
- assert.commandWorked(st.s.adminCommand({
- configureCollectionBalancing: fullNs,
- defragmentCollection: true,
- enableAutoSplitter: false,
- chunkSize: 10
- }));
-
- var configEntryBeforeSetFCV =
- st.config.getSiblingDB('config').collections.findOne({_id: fullNs});
- var shardEntryBeforeSetFCV = st.shard0.getDB('config').cache.collections.findOne({_id: fullNs});
- assert.eq(10 * 1024 * 1024, configEntryBeforeSetFCV.maxChunkSizeBytes);
- assert(configEntryBeforeSetFCV.noAutoSplit);
- assert.eq(10 * 1024 * 1024, shardEntryBeforeSetFCV.maxChunkSizeBytes);
- assert(!shardEntryBeforeSetFCV.allowAutoSplit);
- assert(configEntryBeforeSetFCV.defragmentCollection);
-
- assert.commandWorked(st.s.adminCommand({setFeatureCompatibilityVersion: downgradeVersion}));
-
- var configEntryAfterSetFCV =
- st.config.getSiblingDB('config').collections.findOne({_id: fullNs});
- var shardEntryAfterSetFCV = st.shard0.getDB('config').cache.collections.findOne({_id: fullNs});
- assert.isnull(configEntryAfterSetFCV.maxChunkSizeBytes);
- assert.isnull(configEntryAfterSetFCV.noAutoSplit);
- assert.isnull(shardEntryAfterSetFCV.maxChunkSizeBytes);
- assert.isnull(shardEntryAfterSetFCV.allowAutoSplit);
-}
-
-st.stop();
diff --git a/jstests/sharding/database_versioning_all_commands.js b/jstests/sharding/database_versioning_all_commands.js
index fe7c409f524..6d4f2bbba52 100644
--- a/jstests/sharding/database_versioning_all_commands.js
+++ b/jstests/sharding/database_versioning_all_commands.js
@@ -315,6 +315,7 @@ let testCases = {
},
}
},
+ coordinateCommitTransaction: {skip: "unimplemented. Serves only as a stub."},
count: {
run: {
sendsDbVersion: true,
@@ -456,10 +457,6 @@ let testCases = {
flushRouterConfig: {skip: "executes locally on mongos (not sent to any remote node)"},
fsync: {skip: "broadcast to all shards"},
getAuditConfig: {skip: "not on a user database", conditional: true},
- getChangeStreamOptions: {
- skip: "executes locally on mongos (not sent to any remote node)",
- conditional: true
- }, // TODO SERVER-65353 remove in 6.1.
getClusterParameter: {skip: "always targets the config server"},
getCmdLineOpts: {skip: "executes locally on mongos (not sent to any remote node)"},
getDefaultRWConcern: {skip: "executes locally on mongos (not sent to any remote node)"},
@@ -628,10 +625,6 @@ let testCases = {
serverStatus: {skip: "executes locally on mongos (not sent to any remote node)"},
setAllowMigrations: {skip: "not on a user database"},
setAuditConfig: {skip: "not on a user database", conditional: true},
- setChangeStreamOptions: {
- skip: "always targets the config server",
- conditional: true
- }, // TODO SERVER-65353 remove in 6.1.
setDefaultRWConcern: {skip: "always targets the config server"},
setIndexCommitQuorum: {
run: {
diff --git a/jstests/sharding/defragment_large_collection.js b/jstests/sharding/defragment_large_collection.js
index 20210762258..beea51aafe5 100644
--- a/jstests/sharding/defragment_large_collection.js
+++ b/jstests/sharding/defragment_large_collection.js
@@ -1,11 +1,3 @@
-/**
- *
- * @tags: [
- * requires_fcv_53,
- * featureFlagPerCollBalancingSettings,
- * ]
- */
-
(function() {
'use strict';
diff --git a/jstests/sharding/error_propagation.js b/jstests/sharding/error_propagation.js
index b4bb0b72331..1a74270f745 100644
--- a/jstests/sharding/error_propagation.js
+++ b/jstests/sharding/error_propagation.js
@@ -20,6 +20,6 @@ assert.commandWorked(db.foo.insert({a: [1, 2]}, {writeConcern: {w: 3}}));
var res = db.runCommand(
{aggregate: 'foo', pipeline: [{$project: {total: {'$add': ['$a', 1]}}}], cursor: {}});
-assert.commandFailedWithCode(res, 16554);
+assert.commandFailedWithCode(res, [16554, ErrorCodes.TypeMismatch]);
st.stop();
}());
diff --git a/jstests/sharding/internal_txns/libs/chunk_migration_test.js b/jstests/sharding/internal_txns/libs/chunk_migration_test.js
index 02c490255e8..cc78170153e 100644
--- a/jstests/sharding/internal_txns/libs/chunk_migration_test.js
+++ b/jstests/sharding/internal_txns/libs/chunk_migration_test.js
@@ -14,6 +14,7 @@
'use strict';
load('jstests/libs/chunk_manipulation_util.js');
+load('jstests/sharding/internal_txns/libs/fixture_helpers.js');
load('jstests/sharding/libs/sharded_transactions_helpers.js');
function InternalTransactionChunkMigrationTest(storeFindAndModifyImagesInSideCollection = true) {
@@ -359,23 +360,25 @@ function InternalTransactionChunkMigrationTest(storeFindAndModifyImagesInSideCol
testCase.setUpFunc();
const lsid = getTransactionSessionId(txnType, testCase);
- const txnNumber = getNextTxnNumber(txnType, testCase);
-
- for (let i = 0; i < testCase.commands.length; i++) {
- const command = testCase.commands[i];
- const cmdObj = Object.assign({}, command.cmdObj, {lsid, txnNumber, autocommit: false});
- if (i == 0) {
- cmdObj.startTransaction = true;
+ runTxnRetryOnLockTimeoutError(() => {
+ const txnNumber = getNextTxnNumber(txnType, testCase);
+
+ for (let i = 0; i < testCase.commands.length; i++) {
+ const command = testCase.commands[i];
+ const cmdObj =
+ Object.assign({}, command.cmdObj, {lsid, txnNumber, autocommit: false});
+ if (i == 0) {
+ cmdObj.startTransaction = true;
+ }
+ const res = assert.commandWorked(st.s.getDB(testCase.dbName).runCommand(cmdObj));
+ command.checkResponseFunc(res);
}
- const res = assert.commandWorked(st.s.getDB(testCase.dbName).runCommand(cmdObj));
- command.checkResponseFunc(res);
- }
-
- if (testCase.abortOnInitialTry) {
- abortTransaction(lsid, txnNumber, testCase.isPreparedTxn);
- } else {
- commitTransaction(lsid, txnNumber);
- }
+ if (testCase.abortOnInitialTry) {
+ abortTransaction(lsid, txnNumber, testCase.isPreparedTxn);
+ } else {
+ commitTransaction(lsid, txnNumber);
+ }
+ });
testCase.checkDocsFunc(!testCase.abortOnInitialTry /* isTxnCommitted */);
}
@@ -398,25 +401,28 @@ function InternalTransactionChunkMigrationTest(storeFindAndModifyImagesInSideCol
const lsid = getTransactionSessionId(txnType, testCase);
// Give the session a different txnUUID to simulate a retry from a different mongos.
lsid.txnUUID = UUID();
- const txnNumber = getNextTxnNumber(txnType, testCase);
+ runTxnRetryOnLockTimeoutError(() => {
+ const txnNumber = getNextTxnNumber(txnType, testCase);
- for (let i = 0; i < testCase.commands.length; i++) {
- const command = testCase.commands[i];
+ for (let i = 0; i < testCase.commands.length; i++) {
+ const command = testCase.commands[i];
- if (!isRetryAfterAbort && command.cmdObj.stmtId == -1) {
- // The transaction has already committed and the statement in this command
- // is not retryable so do not retry it.
- continue;
- }
+ if (!isRetryAfterAbort && command.cmdObj.stmtId == -1) {
+ // The transaction has already committed and the statement in this command
+ // is not retryable so do not retry it.
+ continue;
+ }
- const cmdObj = Object.assign({}, command.cmdObj, {lsid, txnNumber, autocommit: false});
- if (i == 0) {
- cmdObj.startTransaction = true;
+ const cmdObj =
+ Object.assign({}, command.cmdObj, {lsid, txnNumber, autocommit: false});
+ if (i == 0) {
+ cmdObj.startTransaction = true;
+ }
+ const res = assert.commandWorked(st.s.getDB(testCase.dbName).runCommand(cmdObj));
+ command.checkResponseFunc(res);
}
- const res = assert.commandWorked(st.s.getDB(testCase.dbName).runCommand(cmdObj));
- command.checkResponseFunc(res);
- }
- commitTransaction(lsid, txnNumber);
+ commitTransaction(lsid, txnNumber);
+ });
testCase.checkDocsFunc(true /* isTxnCommitted */);
}
diff --git a/jstests/sharding/internal_txns/libs/fixture_helpers.js b/jstests/sharding/internal_txns/libs/fixture_helpers.js
new file mode 100644
index 00000000000..5e418726fc6
--- /dev/null
+++ b/jstests/sharding/internal_txns/libs/fixture_helpers.js
@@ -0,0 +1,37 @@
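+// Runs 'txnFunc', retrying whenever it fails with an error labeled TransientTransactionError,
+// except for NoSuchTransaction errors, which imply the transaction was aborted and are rethrown.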
+function runTxnRetryOnTransientError(txnFunc) {
+ assert.soon(() => {
+ try {
+ txnFunc();
+ return true;
+ } catch (e) {
+ if (e.hasOwnProperty('errorLabels') &&
+ e.errorLabels.includes('TransientTransactionError') &&
+ e.code != ErrorCodes.NoSuchTransaction) {
+ // Don't retry on a NoSuchTransaction error since it implies the transaction was
+                // aborted, so we should propagate the error instead.
+ jsTest.log("Failed to run transaction due to a transient error " + tojson(e));
+ return false;
+ } else {
+ throw e;
+ }
+ }
+ });
+}
+
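+// Runs 'txnFunc', retrying only when it fails with a LockTimeout error that carries the
+// TransientTransactionError label; any other error is rethrown.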
+function runTxnRetryOnLockTimeoutError(txnFunc) {
+ assert.soon(() => {
+ try {
+ txnFunc();
+ return true;
+ } catch (e) {
+ if (e.hasOwnProperty('errorLabels') &&
+ e.errorLabels.includes('TransientTransactionError') &&
+ e.code == ErrorCodes.LockTimeout) {
+ jsTest.log("Failed to run transaction due to a transient error " + tojson(e));
+ return false;
+ } else {
+ throw e;
+ }
+ }
+ });
+}
diff --git a/jstests/sharding/internal_txns/libs/resharding_test.js b/jstests/sharding/internal_txns/libs/resharding_test.js
index c5ded8792b1..ce4d96b9858 100644
--- a/jstests/sharding/internal_txns/libs/resharding_test.js
+++ b/jstests/sharding/internal_txns/libs/resharding_test.js
@@ -14,6 +14,7 @@
'use strict';
load("jstests/libs/discover_topology.js");
+load('jstests/sharding/internal_txns/libs/fixture_helpers.js');
load("jstests/sharding/libs/resharding_test_fixture.js");
load('jstests/sharding/libs/sharded_transactions_helpers.js');
@@ -324,38 +325,25 @@ function InternalTransactionReshardingTest(
testCase.setUpFunc();
const lsid = getTransactionSessionId(txnType, testCase);
-
- while (true) {
+ runTxnRetryOnTransientError(() => {
const txnNumber = getNextTxnNumber(txnType, testCase);
- try {
- for (let i = 0; i < testCase.commands.length; i++) {
- const command = testCase.commands[i];
- const cmdObj =
- Object.assign({}, command.cmdObj, {lsid, txnNumber, autocommit: false});
- if (i == 0) {
- cmdObj.startTransaction = true;
- }
- const res = assert.commandWorked(mongosConn.getDB(kDbName).runCommand(cmdObj));
- command.checkResponseFunc(res);
- }
-
- if (testCase.abortOnInitialTry) {
- abortTransaction(lsid, txnNumber, testCase.isPreparedTxn);
- } else {
- commitTransaction(lsid, txnNumber);
- }
- break;
- } catch (e) {
- if (e.hasOwnProperty('errorLabels') &&
- e.errorLabels.includes('TransientTransactionError') &&
- e.code != ErrorCodes.NoSuchTransaction) {
- jsTest.log("Failed to run transaction due to a transient error " + tojson(e));
- } else {
- throw e;
+ for (let i = 0; i < testCase.commands.length; i++) {
+ const command = testCase.commands[i];
+ const cmdObj =
+ Object.assign({}, command.cmdObj, {lsid, txnNumber, autocommit: false});
+ if (i == 0) {
+ cmdObj.startTransaction = true;
}
+ const res = assert.commandWorked(mongosConn.getDB(kDbName).runCommand(cmdObj));
+ command.checkResponseFunc(res);
}
- }
+ if (testCase.abortOnInitialTry) {
+ abortTransaction(lsid, txnNumber, testCase.isPreparedTxn);
+ } else {
+ commitTransaction(lsid, txnNumber);
+ }
+ });
testCase.checkDocsFunc(!testCase.abortOnInitialTry /* isTxnCommitted */);
}
@@ -381,50 +369,36 @@ function InternalTransactionReshardingTest(
const lsid = getTransactionSessionId(txnType, testCase);
// Give the session a different txnUUID to simulate a retry from a different mongos.
lsid.txnUUID = UUID();
-
- while (true) {
+ runTxnRetryOnTransientError(() => {
const txnNumber = getNextTxnNumber(txnType, testCase);
- try {
- for (let i = 0; i < testCase.commands.length; i++) {
- const command = testCase.commands[i];
+ for (let i = 0; i < testCase.commands.length; i++) {
+ const command = testCase.commands[i];
- if (!isRetryAfterAbort && command.cmdObj.stmtId == -1) {
- // The transaction has already committed and the statement in this command
- // is not retryable so do not retry it.
- continue;
- }
+ if (!isRetryAfterAbort && command.cmdObj.stmtId == -1) {
+ // The transaction has already committed and the statement in this command
+ // is not retryable so do not retry it.
+ continue;
+ }
- const cmdObj =
- Object.assign({}, command.cmdObj, {lsid, txnNumber, autocommit: false});
- if (i == 0) {
- cmdObj.startTransaction = true;
- }
- const res = mongosConn.getDB(kDbName).runCommand(cmdObj);
-
- if (expectRetryToSucceed) {
- assert.commandWorked(res);
- command.checkResponseFunc(res);
- } else {
- assert.commandFailedWithCode(res, ErrorCodes.IncompleteTransactionHistory);
- return;
- }
+ const cmdObj =
+ Object.assign({}, command.cmdObj, {lsid, txnNumber, autocommit: false});
+ if (i == 0) {
+ cmdObj.startTransaction = true;
}
+ const res = mongosConn.getDB(kDbName).runCommand(cmdObj);
- commitTransaction(lsid, txnNumber);
- break;
- } catch (e) {
- if (e.hasOwnProperty('errorLabels') &&
- e.errorLabels.includes('TransientTransactionError') &&
- e.code != ErrorCodes.NoSuchTransaction) {
- jsTest.log("Failed to run transaction due to a transient error " + tojson(e));
+ if (expectRetryToSucceed) {
+ assert.commandWorked(res);
+ command.checkResponseFunc(res);
} else {
- throw e;
+ assert.commandFailedWithCode(res, ErrorCodes.IncompleteTransactionHistory);
+ return;
}
}
- }
-
- testCase.checkDocsFunc(true /* isTxnCommitted */);
+ commitTransaction(lsid, txnNumber);
+ testCase.checkDocsFunc(true /* isTxnCommitted */);
+ });
}
/*
diff --git a/jstests/sharding/internal_txns/libs/retryable_internal_transaction_test.js b/jstests/sharding/internal_txns/libs/retryable_internal_transaction_test.js
index 6836540c441..0332843f72c 100644
--- a/jstests/sharding/internal_txns/libs/retryable_internal_transaction_test.js
+++ b/jstests/sharding/internal_txns/libs/retryable_internal_transaction_test.js
@@ -3,6 +3,7 @@
*/
'use strict';
+load('jstests/sharding/internal_txns/libs/fixture_helpers.js');
load('jstests/sharding/libs/sharded_transactions_helpers.js');
function getOplogEntriesForTxnWithRetries(rs, lsid, txnNumber) {
@@ -58,6 +59,12 @@ function RetryableInternalTransactionTest(collectionOptions = {}) {
return {id: UUID(), txnNumber: NumberLong(0), txnUUID: UUID()};
}
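+    // Stamps 'cmdObj' with the given session id and transaction number and marks it as a
+    // transaction statement (autocommit: false).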
+ function setTxnFields(cmdObj, lsid, txnNumber) {
+ cmdObj.lsid = lsid;
+ cmdObj.txnNumber = NumberLong(txnNumber);
+ cmdObj.autocommit = false;
+ }
+
const getRandomOplogEntryLocation = function() {
const locations = Object.values(kOplogEntryLocation);
return locations[Math.floor(Math.random() * locations.length)];
@@ -106,60 +113,123 @@ function RetryableInternalTransactionTest(collectionOptions = {}) {
assert.commandWorked(mongosTestDB.adminCommand(commitCmdObj));
}
- function testRetryBasic(cmdObj, lsid, txnNumber, {
- expectRetryToSucceed,
+ function testNonRetryableBasic(cmdObj, {
+ txnOptions,
+ testMode,
expectFindAndModifyImageInOplog,
- expectFindAndModifyImageInSideCollection,
+ expectFindAndModifyImageInSideCollection
+ }) {
+ // A findAndModify write statement in a non-retryable transaction will not generate a
+ // pre/post image.
+ assert(!expectFindAndModifyImageInOplog);
+ assert(!expectFindAndModifyImageInSideCollection);
+ jsTest.log("Testing retrying a non-retryable internal transaction");
+ cmdObj.startTransaction = true;
+
+ // Initial try.
+ const initialLsid = txnOptions.makeSessionIdFunc();
+ let initialTxnNumber = 0;
+ runTxnRetryOnLockTimeoutError(() => {
+ initialTxnNumber++;
+ setTxnFields(cmdObj, initialLsid, initialTxnNumber);
+ assert.commandWorked(mongosTestDB.runCommand(cmdObj));
+ commitTransaction(initialLsid, initialTxnNumber, txnOptions.isPreparedTxn);
+ });
+
+ const initialTxnStateBefore = getTransactionState(initialLsid, initialTxnNumber);
+ assert.eq(initialTxnStateBefore.oplogEntries.length,
+ (txnOptions.isPreparedTxn ? 2 : 1) + (expectFindAndModifyImageInOplog ? 1 : 0),
+ initialTxnStateBefore.oplogEntries);
+ assert.eq(initialTxnStateBefore.imageEntries.length,
+ expectFindAndModifyImageInSideCollection ? 1 : 0,
+ initialTxnStateBefore.imageEntries);
+ assertConsistentImageEntries(initialLsid, initialTxnNumber);
+
+ setUpTestMode(testMode);
+
+ // Retry.
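+        // The retry is expected to fail because the original internal transaction was not
+        // retryable.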
+ assert.commandFailedWithCode(mongosTestDB.runCommand(cmdObj),
+ ErrorCodes.ConflictingOperationInProgress);
+
+ const initialTxnStateAfter = getTransactionState(initialLsid, initialTxnNumber);
+ assert.eq(initialTxnStateBefore.oplogEntries, initialTxnStateAfter.oplogEntries);
+ assert.eq(initialTxnStateBefore.txnEntries, initialTxnStateAfter.txnEntries);
+ assert.eq(initialTxnStateBefore.imageEntries, initialTxnStateAfter.imageEntries);
+
+ assert.commandWorked(mongosTestColl.remove({}));
+ }
+
+ function testRetryableBasic(cmdObj, {
txnOptions,
testMode,
- checkFunc
+ expectFindAndModifyImageInOplog,
+ expectFindAndModifyImageInSideCollection,
+ checkRetryResponseFunc
}) {
assert(!expectFindAndModifyImageInOplog || !expectFindAndModifyImageInSideCollection);
+ jsTest.log(
+ "Testing retrying a retryable internal transaction with one applyOps oplog entry");
+ cmdObj.startTransaction = true;
- const cmdObjToRetry = Object.assign(cmdObj, {
- lsid: lsid,
- txnNumber: NumberLong(txnNumber),
- startTransaction: true,
- autocommit: false,
+ // Initial try.
+ const initialLsid = txnOptions.makeSessionIdFunc();
+ let initialTxnNumber = 0;
+ let initialRes;
+ runTxnRetryOnLockTimeoutError(() => {
+ initialTxnNumber++;
+ setTxnFields(cmdObj, initialLsid, initialTxnNumber);
+ initialRes = assert.commandWorked(mongosTestDB.runCommand(cmdObj));
+ commitTransaction(initialLsid, initialTxnNumber, txnOptions.isPreparedTxn);
});
- const initialRes = assert.commandWorked(mongosTestDB.runCommand(cmdObjToRetry));
- commitTransaction(lsid, txnNumber, txnOptions.isPreparedTxn);
-
- const txnStateBeforeRetry = getTransactionState(lsid, txnNumber);
- assert.eq(txnStateBeforeRetry.oplogEntries.length,
+ const initialTxnStateBefore = getTransactionState(initialLsid, initialTxnNumber);
+ assert.eq(initialTxnStateBefore.oplogEntries.length,
(txnOptions.isPreparedTxn ? 2 : 1) + (expectFindAndModifyImageInOplog ? 1 : 0),
- txnStateBeforeRetry.oplogEntries);
- assert.eq(txnStateBeforeRetry.imageEntries.length,
+ initialTxnStateBefore.oplogEntries);
+ assert.eq(initialTxnStateBefore.imageEntries.length,
expectFindAndModifyImageInSideCollection ? 1 : 0,
- txnStateBeforeRetry.imageEntries);
- assertConsistentImageEntries(lsid, txnNumber);
+ initialTxnStateBefore.imageEntries);
+ assertConsistentImageEntries(initialLsid, initialTxnNumber);
setUpTestMode(testMode);
- const retryRes = mongosTestDB.runCommand(cmdObjToRetry);
- if (expectRetryToSucceed) {
- assert.commandWorked(retryRes);
- checkFunc(initialRes, retryRes);
- commitTransaction(lsid, txnNumber, txnOptions.isPreparedTxn, true /* isRetry */);
- } else {
- assert.commandFailedWithCode(retryRes, ErrorCodes.ConflictingOperationInProgress);
- }
+ // Retry in the initial internal transaction. No need to commit since the transaction has
+ // already committed.
+ const retryRes = assert.commandWorked(mongosTestDB.runCommand(cmdObj));
+ checkRetryResponseFunc(initialRes, retryRes);
+
+ // Retry in a different internal transaction (running in an internal session with a
+ // different txnUUID) to simulate a retry from a different mongos.
+ const retryLsid = Object.assign({}, initialLsid, {txnUUID: UUID()});
+ let retryTxnNumber = 0;
+ runTxnRetryOnLockTimeoutError(() => {
+ retryTxnNumber++;
+ setTxnFields(cmdObj, retryLsid, retryTxnNumber);
+ const retryRes = assert.commandWorked(mongosTestDB.runCommand(cmdObj));
+ checkRetryResponseFunc(initialRes, retryRes);
+ commitTransaction(
+ retryLsid, retryTxnNumber, txnOptions.isPreparedTxn, true /* isRetry */);
+ });
- const txnStateAfterRetry = getTransactionState(lsid, txnNumber);
- assert.eq(txnStateBeforeRetry.oplogEntries, txnStateAfterRetry.oplogEntries);
- assert.eq(txnStateBeforeRetry.txnEntries, txnStateAfterRetry.txnEntries);
- assert.eq(txnStateBeforeRetry.imageEntries, txnStateAfterRetry.imageEntries);
+ const initialTxnStateAfter = getTransactionState(initialLsid, initialTxnNumber);
+ assert.eq(initialTxnStateBefore.oplogEntries, initialTxnStateAfter.oplogEntries);
+ assert.eq(initialTxnStateBefore.txnEntries, initialTxnStateAfter.txnEntries);
+ assert.eq(initialTxnStateBefore.imageEntries, initialTxnStateAfter.imageEntries);
+ // The retry should not generate any persisted transaction state.
+ const retryTxnState = getTransactionState(retryLsid, retryTxnNumber);
+ assert.eq(retryTxnState.oplogEntries.length, 0, retryTxnState);
+ assert.eq(retryTxnState.txnEntries.length, 0, retryTxnState);
+ assert.eq(retryTxnState.imageEntries.length, 0, retryTxnState);
assert.commandWorked(mongosTestColl.remove({}));
}
- function testRetryLargeTxn(cmdObj, lsid, txnNumber, {
- expectFindAndModifyImageInOplog,
- expectFindAndModifyImageInSideCollection,
+ function testRetryableLargeTxn(cmdObj, {
txnOptions,
testMode,
- checkFunc
+ expectFindAndModifyImageInOplog,
+ expectFindAndModifyImageInSideCollection,
+ checkRetryResponseFunc
}) {
assert(!expectFindAndModifyImageInOplog || !expectFindAndModifyImageInSideCollection);
@@ -171,18 +241,12 @@ function RetryableInternalTransactionTest(collectionOptions = {}) {
return {
insert: kCollName,
documents: [Object.assign(doc, {y: new Array(kSize10MB).join("a")})],
- lsid: lsid,
- txnNumber: NumberLong(txnNumber),
stmtId: NumberInt(stmtId++),
- autocommit: false
};
};
let makeCmdObjToRetry = (cmdObj) => {
const cmdObjToRetry = Object.assign(cmdObj, {
- lsid: lsid,
- txnNumber: NumberLong(txnNumber),
stmtId: NumberInt(stmtId),
- autocommit: false,
});
if (cmdObjToRetry.documents) {
stmtId += cmdObjToRetry.documents.length;
@@ -198,94 +262,131 @@ function RetryableInternalTransactionTest(collectionOptions = {}) {
const insertCmdObj0 =
Object.assign(makeInsertCmdObj({_id: -100, x: 100}), {startTransaction: true});
- const cmdObjToRetry = makeCmdObjToRetry(cmdObj);
const insertCmdObj1 = makeInsertCmdObj({_id: -200, x: -200});
const insertCmdObj2 = makeInsertCmdObj({_id: -300, x: -300});
+ const cmdObjToRetry = makeCmdObjToRetry(cmdObj);
const insertCmdObjs = [insertCmdObj0, insertCmdObj1, insertCmdObj2];
+ // Initial try.
+ const initialLsid = txnOptions.makeSessionIdFunc();
+ let initialTxnNumber = 0;
let initialRes;
- if (txnOptions.oplogEntryLocation == kOplogEntryLocation.kLast) {
- assert.commandWorked(mongosTestDB.runCommand(insertCmdObj0));
- assert.commandWorked(mongosTestDB.runCommand(insertCmdObj1));
- assert.commandWorked(mongosTestDB.runCommand(insertCmdObj2));
- initialRes = assert.commandWorked(mongosTestDB.runCommand(cmdObjToRetry));
- } else if (txnOptions.oplogEntryLocation == kOplogEntryLocation.kMiddle) {
- assert.commandWorked(mongosTestDB.runCommand(insertCmdObj0));
- assert.commandWorked(mongosTestDB.runCommand(insertCmdObj1));
- initialRes = assert.commandWorked(mongosTestDB.runCommand(cmdObjToRetry));
- assert.commandWorked(mongosTestDB.runCommand(insertCmdObj2));
- } else {
- assert.commandWorked(mongosTestDB.runCommand(insertCmdObj0));
- initialRes = assert.commandWorked(mongosTestDB.runCommand(cmdObjToRetry));
- assert.commandWorked(mongosTestDB.runCommand(insertCmdObj1));
- assert.commandWorked(mongosTestDB.runCommand(insertCmdObj2));
- }
- commitTransaction(lsid, txnNumber, txnOptions.isPreparedTxn);
+ runTxnRetryOnLockTimeoutError(() => {
+ initialTxnNumber++;
+ setTxnFields(cmdObjToRetry, initialLsid, initialTxnNumber);
+ insertCmdObjs.forEach(cmdObj => setTxnFields(cmdObj, initialLsid, initialTxnNumber));
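+ // Run the retryable statement at the configured position among the large inserts so it
+ // lands in the first, middle or last applyOps oplog entry of the transaction.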
+ if (txnOptions.oplogEntryLocation == kOplogEntryLocation.kLast) {
+ assert.commandWorked(mongosTestDB.runCommand(insertCmdObj0));
+ assert.commandWorked(mongosTestDB.runCommand(insertCmdObj1));
+ assert.commandWorked(mongosTestDB.runCommand(insertCmdObj2));
+ initialRes = assert.commandWorked(mongosTestDB.runCommand(cmdObjToRetry));
+ } else if (txnOptions.oplogEntryLocation == kOplogEntryLocation.kMiddle) {
+ assert.commandWorked(mongosTestDB.runCommand(insertCmdObj0));
+ assert.commandWorked(mongosTestDB.runCommand(insertCmdObj1));
+ initialRes = assert.commandWorked(mongosTestDB.runCommand(cmdObjToRetry));
+ assert.commandWorked(mongosTestDB.runCommand(insertCmdObj2));
+ } else {
+ assert.commandWorked(mongosTestDB.runCommand(insertCmdObj0));
+ initialRes = assert.commandWorked(mongosTestDB.runCommand(cmdObjToRetry));
+ assert.commandWorked(mongosTestDB.runCommand(insertCmdObj1));
+ assert.commandWorked(mongosTestDB.runCommand(insertCmdObj2));
+ }
+ commitTransaction(initialLsid, initialTxnNumber, txnOptions.isPreparedTxn);
+ });
- const txnStateBeforeRetry = getTransactionState(lsid, txnNumber);
- assert.eq(txnStateBeforeRetry.oplogEntries.length,
+ const initialTxnStateBefore = getTransactionState(initialLsid, initialTxnNumber);
+ assert.eq(initialTxnStateBefore.oplogEntries.length,
(txnOptions.isPreparedTxn ? insertCmdObjs.length + 1 : insertCmdObjs.length) +
(expectFindAndModifyImageInOplog ? 1 : 0));
- assert.eq(txnStateBeforeRetry.imageEntries.length,
+ assert.eq(initialTxnStateBefore.imageEntries.length,
expectFindAndModifyImageInSideCollection ? 1 : 0,
- txnStateBeforeRetry.imageEntries);
- assertConsistentImageEntries(lsid, txnNumber);
+ initialTxnStateBefore.imageEntries);
+ assertConsistentImageEntries(initialLsid, initialTxnNumber);
setUpTestMode(testMode);
- insertCmdObjs.forEach(insertCmdObj => {
- const retryRes = assert.commandWorked(mongosTestDB.runCommand(insertCmdObj));
- assert.eq(retryRes.n, 1);
+ // Retry in the initial internal transaction. No need to commit since the transaction has
+ // already committed.
+ const retryRes = assert.commandWorked(mongosTestDB.runCommand(cmdObj));
+ checkRetryResponseFunc(initialRes, retryRes);
+
+ // Retry in a different internal transaction (running in an internal session with a
+ // different txnUUID) to simulate a retry from a different mongos.
+ const retryLsid = Object.assign({}, initialLsid, {txnUUID: UUID()});
+ let retryTxnNumber = 0;
+ runTxnRetryOnLockTimeoutError(() => {
+ retryTxnNumber++;
+ setTxnFields(cmdObjToRetry, retryLsid, retryTxnNumber);
+ insertCmdObjs.forEach(cmdObj => setTxnFields(cmdObj, retryLsid, retryTxnNumber));
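+ // Re-run every statement of the large transaction in the new internal session; each
+ // insert should again report that it wrote a single document.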
+ insertCmdObjs.forEach(insertCmdObj => {
+ const retryRes = assert.commandWorked(mongosTestDB.runCommand(insertCmdObj));
+ assert.eq(retryRes.n, 1);
+ });
+ const retryRes = assert.commandWorked(mongosTestDB.runCommand(cmdObjToRetry));
+ checkRetryResponseFunc(initialRes, retryRes);
+ commitTransaction(
+ retryLsid, retryTxnNumber, txnOptions.isPreparedTxn, true /* isRetry */);
});
- const retryRes = assert.commandWorked(mongosTestDB.runCommand(cmdObjToRetry));
- checkFunc(initialRes, retryRes);
- commitTransaction(lsid, txnNumber, txnOptions.isPreparedTxn, true /* isRetry */);
- const txnStateAfterRetry = getTransactionState(lsid, txnNumber);
- assert.eq(txnStateBeforeRetry.oplogEntries, txnStateAfterRetry.oplogEntries);
- assert.eq(txnStateBeforeRetry.txnEntries, txnStateAfterRetry.txnEntries);
- assert.eq(txnStateBeforeRetry.imageEntries, txnStateBeforeRetry.imageEntries);
+ const initialTxnStateAfter = getTransactionState(initialLsid, initialTxnNumber);
+ assert.eq(initialTxnStateBefore.oplogEntries, initialTxnStateAfter.oplogEntries);
+ assert.eq(initialTxnStateBefore.txnEntries, initialTxnStateAfter.txnEntries);
+ assert.eq(initialTxnStateBefore.imageEntries, initialTxnStateAfter.imageEntries);
+ // The retry should not generate any persisted transaction state.
+ const retryTxnState = getTransactionState(retryLsid, retryTxnNumber);
+ assert.eq(retryTxnState.oplogEntries.length, 0, retryTxnState);
+ assert.eq(retryTxnState.txnEntries.length, 0, retryTxnState);
+ assert.eq(retryTxnState.imageEntries.length, 0, retryTxnState);
assert.commandWorked(mongosTestColl.remove({}));
}
- function testRetry(cmdObj, lsid, txnNumber, {
+ function testRetry(cmdObj, {
+ txnOptions,
+ testMode,
expectRetryToSucceed,
expectFindAndModifyImageInOplog,
expectFindAndModifyImageInSideCollection,
- txnOptions,
- testMode,
- checkFunc
+ checkRetryResponseFunc
}) {
- const testRetryFunc = txnOptions.isLargeTxn ? testRetryLargeTxn : testRetryBasic;
- testRetryFunc(cmdObj, lsid, txnNumber, {
+ const testRetryFunc = (() => {
+ if (txnOptions.isLargeTxn) {
+ // This fixture only supports testing large retryable transactions since when a
+ // non-retryable transaction is retried, it fails before it even starts, so
+ // testing with a large transaction doesn't add any test coverage.
+ assert(expectRetryToSucceed);
+ return testRetryableLargeTxn;
+ }
+ return expectRetryToSucceed ? testRetryableBasic : testNonRetryableBasic;
+ })();
+ testRetryFunc(cmdObj, {
+ txnOptions,
+ testMode,
expectRetryToSucceed,
expectFindAndModifyImageInOplog,
expectFindAndModifyImageInSideCollection,
- txnOptions,
- testMode,
- checkFunc
+ checkRetryResponseFunc
});
}
- function testRetryInserts(lsid, txnNumber, {expectRetryToSucceed, txnOptions, testMode}) {
+ function testRetryInserts({txnOptions, testMode, expectRetryToSucceed}) {
jsTest.log("Testing batched inserts");
const insertCmdObj = {
insert: kCollName,
documents: [{_id: 0, x: 0}, {_id: 1, x: 1}],
};
- const checkFunc = (initialRes, retryRes) => {
+ const checkRetryResponseFunc = (initialRes, retryRes) => {
assert.eq(initialRes.n, retryRes.n);
insertCmdObj.documents.forEach(doc => {
assert.eq(mongosTestColl.count(doc), 1);
});
};
- testRetry(
- insertCmdObj, lsid, txnNumber, {expectRetryToSucceed, txnOptions, testMode, checkFunc});
+ testRetry(insertCmdObj,
+ {txnOptions, testMode, expectRetryToSucceed, checkRetryResponseFunc});
}
- function testRetryUpdates(lsid, txnNumber, {expectRetryToSucceed, txnOptions, testMode}) {
+ function testRetryUpdates({txnOptions, testMode, expectRetryToSucceed}) {
jsTest.log("Testing batched updates");
assert.commandWorked(mongosTestColl.insert([{_id: 0, x: 0}, {_id: 1, x: 1}]));
@@ -295,7 +396,7 @@ function RetryableInternalTransactionTest(collectionOptions = {}) {
updates:
[{q: {_id: 0, x: 0}, u: {$inc: {x: 10}}}, {q: {_id: 1, x: 1}, u: {$inc: {x: 10}}}],
};
- const checkFunc = (initialRes, retryRes) => {
+ const checkRetryResponseFunc = (initialRes, retryRes) => {
assert.eq(initialRes.nModified, retryRes.nModified);
updateCmdObj.updates.forEach(updateArgs => {
const originalDoc = updateArgs.q;
@@ -305,11 +406,11 @@ function RetryableInternalTransactionTest(collectionOptions = {}) {
assert.eq(mongosTestColl.count(updatedDoc), 1);
});
};
- testRetry(
- updateCmdObj, lsid, txnNumber, {expectRetryToSucceed, txnOptions, testMode, checkFunc});
+ testRetry(updateCmdObj,
+ {txnOptions, testMode, expectRetryToSucceed, checkRetryResponseFunc});
}
- function testRetryDeletes(lsid, txnNumber, {expectRetryToSucceed, txnOptions, testMode}) {
+ function testRetryDeletes({txnOptions, testMode, expectRetryToSucceed}) {
jsTest.log("Testing batched deletes");
assert.commandWorked(mongosTestColl.insert([{_id: 0, x: 0}, {_id: 1, x: 1}]));
@@ -318,50 +419,50 @@ function RetryableInternalTransactionTest(collectionOptions = {}) {
delete: kCollName,
deletes: [{q: {_id: 0, x: 0}, limit: 1}, {q: {_id: 1, x: 1}, limit: 1}],
};
- const checkFunc = (initialRes, retryRes) => {
+ const checkRetryResponseFunc = (initialRes, retryRes) => {
assert.eq(initialRes.n, retryRes.n);
deleteCmdObj.deletes.forEach(deleteArgs => {
assert.eq(mongosTestColl.count(deleteArgs.q), 0);
});
};
- testRetry(
- deleteCmdObj, lsid, txnNumber, {expectRetryToSucceed, txnOptions, testMode, checkFunc});
+ testRetry(deleteCmdObj,
+ {txnOptions, testMode, expectRetryToSucceed, checkRetryResponseFunc});
}
- function testRetryFindAndModify(findAndModifyCmdObj, lsid, txnNumber, {
- expectRetryToSucceed,
- expectFindAndModifyImage,
+ function testRetryFindAndModify(findAndModifyCmdObj, {
txnOptions,
testMode,
enableFindAndModifyImageCollection,
+ expectRetryToSucceed,
+ expectFindAndModifyImage,
}) {
const shard0Primary = st.rs0.getPrimary();
assert.commandWorked(shard0Primary.adminCommand({
setParameter: 1,
storeFindAndModifyImagesInSideCollection: enableFindAndModifyImageCollection
}));
- const checkFunc = (initialRes, retryRes) => {
+ const checkRetryResponseFunc = (initialRes, retryRes) => {
assert.eq(initialRes.lastErrorObject, retryRes.lastErrorObject);
assert.eq(initialRes.value, retryRes.value);
};
- testRetry(findAndModifyCmdObj, lsid, txnNumber, {
+ testRetry(findAndModifyCmdObj, {
+ txnOptions,
+ testMode,
expectRetryToSucceed,
expectFindAndModifyImageInOplog: expectRetryToSucceed && expectFindAndModifyImage &&
!enableFindAndModifyImageCollection,
expectFindAndModifyImageInSideCollection: expectRetryToSucceed &&
expectFindAndModifyImage && enableFindAndModifyImageCollection,
- txnOptions,
- testMode,
- checkFunc
+ checkRetryResponseFunc
});
}
- function testRetryFindAndModifyUpsert(lsid, txnNumber, {
- expectRetryToSucceed,
+ function testRetryFindAndModifyUpsert({
txnOptions,
testMode,
enableFindAndModifyImageCollection,
+ expectRetryToSucceed,
}) {
jsTest.log(
"Testing findAndModify upsert (i.e. no preImage or postImage) with enableFindAndModifyImageCollection: " +
@@ -374,20 +475,20 @@ function RetryableInternalTransactionTest(collectionOptions = {}) {
upsert: true,
};
const expectFindAndModifyImage = false; // no pre or post image.
- testRetryFindAndModify(findAndModifyCmdObj, lsid, txnNumber, {
- expectRetryToSucceed,
- expectFindAndModifyImage,
+ testRetryFindAndModify(findAndModifyCmdObj, {
txnOptions,
testMode,
enableFindAndModifyImageCollection,
+ expectFindAndModifyImage,
+ expectRetryToSucceed,
});
}
- function testRetryFindAndModifyUpdateWithPreImage(lsid, txnNumber, {
- expectRetryToSucceed,
+ function testRetryFindAndModifyUpdateWithPreImage({
txnOptions,
testMode,
enableFindAndModifyImageCollection,
+ expectRetryToSucceed,
}) {
jsTest.log(
"Testing findAndModify update with preImage with enableFindAndModifyImageCollection: " +
@@ -400,20 +501,20 @@ function RetryableInternalTransactionTest(collectionOptions = {}) {
update: {$inc: {x: -10}},
};
const expectFindAndModifyImage = true;
- testRetryFindAndModify(findAndModifyCmdObj, lsid, txnNumber, {
- expectRetryToSucceed,
- expectFindAndModifyImage,
+ testRetryFindAndModify(findAndModifyCmdObj, {
txnOptions,
testMode,
enableFindAndModifyImageCollection,
+ expectFindAndModifyImage,
+ expectRetryToSucceed,
});
}
- function testRetryFindAndModifyUpdateWithPostImage(lsid, txnNumber, {
- expectRetryToSucceed,
+ function testRetryFindAndModifyUpdateWithPostImage({
txnOptions,
testMode,
enableFindAndModifyImageCollection,
+ expectRetryToSucceed,
}) {
jsTest.log(
"Testing findAndModify update with postImage with enableFindAndModifyImageCollection: " +
@@ -427,20 +528,20 @@ function RetryableInternalTransactionTest(collectionOptions = {}) {
new: true,
};
const expectFindAndModifyImage = true;
- testRetryFindAndModify(findAndModifyCmdObj, lsid, txnNumber, {
- expectRetryToSucceed,
- expectFindAndModifyImage,
+ testRetryFindAndModify(findAndModifyCmdObj, {
txnOptions,
testMode,
enableFindAndModifyImageCollection,
+ expectFindAndModifyImage,
+ expectRetryToSucceed,
});
}
- function testRetryFindAndModifyRemove(lsid, txnNumber, {
- expectRetryToSucceed,
+ function testRetryFindAndModifyRemove({
txnOptions,
testMode,
enableFindAndModifyImageCollection,
+ expectRetryToSucceed,
}) {
jsTest.log(
"Testing findAndModify remove (i.e. with preImage) with enableFindAndModifyImageCollection: " +
@@ -453,84 +554,84 @@ function RetryableInternalTransactionTest(collectionOptions = {}) {
remove: true,
};
const expectFindAndModifyImage = true;
- testRetryFindAndModify(findAndModifyCmdObj, lsid, txnNumber, {
- expectRetryToSucceed,
- enableFindAndModifyImageCollection,
+ testRetryFindAndModify(findAndModifyCmdObj, {
txnOptions,
testMode,
+ enableFindAndModifyImageCollection,
expectFindAndModifyImage,
+ expectRetryToSucceed,
});
}
this.TestMode = kTestMode;
- this.runInsertUpdateDeleteTests = function(lsid, testOptions) {
- testOptions.lastUsedTxnNumber =
- testOptions.lastUsedTxnNumber ? testOptions.lastUsedTxnNumber : 0;
- testOptions.txnOptions = testOptions.txnOptions ? testOptions.txnOptions : {};
+ this.runInsertUpdateDeleteTests = function(testOptions) {
if (testOptions.txnOptions.isLargeTxn) {
testOptions.txnOptions.oplogEntryLocation = getRandomOplogEntryLocation();
}
jsTest.log(`Testing insert, update and delete with options: ${tojson(testOptions)}`);
- testRetryInserts(lsid, testOptions.lastUsedTxnNumber++, testOptions);
- testRetryUpdates(lsid, testOptions.lastUsedTxnNumber++, testOptions);
- testRetryDeletes(lsid, testOptions.lastUsedTxnNumber++, testOptions);
+ testRetryInserts(testOptions);
+ testRetryUpdates(testOptions);
+ testRetryDeletes(testOptions);
};
- function runFindAndModifyTests(lsid, testOptions) {
- testOptions.lastUsedTxnNumber =
- testOptions.lastUsedTxnNumber ? testOptions.lastUsedTxnNumber : 0;
- testOptions.txnOptions = testOptions.txnOptions ? testOptions.txnOptions : {};
+ function runFindAndModifyTests(testOptions) {
if (testOptions.txnOptions.isLargeTxn) {
testOptions.txnOptions.oplogEntryLocation = getRandomOplogEntryLocation();
}
jsTest.log(`Testing findAndModify with options: ${tojson(testOptions)}`);
- testRetryFindAndModifyUpsert(lsid, testOptions.lastUsedTxnNumber++, testOptions);
- testRetryFindAndModifyUpdateWithPreImage(
- lsid, testOptions.lastUsedTxnNumber++, testOptions);
- testRetryFindAndModifyUpdateWithPostImage(
- lsid, testOptions.lastUsedTxnNumber++, testOptions);
- testRetryFindAndModifyRemove(lsid, testOptions.lastUsedTxnNumber++, testOptions);
+ testRetryFindAndModifyUpsert(testOptions);
+ testRetryFindAndModifyUpdateWithPreImage(testOptions);
+ testRetryFindAndModifyUpdateWithPostImage(testOptions);
+ testRetryFindAndModifyRemove(testOptions);
}
- this.runFindAndModifyTestsEnableImageCollection = function(lsid, testOptions) {
+ this.runFindAndModifyTestsEnableImageCollection = function(testOptions) {
testOptions.enableFindAndModifyImageCollection = true;
- runFindAndModifyTests(lsid, testOptions);
+ runFindAndModifyTests(testOptions);
};
- this.runFindAndModifyTestsDisableImageCollection = function(lsid, testOptions) {
+ this.runFindAndModifyTestsDisableImageCollection = function(testOptions) {
testOptions.enableFindAndModifyImageCollection = false;
- runFindAndModifyTests(lsid, testOptions);
+ runFindAndModifyTests(testOptions);
};
this.runTestsForAllUnpreparedRetryableInternalTransactionTypes = function(runTestsFunc,
testMode) {
+ const makeSessionIdFunc = makeSessionIdForRetryableInternalTransaction;
const expectRetryToSucceed = true;
- runTestsFunc(makeSessionIdForRetryableInternalTransaction(), {
- expectRetryToSucceed,
- txnOptions: {isPreparedTxn: false, isLargeTxn: false},
- testMode
+ runTestsFunc({
+ txnOptions: {makeSessionIdFunc, isPreparedTxn: false, isLargeTxn: false},
+ testMode,
+ expectRetryToSucceed
});
- runTestsFunc(
- makeSessionIdForRetryableInternalTransaction(),
- {expectRetryToSucceed, txnOptions: {isPreparedTxn: false, isLargeTxn: true}, testMode});
+ runTestsFunc({
+ txnOptions: {makeSessionIdFunc, isPreparedTxn: false, isLargeTxn: true},
+ testMode,
+ expectRetryToSucceed
+ });
};
this.runTestsForAllPreparedRetryableInternalTransactionTypes = function(runTestsFunc,
testMode) {
+ const makeSessionIdFunc = makeSessionIdForRetryableInternalTransaction;
const expectRetryToSucceed = true;
- runTestsFunc(
- makeSessionIdForRetryableInternalTransaction(),
- {expectRetryToSucceed, txnOptions: {isPreparedTxn: true, isLargeTxn: false}, testMode});
+ runTestsFunc({
+ txnOptions: {makeSessionIdFunc, isPreparedTxn: true, isLargeTxn: false},
+ testMode,
+ expectRetryToSucceed
+ });
- runTestsFunc(
- makeSessionIdForRetryableInternalTransaction(),
- {expectRetryToSucceed, txnOptions: {isPreparedTxn: true, isLargeTxn: true}, testMode});
+ runTestsFunc({
+ txnOptions: {makeSessionIdFunc, isPreparedTxn: true, isLargeTxn: true},
+ testMode,
+ expectRetryToSucceed
+ });
};
this.runTestsForAllRetryableInternalTransactionTypes = function(runTestsFunc, testMode) {
diff --git a/jstests/sharding/internal_txns/overwrite_txns.js b/jstests/sharding/internal_txns/overwrite_txns.js
new file mode 100644
index 00000000000..baf7abd7931
--- /dev/null
+++ b/jstests/sharding/internal_txns/overwrite_txns.js
@@ -0,0 +1,292 @@
+/*
+ * Tests when internal transactions overwrite existing transactions.
+ *
+ * @tags: [requires_fcv_60, uses_transactions]
+ */
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/uuid_util.js");
+
+const st = new ShardingTest({shards: 1, rs: {nodes: 2}});
+
+const kDbName = "testDb";
+const kCollName = "testColl";
+const testDB = st.rs0.getPrimary().getDB(kDbName);
+assert.commandWorked(testDB[kCollName].insert({x: 1})); // Set up the collection.
+
+(() => {
+ jsTest.log("Verify in progress child transactions are aborted by higher txnNumbers");
+
+ let clientTxnNumber = 5;
+ const clientSession = {id: UUID()};
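+ // A child session whose lsid carries the parent txnNumber is retryable; one with only a
+ // txnUUID is not.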
+ const retryableChildSession = {
+ id: clientSession.id,
+ txnUUID: UUID(),
+ txnNumber: NumberLong(clientTxnNumber)
+ };
+ const nonRetryableChildSession = {id: clientSession.id, txnUUID: UUID()};
+
+ assert.commandWorked(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: clientSession,
+ txnNumber: NumberLong(clientTxnNumber),
+ startTransaction: true,
+ autocommit: false
+ }));
+
+ // A new child transaction should abort an existing client transaction.
+ clientTxnNumber++;
+ retryableChildSession.txnNumber = NumberLong(clientTxnNumber);
+ assert.commandWorked(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: retryableChildSession,
+ txnNumber: NumberLong(0),
+ startTransaction: true,
+ autocommit: false
+ }));
+ // The client transaction should have been aborted.
+ assert.commandFailedWithCode(testDB.adminCommand({
+ commitTransaction: 1,
+ lsid: clientSession,
+ txnNumber: NumberLong(clientTxnNumber - 1),
+ autocommit: false
+ }),
+ ErrorCodes.TransactionTooOld);
+
+ // A non-retryable child transaction shouldn't affect retryable operations.
+ assert.commandWorked(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: nonRetryableChildSession,
+ txnNumber: NumberLong(0),
+ startTransaction: true,
+ autocommit: false
+ }));
+ // The retryable child transaction should still be open.
+ assert.commandWorked(testDB.runCommand({
+ find: kCollName,
+ lsid: retryableChildSession,
+ txnNumber: NumberLong(0),
+ autocommit: false
+ }));
+
+ // A new child transaction should abort a lower child transaction.
+ clientTxnNumber++;
+ let retryableChildSessionCopy = Object.merge({}, retryableChildSession);
+ retryableChildSession.txnNumber = NumberLong(clientTxnNumber);
+ assert.commandWorked(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: retryableChildSession,
+ txnNumber: NumberLong(0),
+ startTransaction: true,
+ autocommit: false
+ }));
+ // The child transaction should have been aborted.
+ assert.commandFailedWithCode(testDB.adminCommand({
+ commitTransaction: 1,
+ lsid: retryableChildSessionCopy,
+ txnNumber: NumberLong(0),
+ autocommit: false
+ }),
+ ErrorCodes.TransactionTooOld);
+
+ // A new client transaction should abort a lower child transaction.
+ clientTxnNumber++;
+ assert.commandWorked(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: clientSession,
+ txnNumber: NumberLong(clientTxnNumber),
+ startTransaction: true,
+ autocommit: false
+ }));
+ // The client transaction should have been aborted.
+ assert.commandFailedWithCode(testDB.adminCommand({
+ commitTransaction: 1,
+ lsid: retryableChildSessionCopy,
+ txnNumber: NumberLong(0),
+ autocommit: false
+ }),
+ ErrorCodes.TransactionTooOld);
+
+ // A new retryable write should abort a lower child transaction.
+ clientTxnNumber++;
+ retryableChildSession.txnNumber = NumberLong(clientTxnNumber);
+ assert.commandWorked(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: retryableChildSession,
+ txnNumber: NumberLong(0),
+ startTransaction: true,
+ autocommit: false
+ }));
+ clientTxnNumber++;
+ assert.commandWorked(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: clientSession,
+ txnNumber: NumberLong(clientTxnNumber)
+ }));
+ // The child transaction should have been aborted.
+ assert.commandFailedWithCode(testDB.adminCommand({
+ commitTransaction: 1,
+ lsid: retryableChildSession,
+ txnNumber: NumberLong(0),
+ autocommit: false
+ }),
+ ErrorCodes.TransactionTooOld);
+
+ // The non-retryable child transaction should still be open.
+ assert.commandWorked(testDB.adminCommand({
+ commitTransaction: 1,
+ lsid: nonRetryableChildSession,
+ txnNumber: NumberLong(0),
+ autocommit: false
+ }));
+})();
+
+(() => {
+ jsTest.log("Verify prepared child transactions are not aborted by higher txnNumbers");
+
+ let clientTxnNumber = 5;
+ const clientSession = {id: UUID()};
+ const retryableChildSession = {
+ id: clientSession.id,
+ txnUUID: UUID(),
+ txnNumber: NumberLong(clientTxnNumber)
+ };
+ const nonRetryableChildSession = {id: clientSession.id, txnUUID: UUID()};
+
+ // Prepare a retryable and non-retryable child transaction.
+
+ assert.commandWorked(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: nonRetryableChildSession,
+ txnNumber: NumberLong(0),
+ startTransaction: true,
+ autocommit: false
+ }));
+ assert.commandWorked(testDB.adminCommand({
+ prepareTransaction: 1,
+ lsid: nonRetryableChildSession,
+ txnNumber: NumberLong(0),
+ autocommit: false
+ }));
+
+ assert.commandWorked(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: retryableChildSession,
+ txnNumber: NumberLong(0),
+ startTransaction: true,
+ autocommit: false
+ }));
+ assert.commandWorked(testDB.adminCommand({
+ prepareTransaction: 1,
+ lsid: retryableChildSession,
+ txnNumber: NumberLong(0),
+ autocommit: false
+ }));
+
+ // Verify a higher txnNumber cannot be accepted until the retryable transaction exits prepare.
+ // Test all three sources of a higher txnNumber: client retryable write, client transaction, and
+ // a retryable child session transaction.
+ clientTxnNumber++;
+ assert.commandFailedWithCode(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: clientSession,
+ txnNumber: NumberLong(clientTxnNumber),
+ maxTimeMS: 1000
+ }),
+ ErrorCodes.MaxTimeMSExpired);
+
+ clientTxnNumber++;
+ assert.commandFailedWithCode(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: clientSession,
+ txnNumber: NumberLong(clientTxnNumber),
+ startTransaction: true,
+ autocommit: false,
+ maxTimeMS: 1000
+ }),
+ ErrorCodes.MaxTimeMSExpired);
+
+ clientTxnNumber++;
+ assert.commandFailedWithCode(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: {id: clientSession.id, txnUUID: UUID(), txnNumber: NumberLong(clientTxnNumber)},
+ txnNumber: NumberLong(clientTxnNumber),
+ startTransaction: true,
+ autocommit: false,
+ maxTimeMS: 1000
+ }),
+ ErrorCodes.MaxTimeMSExpired);
+
+ // Verify a transaction blocked on a prepared child transaction can become unstuck and succeed
+ // once the child transaction exits prepare.
+ const fp = configureFailPoint(
+ st.rs0.getPrimary(),
+ "waitAfterNewStatementBlocksBehindOpenInternalTransactionForRetryableWrite");
+ const newTxnThread = new Thread((host, lsidUUID, txnNumber) => {
+ const lsid = {id: UUID(lsidUUID)};
+
+ const conn = new Mongo(host);
+ assert.commandWorked(conn.getDB("foo").runCommand({
+ insert: "test",
+ documents: [{x: 1}],
+ lsid: lsid,
+ txnNumber: NumberLong(txnNumber),
+ startTransaction: true,
+ autocommit: false,
+ }));
+ assert.commandWorked(conn.adminCommand({
+ commitTransaction: 1,
+ lsid: lsid,
+ txnNumber: NumberLong(txnNumber),
+ autocommit: false
+ }));
+ }, st.s.host, extractUUIDFromObject(clientSession.id), clientTxnNumber);
+ newTxnThread.start();
+
+ // Wait for the side transaction to hit a PreparedTransactionInProgress error, then resolve the
+ // prepared transaction and verify the side transaction can successfully complete.
+ fp.wait();
+ fp.off();
+
+ assert.commandWorked(testDB.adminCommand({
+ abortTransaction: 1,
+ lsid: retryableChildSession,
+ txnNumber: NumberLong(0),
+ autocommit: false
+ }));
+
+ newTxnThread.join();
+
+ // A higher txnNumber is accepted despite the prepared non-retryable child transaction.
+ clientTxnNumber++;
+ assert.commandWorked(testDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: clientSession,
+ txnNumber: NumberLong(clientTxnNumber)
+ }));
+
+ assert.commandWorked(testDB.adminCommand({
+ abortTransaction: 1,
+ lsid: nonRetryableChildSession,
+ txnNumber: NumberLong(0),
+ autocommit: false
+ }));
+})();
+
+st.stop();
+})();
diff --git a/jstests/sharding/internal_txns/partial_index.js b/jstests/sharding/internal_txns/partial_index.js
index 032505660ab..b9c5462aaa4 100644
--- a/jstests/sharding/internal_txns/partial_index.js
+++ b/jstests/sharding/internal_txns/partial_index.js
@@ -9,242 +9,327 @@
load("jstests/libs/analyze_plan.js");
-const st = new ShardingTest({shards: {rs0: {nodes: 2}}});
-
const kDbName = "testDb";
const kCollName = "testColl";
const kConfigTxnNs = "config.transactions";
+const kPartialIndexName = "parent_lsid";
+
+function runTest(st, alwaysCreateFeatureFlagEnabled) {
+ const mongosTestDB = st.s.getDB(kDbName);
+ const shard0PrimaryConfigTxnColl = st.rs0.getPrimary().getCollection(kConfigTxnNs);
+
+ function assertPartialIndexExists(node) {
+ const configDB = node.getDB("config");
+ const indexSpecs =
+ assert.commandWorked(configDB.runCommand({"listIndexes": "transactions"}))
+ .cursor.firstBatch;
+ indexSpecs.sort((index0, index1) => index0.name > index1.name);
+ assert.eq(indexSpecs.length, 2);
+ const idIndexSpec = indexSpecs[0];
+ assert.eq(idIndexSpec.key, {"_id": 1});
+ const partialIndexSpec = indexSpecs[1];
+ assert.eq(partialIndexSpec.key, {"parentLsid": 1, "_id.txnNumber": 1, "_id": 1});
+ assert.eq(partialIndexSpec.partialFilterExpression, {"parentLsid": {"$exists": true}});
+ }
+
+ function assertFindUsesCoveredQuery(node) {
+ const configTxnColl = node.getCollection(kConfigTxnNs);
+ const childSessionDoc = configTxnColl.findOne({
+ "_id.id": sessionUUID,
+ "_id.txnNumber": childLsid.txnNumber,
+ "_id.txnUUID": childLsid.txnUUID
+ });
+
+ const explainRes = assert.commandWorked(
+ configTxnColl.explain()
+ .find({"parentLsid": parentSessionDoc._id, "_id.txnNumber": childLsid.txnNumber},
+ {_id: 1})
+ .finish());
+ const winningPlan = getWinningPlan(explainRes.queryPlanner);
+ assert.eq(winningPlan.stage, "PROJECTION_COVERED");
+ assert.eq(winningPlan.inputStage.stage, "IXSCAN");
+
+ const findRes =
+ configTxnColl
+ .find({"parentLsid": parentSessionDoc._id, "_id.txnNumber": childLsid.txnNumber},
+ {_id: 1})
+ .toArray();
+ assert.eq(findRes.length, 1);
+ assert.eq(findRes[0]._id, childSessionDoc._id);
+ }
+
+ function assertPartialIndexDoesNotExist(node) {
+ const configDB = node.getDB("config");
+ const indexSpecs =
+ assert.commandWorked(configDB.runCommand({"listIndexes": "transactions"}))
+ .cursor.firstBatch;
+ assert.eq(indexSpecs.length, 1);
+ const idIndexSpec = indexSpecs[0];
+ assert.eq(idIndexSpec.key, {"_id": 1});
+ }
+
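+ // Drops the partial index, forces a failover, and verifies whether the new primary
+ // recreates the index on step-up.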
+ function indexRecreationTest(expectRecreateAfterDrop) {
+ st.rs0.getPrimary().getCollection(kConfigTxnNs).dropIndex(kPartialIndexName);
+ st.rs0.awaitReplication();
+
+ st.rs0.nodes.forEach(node => {
+ assertPartialIndexDoesNotExist(node);
+ });
+
+ let primary = st.rs0.getPrimary();
+ assert.commandWorked(
+ primary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
+ assert.commandWorked(primary.adminCommand({replSetFreeze: 0}));
+
+ st.rs0.awaitNodesAgreeOnPrimary();
+ st.rs0.awaitReplication();
+
+ st.rs0.nodes.forEach(node => {
+ if (expectRecreateAfterDrop) {
+ assertPartialIndexExists(node);
+ } else {
+ assertPartialIndexDoesNotExist(node);
+ }
+ });
+ }
+
+ // If the collection is empty and the index does not exist, we should always create the partial
+ // index on stepup.
+ indexRecreationTest(true /* expectRecreateAfterDrop */);
+
+ const sessionUUID = UUID();
+ const parentLsid = {id: sessionUUID};
+ const parentTxnNumber = 35;
+ let stmtId = 0;
-const mongosTestDB = st.s.getDB(kDbName);
-const shard0PrimaryConfigTxnColl = st.rs0.getPrimary().getCollection(kConfigTxnNs);
-
-const sessionUUID = UUID();
-const parentLsid = {
- id: sessionUUID
-};
-const parentTxnNumber = 35;
-let stmtId = 0;
-
-assert.commandWorked(mongosTestDB.runCommand({
- insert: kCollName,
- documents: [{_id: 0}],
- lsid: parentLsid,
- txnNumber: NumberLong(parentTxnNumber),
- stmtId: NumberInt(stmtId++)
-}));
-const parentSessionDoc = shard0PrimaryConfigTxnColl.findOne({"_id.id": sessionUUID});
-
-const childLsid = {
- id: sessionUUID,
- txnNumber: NumberLong(parentTxnNumber),
- txnUUID: UUID()
-};
-let childTxnNumber = 0;
-
-function runRetryableInternalTransaction(txnNumber) {
assert.commandWorked(mongosTestDB.runCommand({
insert: kCollName,
+ documents: [{_id: 0}],
+ lsid: parentLsid,
+ txnNumber: NumberLong(parentTxnNumber),
+ stmtId: NumberInt(stmtId++)
+ }));
+ const parentSessionDoc = shard0PrimaryConfigTxnColl.findOne({"_id.id": sessionUUID});
+
+ const childLsid = {id: sessionUUID, txnNumber: NumberLong(parentTxnNumber), txnUUID: UUID()};
+ let childTxnNumber = 0;
+
+ function runRetryableInternalTransaction(txnNumber) {
+ assert.commandWorked(mongosTestDB.runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: childLsid,
+ txnNumber: NumberLong(txnNumber),
+ stmtId: NumberInt(stmtId++),
+ autocommit: false,
+ startTransaction: true
+ }));
+ assert.commandWorked(mongosTestDB.adminCommand({
+ commitTransaction: 1,
+ lsid: childLsid,
+ txnNumber: NumberLong(txnNumber),
+ autocommit: false
+ }));
+ }
+
+ runRetryableInternalTransaction(childTxnNumber);
+ assert.eq(shard0PrimaryConfigTxnColl.count({"_id.id": sessionUUID}), 2);
+
+ st.rs0.nodes.forEach(node => {
+ assertPartialIndexExists(node);
+ assertFindUsesCoveredQuery(node);
+ });
+
+ childTxnNumber++;
+ runRetryableInternalTransaction(childTxnNumber);
+ assert.eq(shard0PrimaryConfigTxnColl.count({"_id.id": sessionUUID}), 2);
+
+ st.rs0.nodes.forEach(node => {
+ assertPartialIndexExists(node);
+ assertFindUsesCoveredQuery(node);
+ });
+
+ //
+ // Verify clients can create the index only if they provide the exact specification and that
+ // operations requiring the index fail if it does not exist.
+ //
+
+ const indexConn = st.rs0.getPrimary();
+ assert.commandWorked(
+ indexConn.getCollection("config.transactions").dropIndex(kPartialIndexName));
+
+ // Normal writes don't involve config.transactions, so they succeed.
+ assert.commandWorked(indexConn.getDB(kDbName).runCommand(
+ {insert: kCollName, documents: [{x: 1}], lsid: {id: UUID()}}));
+
+ // Retryable writes read from the partial index, so they fail.
+ let res = assert.commandFailedWithCode(indexConn.getDB(kDbName).runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: {id: UUID()},
+ txnNumber: NumberLong(11)
+ }),
+ ErrorCodes.BadValue);
+ assert(res.errmsg.includes("Please create an index directly "), tojson(res));
+
+ // User transactions read from the partial index, so they fail.
+ assert.commandFailedWithCode(indexConn.getDB(kDbName).runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: {id: UUID()},
+ txnNumber: NumberLong(11),
+ startTransaction: true,
+ autocommit: false
+ }),
+ ErrorCodes.BadValue);
+
+ // Non-retryable internal transactions do not read from or update the partial index, so they can
+ // succeed without the index existing.
+ let nonRetryableTxnSession = {id: UUID(), txnUUID: UUID()};
+ assert.commandWorked(indexConn.getDB(kDbName).runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: nonRetryableTxnSession,
+ txnNumber: NumberLong(11),
+ stmtId: NumberInt(0),
+ startTransaction: true,
+ autocommit: false
+ }));
+ assert.commandWorked(indexConn.adminCommand({
+ commitTransaction: 1,
+ lsid: nonRetryableTxnSession,
+ txnNumber: NumberLong(11),
+ autocommit: false
+ }));
+
+ // Retryable transactions read from the partial index, so they fail.
+ assert.commandFailedWithCode(indexConn.getDB(kDbName).runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: {id: UUID(), txnUUID: UUID(), txnNumber: NumberLong(2)},
+ txnNumber: NumberLong(11),
+ stmtId: NumberInt(0),
+ startTransaction: true,
+ autocommit: false
+ }),
+ ErrorCodes.BadValue);
+
+ // Recreating the partial index requires the exact options used internally, but in any order.
+ assert.commandFailedWithCode(indexConn.getDB("config").runCommand({
+ createIndexes: "transactions",
+ indexes: [{v: 2, name: "parent_lsid", key: {parentLsid: 1, "_id.txnNumber": 1, _id: 1}}],
+ }),
+ ErrorCodes.IllegalOperation);
+ assert.commandWorked(indexConn.getDB("config").runCommand({
+ createIndexes: "transactions",
+ indexes: [{
+ name: "parent_lsid",
+ key: {parentLsid: 1, "_id.txnNumber": 1, _id: 1},
+ partialFilterExpression: {parentLsid: {$exists: true}},
+ v: 2,
+ }],
+ }));
+
+ // Operations involving the index should succeed now.
+
+ assert.commandWorked(indexConn.getDB(kDbName).runCommand(
+ {insert: kCollName, documents: [{x: 1}], lsid: {id: UUID()}}));
+
+ assert.commandWorked(indexConn.getDB(kDbName).runCommand(
+ {insert: kCollName, documents: [{x: 1}], lsid: {id: UUID()}, txnNumber: NumberLong(11)}));
+
+ let userSessionAfter = {id: UUID()};
+ assert.commandWorked(indexConn.getDB(kDbName).runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: userSessionAfter,
+ txnNumber: NumberLong(11),
+ startTransaction: true,
+ autocommit: false
+ }));
+ assert.commandWorked(indexConn.adminCommand({
+ commitTransaction: 1,
+ lsid: userSessionAfter,
+ txnNumber: NumberLong(11),
+ autocommit: false
+ }));
+
+ let nonRetryableTxnSessionAfter = {id: UUID(), txnUUID: UUID()};
+ assert.commandWorked(indexConn.getDB(kDbName).runCommand({
+ insert: kCollName,
+ documents: [{x: 1}],
+ lsid: nonRetryableTxnSessionAfter,
+ txnNumber: NumberLong(11),
+ stmtId: NumberInt(0),
+ startTransaction: true,
+ autocommit: false
+ }));
+ assert.commandWorked(indexConn.adminCommand({
+ commitTransaction: 1,
+ lsid: nonRetryableTxnSessionAfter,
+ txnNumber: NumberLong(11),
+ autocommit: false
+ }));
+
+ let retryableTxnSessionAfter = {id: UUID(), txnUUID: UUID(), txnNumber: NumberLong(2)};
+ assert.commandWorked(indexConn.getDB(kDbName).runCommand({
+ insert: kCollName,
documents: [{x: 1}],
- lsid: childLsid,
- txnNumber: NumberLong(txnNumber),
- stmtId: NumberInt(stmtId++),
- autocommit: false,
- startTransaction: true
+ lsid: retryableTxnSessionAfter,
+ txnNumber: NumberLong(11),
+ stmtId: NumberInt(0),
+ startTransaction: true,
+ autocommit: false
}));
- assert.commandWorked(mongosTestDB.adminCommand({
+ assert.commandWorked(indexConn.adminCommand({
commitTransaction: 1,
- lsid: childLsid,
- txnNumber: NumberLong(txnNumber),
+ lsid: retryableTxnSessionAfter,
+ txnNumber: NumberLong(11),
autocommit: false
}));
+
+ if (!alwaysCreateFeatureFlagEnabled) {
+ // We expect that if the partial index is dropped when the collection isn't empty, then on
+ // stepup we should not recreate the collection.
+ indexRecreationTest(false /* expectRecreateAfterDrop */);
+ } else {
+ // Creating the partial index when the collection isn't empty can be enabled by a feature
+ // flag.
+ indexRecreationTest(true /* expectRecreateAfterDrop */);
+ }
}
-function assertPartialIndexExists(node) {
- const configDB = node.getDB("config");
- const indexSpecs = assert.commandWorked(configDB.runCommand({"listIndexes": "transactions"}))
- .cursor.firstBatch;
- indexSpecs.sort((index0, index1) => index0.name > index1.name);
- assert.eq(indexSpecs.length, 2);
- const idIndexSpec = indexSpecs[0];
- assert.eq(idIndexSpec.key, {"_id": 1});
- const partialIndexSpec = indexSpecs[1];
- assert.eq(partialIndexSpec.key, {"parentLsid": 1, "_id.txnNumber": 1, "_id": 1});
- assert.eq(partialIndexSpec.partialFilterExpression, {"parentLsid": {"$exists": true}});
+{
+ const st = new ShardingTest({shards: {rs0: {nodes: 2}}});
+ runTest(st, false /* alwaysCreateFeatureFlagEnabled */);
+ st.stop();
}
-function assertFindUsesCoveredQuery(node) {
- const configTxnColl = node.getCollection(kConfigTxnNs);
- const childSessionDoc = configTxnColl.findOne({
- "_id.id": sessionUUID,
- "_id.txnNumber": childLsid.txnNumber,
- "_id.txnUUID": childLsid.txnUUID
+{
+ const featureFlagSt = new ShardingTest({
+ shards: 1,
+ other: {
+ rs: {nodes: 2},
+ rsOptions:
+ {setParameter: "featureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp=true"}
+ }
});
- const explainRes = assert.commandWorked(
- configTxnColl.explain()
- .find({"parentLsid": parentSessionDoc._id, "_id.txnNumber": childLsid.txnNumber},
- {_id: 1})
- .finish());
- const winningPlan = getWinningPlan(explainRes.queryPlanner);
- assert.eq(winningPlan.stage, "PROJECTION_COVERED");
- assert.eq(winningPlan.inputStage.stage, "IXSCAN");
-
- const findRes =
- configTxnColl
- .find({"parentLsid": parentSessionDoc._id, "_id.txnNumber": childLsid.txnNumber},
- {_id: 1})
- .toArray();
- assert.eq(findRes.length, 1);
- assert.eq(findRes[0]._id, childSessionDoc._id);
-}
+ // Sanity check the feature flag was enabled.
+ assert(assert
+ .commandWorked(featureFlagSt.rs0.getPrimary().adminCommand({
+ getParameter: 1,
+ featureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp: 1
+ }))
+ .featureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp.value);
+ assert(assert
+ .commandWorked(featureFlagSt.rs0.getSecondary().adminCommand({
+ getParameter: 1,
+ featureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp: 1
+ }))
+ .featureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp.value);
-runRetryableInternalTransaction(childTxnNumber);
-assert.eq(shard0PrimaryConfigTxnColl.count({"_id.id": sessionUUID}), 2);
-
-st.rs0.nodes.forEach(node => {
- assertPartialIndexExists(node);
- assertFindUsesCoveredQuery(node);
-});
-
-childTxnNumber++;
-runRetryableInternalTransaction(childTxnNumber);
-assert.eq(shard0PrimaryConfigTxnColl.count({"_id.id": sessionUUID}), 2);
-
-st.rs0.nodes.forEach(node => {
- assertPartialIndexExists(node);
- assertFindUsesCoveredQuery(node);
-});
-
-//
-// Verify clients can create the index only if they provide the exact specification and that
-// operations requiring the index fails if it does not exist.
-//
-
-const indexConn = st.rs0.getPrimary();
-assert.commandWorked(indexConn.getCollection("config.transactions").dropIndex("parent_lsid"));
-
-// Normal writes don't involve config.transactions, so they succeed.
-assert.commandWorked(indexConn.getDB(kDbName).runCommand(
- {insert: kCollName, documents: [{x: 1}], lsid: {id: UUID()}}));
-
-// Retryable writes read from the partial index, so they fail.
-let res = assert.commandFailedWithCode(
- indexConn.getDB(kDbName).runCommand(
- {insert: kCollName, documents: [{x: 1}], lsid: {id: UUID()}, txnNumber: NumberLong(11)}),
- ErrorCodes.BadValue);
-assert(res.errmsg.includes("Please create an index directly "), tojson(res));
-
-// User transactions read from the partial index, so they fail.
-assert.commandFailedWithCode(indexConn.getDB(kDbName).runCommand({
- insert: kCollName,
- documents: [{x: 1}],
- lsid: {id: UUID()},
- txnNumber: NumberLong(11),
- startTransaction: true,
- autocommit: false
-}),
- ErrorCodes.BadValue);
-
-// Non retryable internal transactions do not read from or update the partial index, so they can
-// succeed without the index existing.
-let nonRetryableTxnSession = {id: UUID(), txnUUID: UUID()};
-assert.commandWorked(indexConn.getDB(kDbName).runCommand({
- insert: kCollName,
- documents: [{x: 1}],
- lsid: nonRetryableTxnSession,
- txnNumber: NumberLong(11),
- stmtId: NumberInt(0),
- startTransaction: true,
- autocommit: false
-}));
-assert.commandWorked(indexConn.adminCommand({
- commitTransaction: 1,
- lsid: nonRetryableTxnSession,
- txnNumber: NumberLong(11),
- autocommit: false
-}));
-
-// Retryable transactions read from the partial index, so they fail.
-assert.commandFailedWithCode(indexConn.getDB(kDbName).runCommand({
- insert: kCollName,
- documents: [{x: 1}],
- lsid: {id: UUID(), txnUUID: UUID(), txnNumber: NumberLong(2)},
- txnNumber: NumberLong(11),
- stmtId: NumberInt(0),
- startTransaction: true,
- autocommit: false
-}),
- ErrorCodes.BadValue);
-
-// Recreating the partial index requires the exact options used internally, but in any order.
-assert.commandFailedWithCode(indexConn.getDB("config").runCommand({
- createIndexes: "transactions",
- indexes: [{v: 2, name: "parent_lsid", key: {parentLsid: 1, "_id.txnNumber": 1, _id: 1}}],
-}),
- ErrorCodes.IllegalOperation);
-assert.commandWorked(indexConn.getDB("config").runCommand({
- createIndexes: "transactions",
- indexes: [{
- name: "parent_lsid",
- key: {parentLsid: 1, "_id.txnNumber": 1, _id: 1},
- partialFilterExpression: {parentLsid: {$exists: true}},
- v: 2,
- }],
-}));
-
-// Operations involving the index should succeed now.
-
-assert.commandWorked(indexConn.getDB(kDbName).runCommand(
- {insert: kCollName, documents: [{x: 1}], lsid: {id: UUID()}}));
-
-assert.commandWorked(indexConn.getDB(kDbName).runCommand(
- {insert: kCollName, documents: [{x: 1}], lsid: {id: UUID()}, txnNumber: NumberLong(11)}));
-
-let userSessionAfter = {id: UUID()};
-assert.commandWorked(indexConn.getDB(kDbName).runCommand({
- insert: kCollName,
- documents: [{x: 1}],
- lsid: userSessionAfter,
- txnNumber: NumberLong(11),
- startTransaction: true,
- autocommit: false
-}));
-assert.commandWorked(indexConn.adminCommand(
- {commitTransaction: 1, lsid: userSessionAfter, txnNumber: NumberLong(11), autocommit: false}));
-
-let nonRetryableTxnSessionAfter = {id: UUID(), txnUUID: UUID()};
-assert.commandWorked(indexConn.getDB(kDbName).runCommand({
- insert: kCollName,
- documents: [{x: 1}],
- lsid: nonRetryableTxnSessionAfter,
- txnNumber: NumberLong(11),
- stmtId: NumberInt(0),
- startTransaction: true,
- autocommit: false
-}));
-assert.commandWorked(indexConn.adminCommand({
- commitTransaction: 1,
- lsid: nonRetryableTxnSessionAfter,
- txnNumber: NumberLong(11),
- autocommit: false
-}));
-
-let retryableTxnSessionAfter = {id: UUID(), txnUUID: UUID(), txnNumber: NumberLong(2)};
-assert.commandWorked(indexConn.getDB(kDbName).runCommand({
- insert: kCollName,
- documents: [{x: 1}],
- lsid: retryableTxnSessionAfter,
- txnNumber: NumberLong(11),
- stmtId: NumberInt(0),
- startTransaction: true,
- autocommit: false
-}));
-assert.commandWorked(indexConn.adminCommand({
- commitTransaction: 1,
- lsid: retryableTxnSessionAfter,
- txnNumber: NumberLong(11),
- autocommit: false
-}));
-
-st.stop();
+ runTest(featureFlagSt, true /* alwaysCreateFeatureFlagEnabled */);
+ featureFlagSt.stop();
+}
})();
diff --git a/jstests/sharding/internal_txns/retryable_findAndModify_basic.js b/jstests/sharding/internal_txns/retryable_findAndModify_basic.js
index 7221e192ac0..09f4d90054e 100644
--- a/jstests/sharding/internal_txns/retryable_findAndModify_basic.js
+++ b/jstests/sharding/internal_txns/retryable_findAndModify_basic.js
@@ -13,16 +13,22 @@ const transactionTest = new RetryableInternalTransactionTest();
{
jsTest.log("Test that non-internal transactions cannot be retried");
- const lsid = {id: UUID()};
- const testOptions = {expectRetryToSucceed: false};
- transactionTest.runFindAndModifyTestsEnableImageCollection(lsid, testOptions);
+ const makeSessionIdFunc = () => {
+ return {id: UUID()};
+ };
+ const expectRetryToSucceed = false;
+ transactionTest.runFindAndModifyTestsEnableImageCollection(
+ {txnOptions: {makeSessionIdFunc}, expectRetryToSucceed});
}
{
jsTest.log("Test that non-retryable internal transactions cannot be retried");
- const lsid = {id: UUID(), txnUUID: UUID()};
- const testOptions = {expectRetryToSucceed: false};
- transactionTest.runFindAndModifyTestsEnableImageCollection(lsid, testOptions);
+ const makeSessionIdFunc = () => {
+ return {id: UUID(), txnUUID: UUID()};
+ };
+ const expectRetryToSucceed = false;
+ transactionTest.runFindAndModifyTestsEnableImageCollection(
+ {txnOptions: {makeSessionIdFunc}, expectRetryToSucceed});
}
{
diff --git a/jstests/sharding/internal_txns/retryable_writes_basic.js b/jstests/sharding/internal_txns/retryable_writes_basic.js
index d3ebdadce81..666bf93416a 100644
--- a/jstests/sharding/internal_txns/retryable_writes_basic.js
+++ b/jstests/sharding/internal_txns/retryable_writes_basic.js
@@ -17,16 +17,22 @@ const transactionTest = new RetryableInternalTransactionTest();
{
jsTest.log("Test that non-internal transactions cannot be retried");
- const lsid = {id: UUID()};
- const testOptions = {expectRetryToSucceed: false};
- transactionTest.runInsertUpdateDeleteTests(lsid, testOptions);
+ const makeSessionIdFunc = () => {
+ return {id: UUID()};
+ };
+ const expectRetryToSucceed = false;
+ transactionTest.runInsertUpdateDeleteTests(
+ {txnOptions: {makeSessionIdFunc}, expectRetryToSucceed});
}
{
jsTest.log("Test that non-retryable internal transactions cannot be retried");
- const lsid = {id: UUID(), txnUUID: UUID()};
- const testOptions = {expectRetryToSucceed: false};
- transactionTest.runInsertUpdateDeleteTests(lsid, testOptions);
+ const makeSessionIdFunc = () => {
+ return {id: UUID(), txnUUID: UUID()};
+ };
+ const expectRetryToSucceed = false;
+ transactionTest.runInsertUpdateDeleteTests(
+ {txnOptions: {makeSessionIdFunc}, expectRetryToSucceed});
}
{
diff --git a/jstests/sharding/internal_txns/retryable_writes_retry_conflict.js b/jstests/sharding/internal_txns/retryable_writes_retry_conflict.js
index e2a8fbd2096..922e729b718 100644
--- a/jstests/sharding/internal_txns/retryable_writes_retry_conflict.js
+++ b/jstests/sharding/internal_txns/retryable_writes_retry_conflict.js
@@ -83,9 +83,14 @@ function testBlockingRetry(retryFunc, testOpts = {
commitCmdObj.commitTimestamp = preparedTxnRes.prepareTimestamp;
}
- // Retry and wait for it to block behind the internal transaction above.
- const fp = configureFailPoint(
- shard0Primary, "waitAfterNewStatementBlocksBehindOpenInternalTransactionForRetryableWrite");
+ let fp;
+ if (testOpts.prepareBeforeRetry) {
+ // A prepared transaction cannot be interrupted by a retry, so retry and wait for it to block
+ // behind the internal transaction above.
+ fp = configureFailPoint(
+ shard0Primary,
+ "waitAfterNewStatementBlocksBehindOpenInternalTransactionForRetryableWrite");
+ }
const retryThread = new Thread(retryFunc, {
shard0RstArgs: createRstArgs(st.rs0),
parentSessionUUIDString: extractUUIDFromObject(parentLsid.id),
@@ -97,18 +102,33 @@ function testBlockingRetry(retryFunc, testOpts = {
stepDownPrimaryAfterBlockingRetry: testOpts.stepDownPrimaryAfterBlockingRetry
});
retryThread.start();
- fp.wait();
- fp.off();
+ if (testOpts.prepareBeforeRetry) {
+ // The retry should block behind the prepared transaction.
+ fp.wait();
+ fp.off();
+ } else {
+ // The retry should complete without blocking.
+ retryThread.join();
+ }
if (testOpts.stepDownPrimaryAfterBlockingRetry) {
stepDownShard0Primary();
}
// Commit or abort the internal transaction, and verify that the write statement executed
- // exactly once despite the concurrent retry.
- assert.commandWorked(
- shard0TestDB.adminCommand(testOpts.abortAfterBlockingRetry ? abortCmdObj : commitCmdObj));
- retryThread.join();
+ // exactly once despite the concurrent retry, whether or not the retry interrupted the original
+ // attempt.
+ if (testOpts.prepareBeforeRetry) {
+ assert.commandWorked(shard0TestDB.adminCommand(
+ testOpts.abortAfterBlockingRetry ? abortCmdObj : commitCmdObj));
+ retryThread.join();
+ } else {
+ // The retry should have interrupted the original attempt.
+ assert.commandFailedWithCode(
+ shard0TestDB.adminCommand(testOpts.abortAfterBlockingRetry ? abortCmdObj
+ : commitCmdObj),
+ ErrorCodes.NoSuchTransaction);
+ }
assert.eq(shard0TestColl.count(docToInsert), 1);
assert.commandWorked(mongosTestColl.remove({}));
diff --git a/jstests/sharding/large_chunk.js b/jstests/sharding/large_chunk.js
index 6c1cefadcc6..8097ea940e9 100644
--- a/jstests/sharding/large_chunk.js
+++ b/jstests/sharding/large_chunk.js
@@ -5,8 +5,6 @@
* of 5MB across all sharding tests in wiredTiger.
* @tags: [
* resource_intensive,
- * requires_fcv_53,
- * featureFlagPerCollBalancingSettings,
* ]
*/
(function() {
diff --git a/jstests/sharding/libs/resharding_test_fixture.js b/jstests/sharding/libs/resharding_test_fixture.js
index 13d39674f0f..128ea61ebf1 100644
--- a/jstests/sharding/libs/resharding_test_fixture.js
+++ b/jstests/sharding/libs/resharding_test_fixture.js
@@ -326,9 +326,8 @@ var ReshardingTest = class {
this._commandDoneSignal = new CountDownLatch(1);
- this._reshardingThread = new Thread(
- function(
- host, ns, newShardKeyPattern, newChunks, commandDoneSignal, expectedErrorCode) {
+ this._reshardingThread =
+ new Thread(function(host, ns, newShardKeyPattern, newChunks, commandDoneSignal) {
const conn = new Mongo(host);
// We allow the client to retry the reshardCollection a large but still finite
@@ -359,18 +358,8 @@ var ReshardingTest = class {
}
}
- if (expectedErrorCode === ErrorCodes.OK) {
- assert.commandWorked(res);
- } else {
- assert.commandFailedWithCode(res, expectedErrorCode);
- }
- },
- this._st.s.host,
- this._ns,
- newShardKeyPattern,
- newChunks,
- this._commandDoneSignal,
- expectedErrorCode);
+ return res;
+ }, this._st.s.host, this._ns, newShardKeyPattern, newChunks, this._commandDoneSignal);
this._reshardingThread.start();
this._isReshardingActive = true;
@@ -459,7 +448,7 @@ var ReshardingTest = class {
fp.off();
} catch (disableFailpointError) {
print(`Ignoring error from disabling the resharding coordinator failpoint: ${
- tojson(disableFailpointError)}`);
+ tojsononeline(disableFailpointError)}`);
print(
"The config server primary and the mongo shell along with it are expected" +
@@ -476,13 +465,16 @@ var ReshardingTest = class {
try {
this._reshardingThread.join();
} catch (joinError) {
- print(`Ignoring error from the resharding thread: ${tojson(joinError)}`);
+ print(`Ignoring error from the resharding thread: ${tojsononeline(joinError)}`);
+ } finally {
+ print(`Ignoring response from the resharding thread: ${
+ tojsononeline(this._reshardingThread.returnData())}`);
}
this._isReshardingActive = false;
} catch (killOpError) {
print(`Ignoring error from sending killOp to the reshardCollection command: ${
- tojson(killOpError)}`);
+ tojsononeline(killOpError)}`);
print("The mongo shell is expected to abort due to the resharding thread being" +
" left unjoined");
@@ -587,6 +579,12 @@ var ReshardingTest = class {
this._isReshardingActive = false;
}
+ if (expectedErrorCode === ErrorCodes.OK) {
+ assert.commandWorked(this._reshardingThread.returnData());
+ } else {
+ assert.commandFailedWithCode(this._reshardingThread.returnData(), expectedErrorCode);
+ }
+
// Reaching this line implies the `_reshardingThread` has successfully exited without
// throwing an exception. Assert that we performed all expected correctness checks.
assert(performCorrectnessChecks, {
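For context on the refactor above: the resharding thread now returns the raw reshardCollection response instead of asserting on it, and the caller inspects that response through Thread.returnData() after join(). A minimal sketch of the pattern, assuming the shell's parallelTester Thread helper; the ping command and the expectedErrorCode variable are illustrative placeholders, not part of the patch:

    load("jstests/libs/parallelTester.js");  // For Thread.

    const expectedErrorCode = ErrorCodes.OK;  // Placeholder for the value the fixture receives.
    const thread = new Thread(function(host) {
        const conn = new Mongo(host);
        // Return the raw response; the caller decides how to assert on it.
        return conn.adminCommand({ping: 1});
    }, db.getMongo().host);
    thread.start();
    thread.join();
    const res = thread.returnData();
    if (expectedErrorCode === ErrorCodes.OK) {
        assert.commandWorked(res);
    } else {
        assert.commandFailedWithCode(res, expectedErrorCode);
    }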
diff --git a/jstests/sharding/libs/shard_versioning_util.js b/jstests/sharding/libs/shard_versioning_util.js
index aa9a33ad44f..4d2f76dea5b 100644
--- a/jstests/sharding/libs/shard_versioning_util.js
+++ b/jstests/sharding/libs/shard_versioning_util.js
@@ -5,7 +5,11 @@ var ShardVersioningUtil = (function() {
/*
* Shard version indicating that shard version checking must be skipped.
*/
- const kIgnoredShardVersion = [Timestamp(0, 0), ObjectId("00000000ffffffffffffffff")];
+ const kIgnoredShardVersion = {
+ e: ObjectId("00000000ffffffffffffffff"),
+ t: Timestamp(Math.pow(2, 32) - 1, Math.pow(2, 32) - 1),
+ v: Timestamp(0, 0)
+ };
/*
* Returns the metadata for the collection in the shard's catalog cache.
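With kIgnoredShardVersion now an object, call sites switch from positional access ([0], [1]) to the named fields .v, .e and .t, as the profiler filters later in this patch show. A hedged sketch of how a test can attach it when sending a write directly to a shard; 'shardConn' (a direct connection to a shard primary) and the sharded collection 'test.foo' are assumed placeholders:

    load('jstests/sharding/libs/shard_versioning_util.js');  // For ShardVersioningUtil.

    // 'shardConn' and the sharded collection 'test.foo' are assumed to exist.
    const res = shardConn.getDB("test").runCommand({
        update: "foo",
        updates: [{q: {}, u: {$inc: {c: 1}}, multi: true}],
        shardVersion: ShardVersioningUtil.kIgnoredShardVersion,
    });
    assert.commandWorked(res);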
diff --git a/jstests/sharding/multi_writes_with_shard_version_ignored_dont_bubble_up_critical_section.js b/jstests/sharding/multi_writes_with_shard_version_ignored_dont_bubble_up_critical_section.js
new file mode 100644
index 00000000000..2f055ec08a0
--- /dev/null
+++ b/jstests/sharding/multi_writes_with_shard_version_ignored_dont_bubble_up_critical_section.js
@@ -0,0 +1,154 @@
+/*
+ * Tests that multi-writes where the router attaches 'shardVersion: IGNORED' (i.e. if they need to
+ * target several shards AND are not part of a txn) do not bubble up StaleConfig errors due to
+ * ongoing critical sections. Instead, the shard yields and waits for the critical section to finish
+ * and then continues the write plan.
+ *
+ * @tags: [
+ * requires_fcv_61,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load('jstests/libs/parallel_shell_helpers.js');
+load("jstests/libs/fail_point_util.js");
+
+// Configure 'internalQueryExecYieldIterations' on both shards so that operations yield on
+// every 10th PlanExecutor iteration.
+var st = new ShardingTest({
+ shards: 2,
+ rs: {setParameter: {internalQueryExecYieldIterations: 10}},
+ other: {enableBalancer: false}
+});
+
+const dbName = "test";
+const collName = "foo";
+const ns = dbName + "." + collName;
+const numDocs = 100;
+let coll = st.s.getCollection(ns);
+
+assert.commandWorked(
+ st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName}));
+
+function setupTest() {
+ coll.drop();
+ assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {x: 1}}));
+
+ // Create three chunks:
+    // - [MinKey, 0) initially on shard 0, with no documents. This chunk will be migrated during
+    //   the test execution.
+    // - [0, numDocs) on shard 0. Contains 'numDocs' documents.
+    // - [numDocs, MaxKey) on shard 1. Contains no documents.
+ assert.commandWorked(st.s.adminCommand({split: ns, middle: {x: 0}}));
+ assert.commandWorked(st.s.adminCommand({split: ns, middle: {x: numDocs}}));
+ assert.commandWorked(st.s.adminCommand(
+ {moveChunk: ns, find: {x: numDocs}, to: st.shard1.shardName, waitForDelete: true}));
+
+ jsTest.log("Inserting initial data.");
+ const bulkOp = coll.initializeOrderedBulkOp();
+ for (let i = 0; i < numDocs; ++i) {
+ bulkOp.insert({x: i, c: 0});
+ }
+ assert.commandWorked(bulkOp.execute());
+ jsTest.log("Inserted initial data.");
+}
+
+function runMigration() {
+ const awaitResult = startParallelShell(
+ funWithArgs(function(ns, toShard) {
+ jsTest.log("Starting migration.");
+ assert.commandWorked(db.adminCommand({moveChunk: ns, find: {x: -1}, to: toShard}));
+ jsTest.log("Completed migration.");
+ }, ns, st.shard1.shardName), st.s.port);
+
+ return awaitResult;
+}
+
+function updateOperationFn(shardColl, numInitialDocsOnShard0) {
+ load('jstests/sharding/libs/shard_versioning_util.js'); // For kIgnoredShardVersion
+
+ jsTest.log("Begin multi-update.");
+
+ // Send a multi-update with 'shardVersion: IGNORED' directly to the shard, as if we were a
+ // router.
+ const result = assert.commandWorked(shardColl.runCommand({
+ update: shardColl.getName(),
+ updates: [{q: {}, u: {$inc: {c: 1}}, multi: true}],
+ shardVersion: ShardVersioningUtil.kIgnoredShardVersion
+ }));
+
+ jsTest.log("End multi-update. Result: " + tojson(result));
+
+    // Check that all documents got updated. Despite the weak guarantees of {multi: true} writes
+ // concurrent with migrations, this has to be the case in this test because the migrated chunk
+ // does not contain any document.
+ assert.eq(numInitialDocsOnShard0, shardColl.find({c: 1}).itcount());
+}
+
+function deleteOperationFn(shardColl, numInitialDocsOnShard0) {
+ load('jstests/sharding/libs/shard_versioning_util.js'); // For kIgnoredShardVersion
+
+ jsTest.log("Begin multi-delete");
+
+ // Send a multi-delete with 'shardVersion: IGNORED' directly to the shard, as if we were a
+ // router.
+ const result = assert.commandWorked(shardColl.runCommand({
+ delete: shardColl.getName(),
+ deletes: [{q: {}, limit: 0}],
+ shardVersion: ShardVersioningUtil.kIgnoredShardVersion
+ }));
+
+ jsTest.log("End multi-delete. Result: " + tojson(result));
+
+ // Check that all documents got deleted. Despite the weak guarantees of {multi: true} writes
+ // concurrent with migrations, this has to be the case in this test because the migrated chunk
+ // does not contain any document.
+ assert.eq(0, shardColl.find().itcount());
+}
+
+function runTest(writeOpFn) {
+ setupTest();
+
+ let fp1 = configureFailPoint(
+ st.rs0.getPrimary(), 'setYieldAllLocksHang', {namespace: coll.getFullName()});
+
+ const awaitWriteResult = startParallelShell(
+ funWithArgs(function(writeOpFn, dbName, collName, numDocs) {
+ const shardColl = db.getSiblingDB(dbName)[collName];
+ writeOpFn(shardColl, numDocs);
+ }, writeOpFn, coll.getDB().getName(), coll.getName(), numDocs), st.rs0.getPrimary().port);
+
+ // Wait for the write op to yield.
+ fp1.wait();
+ jsTest.log("Multi-write yielded");
+
+ // Start chunk migration and wait for it to enter the critical section.
+ let failpointHangMigrationWhileInCriticalSection =
+ configureFailPoint(st.rs0.getPrimary(), 'moveChunkHangAtStep5');
+ const awaitMigration = runMigration();
+ failpointHangMigrationWhileInCriticalSection.wait();
+
+ // Let the multi-write resume from the yield.
+ jsTest.log("Resuming yielded multi-write");
+ fp1.off();
+
+    // Let the multi-write run for a bit after resuming from the yield. It will encounter the
+ // critical section.
+ sleep(1000);
+
+ // Let the migration continue and release the critical section.
+ jsTest.log("Letting migration exit its critical section and complete");
+ failpointHangMigrationWhileInCriticalSection.off();
+ awaitMigration();
+
+ // Wait for the write op to finish. It should succeed.
+ awaitWriteResult();
+}
+
+runTest(updateOperationFn);
+runTest(deleteOperationFn);
+
+st.stop();
+})();
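The test above leans on the parallel-shell helper pattern for the concurrent writer and the migration. A condensed, illustrative outline of that pattern, assuming 'st' is a running ShardingTest; the namespace and shard name are placeholders:

    load('jstests/libs/parallel_shell_helpers.js');  // For funWithArgs().

    const awaitShell = startParallelShell(
        funWithArgs(function(ns, toShard) {
            assert.commandWorked(db.adminCommand({moveChunk: ns, find: {x: -1}, to: toShard}));
        }, "test.foo", st.shard1.shardName),
        st.s.port);

    // ... drive fail points or other concurrent work here ...

    awaitShell();  // Joins the parallel shell and fails the test if it exited with an error.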
diff --git a/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js b/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js
index c3924654a74..ae525d14c17 100644
--- a/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js
+++ b/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js
@@ -116,12 +116,10 @@ const isShardedLookupEnabled = getShardedLookupParam.hasOwnProperty("featureFlag
let res = st.getPrimaryShard(jsTestName()).getDB("admin").adminCommand({
getParameter: 1,
- featureFlagSBELookupPushdown: 1,
internalQueryForceClassicEngine: 1
});
-let isSBELookupEnabled = res.ok && res.hasOwnProperty("featureFlagSBELookupPushdown") &&
- res.hasOwnProperty("internalQueryForceClassicEngine") &&
- res.featureFlagSBELookupPushdown.value && !res.internalQueryForceClassicEngine;
+let isSBELookupEnabled = res.ok && res.hasOwnProperty("internalQueryForceClassicEngine") &&
+ !res.internalQueryForceClassicEngine;
// Now run a getMore for each of the test cases. The collection has become sharded mid-iteration, so
// we should observe the error code associated with the test case.
@@ -198,13 +196,6 @@ assert.commandWorked(shard0.getPrimary().adminCommand({fsync: 1}));
// both the source and foreign collections.
shard0.restart(shard0.getPrimary());
-// Refreshes the SBE lookup feature availability just in case that a different version of mongod
-// is running after restart.
-res = shard0.getPrimary().getDB("admin").adminCommand(
- {getParameter: 1, featureFlagSBELookupPushdown: 1});
-isSBELookupEnabled = res.ok && res.hasOwnProperty("featureFlagSBELookupPushdown") &&
- res.featureFlagSBELookupPushdown.value;
-
// Enable profiling on shard0 to capture stale shard version exceptions.
const primaryDB = shard0.getPrimary().getDB(jsTestName());
assert.commandWorked(primaryDB.setProfilingLevel(2));
diff --git a/jstests/sharding/query/lookup_unionWith_subpipeline_local_read.js b/jstests/sharding/query/lookup_unionWith_subpipeline_local_read.js
index 9107f1621b5..c5018e52c6a 100644
--- a/jstests/sharding/query/lookup_unionWith_subpipeline_local_read.js
+++ b/jstests/sharding/query/lookup_unionWith_subpipeline_local_read.js
@@ -14,7 +14,6 @@ load('jstests/libs/profiler.js'); // For various profiler helpers.
load('jstests/aggregation/extras/utils.js'); // For arrayEq()
load("jstests/libs/fail_point_util.js"); // for configureFailPoint.
load("jstests/libs/log.js"); // For findMatchingLogLines.
-load("jstests/libs/feature_flag_util.js"); // For isEnabled.
const st = new ShardingTest({name: jsTestName(), mongos: 1, shards: 2, rs: {nodes: 2}});
@@ -510,12 +509,12 @@ assertAggResultAndRouting(pipeline, expectedRes, {comment: "lookup_foreign_does_
// collection and needs to target shards to properly resolve it. Then, it can use the local
// read path for each subpipeline query.
subPipelineLocal: [4, 0],
- // If the $lookup is pushed down, we will try to take a lock on the foreign collection to check
+ // Because $lookup is pushed down, we will try to take a lock on the foreign collection to check
// foreign collection's sharding state. Given that the stale shard version is resolved earlier
// and we've figured out that the foreign collection is unsharded, we no longer need to target a
// shard and instead can read locally. As such, we will not generate an entry in the profiler
// for querying the foreign collection.
- subPipelineRemote: FeatureFlagUtil.isEnabled(mongosDB, "SBELookupPushdown") ? [0, 0] : [1, 0],
+ subPipelineRemote: [0, 0],
});
//
diff --git a/jstests/sharding/read_write_concern_defaults_application.js b/jstests/sharding/read_write_concern_defaults_application.js
index 9b8b3a1efda..3f83dfe4db9 100644
--- a/jstests/sharding/read_write_concern_defaults_application.js
+++ b/jstests/sharding/read_write_concern_defaults_application.js
@@ -372,17 +372,12 @@ let testCases = {
checkWriteConcern: false,
},
donorAbortMigration: {skip: "does not accept read or write concern"},
- // TODO(SERVER-61845) : remove overrides once possible
- donorAbortSplit: {skip: "has been removed from the server"},
donorForgetMigration: {skip: "does not accept read or write concern"},
- donorForgetSplit: {skip: "has been removed from the server"},
donorStartMigration: {skip: "does not accept read or write concern"},
- donorStartSplit: {skip: "has been removed from the server"},
donorWaitForMigrationToCommit: {skip: "does not accept read or write concern"},
- // TODO(SERVER-63743) : split cmds need to support writeConcern.
- abortShardSplit: {skip: "does not accept read or write concern"},
- commitShardSplit: {skip: "does not accept read or write concern"},
- forgetShardSplit: {skip: "does not accept read or write concern"},
+ abortShardSplit: {skip: "internal command"},
+ commitShardSplit: {skip: "internal command"},
+ forgetShardSplit: {skip: "internal command"},
driverOIDTest: {skip: "internal command"},
drop: {
setUp: function(conn) {
@@ -479,8 +474,6 @@ let testCases = {
fsync: {skip: "does not accept read or write concern"},
fsyncUnlock: {skip: "does not accept read or write concern"},
getAuditConfig: {skip: "does not accept read or write concern"},
- getChangeStreamOptions:
- {skip: "does not accept read or write concern"}, // TODO SERVER-65353 remove in 6.1.
getClusterParameter: {skip: "does not accept read or write concern"},
getCmdLineOpts: {skip: "does not accept read or write concern"},
getDatabaseVersion: {skip: "does not accept read or write concern"},
@@ -692,8 +685,6 @@ let testCases = {
serverStatus: {skip: "does not accept read or write concern"},
setAllowMigrations: {skip: "does not accept read or write concern"},
setAuditConfig: {skip: "does not accept read or write concern"},
- setChangeStreamOptions:
- {skip: "does not accept read or write concern"}, // TODO SERVER-65353 remove in 6.1.
setCommittedSnapshot: {skip: "internal command"},
setDefaultRWConcern: {skip: "special case (must run after all other commands)"},
setFeatureCompatibilityVersion: {skip: "does not accept read or write concern"},
diff --git a/jstests/sharding/reconfig_fails_no_cwwc_set_sharding.js b/jstests/sharding/reconfig_fails_no_cwwc_set_sharding.js
index 9753683ebb3..a805b82883a 100644
--- a/jstests/sharding/reconfig_fails_no_cwwc_set_sharding.js
+++ b/jstests/sharding/reconfig_fails_no_cwwc_set_sharding.js
@@ -58,7 +58,7 @@ let logPrefix = "While the shard is not part of a sharded cluster: ";
let shardServer = new ReplSetTest(
{name: "shardServer", nodes: 1, nodeOptions: {shardsvr: ""}, useHostName: true});
shardServer.startSet();
-shardServer.initiate();
+shardServer.initiateWithHighElectionTimeout();
jsTestLog(logPrefix + "Adding an arbiter node that will change IDWC to (w:1) should succeed.");
let arbiter = shardServer.add();
@@ -78,7 +78,7 @@ logPrefix = "While the shard is part of a sharded cluster: ";
shardServer = new ReplSetTest(
{name: "shardServer", nodes: 1, nodeOptions: {shardsvr: ""}, useHostName: true});
shardServer.startSet();
-shardServer.initiate();
+shardServer.initiateWithHighElectionTimeout();
const st = new ShardingTest({shards: 0, mongos: 1});
var admin = st.getDB('admin');
diff --git a/jstests/sharding/refine_collection_shard_key_basic.js b/jstests/sharding/refine_collection_shard_key_basic.js
index 671493d99ee..5f98a1c24aa 100644
--- a/jstests/sharding/refine_collection_shard_key_basic.js
+++ b/jstests/sharding/refine_collection_shard_key_basic.js
@@ -249,6 +249,8 @@ function validateUnrelatedCollAfterRefine(oldCollArr, oldChunkArr, oldTagsArr) {
jsTestLog('********** SIMPLE TESTS **********');
+var result;
+
// Should fail because arguments 'refineCollectionShardKey' and 'key' are invalid types.
assert.commandFailedWithCode(
mongos.adminCommand({refineCollectionShardKey: {_id: 1}, key: {_id: 1, aKey: 1}}),
@@ -355,36 +357,46 @@ assert.commandFailedWithCode(
dropAndReshardColl({_id: 1});
assert.commandWorked(mongos.getCollection(kNsName).createIndex({_id: 1, aKey: 1}, {sparse: true}));
-assert.commandFailedWithCode(
- mongos.adminCommand({refineCollectionShardKey: kNsName, key: {_id: 1, aKey: 1}}),
- ErrorCodes.InvalidOptions);
+result = mongos.adminCommand({refineCollectionShardKey: kNsName, key: {_id: 1, aKey: 1}});
+assert.commandFailedWithCode(result, ErrorCodes.InvalidOptions);
+assert(result.errmsg.includes("Index key is sparse."));
+
+// Should fail because index has a non-simple collation.
+dropAndReshardColl({aKey: 1});
+assert.commandWorked(mongos.getCollection(kNsName).createIndex({aKey: 1, bKey: 1}, {
+ collation: {
+ locale: "en",
+ }
+}));
+result = mongos.adminCommand({refineCollectionShardKey: kNsName, key: {aKey: 1, bKey: 1}});
+assert.commandFailedWithCode(result, ErrorCodes.InvalidOptions);
+assert(result.errmsg.includes("Index has a non-simple collation."));
// Should fail because only a partial index exists for new shard key {_id: 1, aKey: 1}.
dropAndReshardColl({_id: 1});
assert.commandWorked(mongos.getCollection(kNsName).createIndex(
{_id: 1, aKey: 1}, {partialFilterExpression: {aKey: {$gt: 0}}}));
-assert.commandFailedWithCode(
- mongos.adminCommand({refineCollectionShardKey: kNsName, key: {_id: 1, aKey: 1}}),
- ErrorCodes.InvalidOptions);
+result = mongos.adminCommand({refineCollectionShardKey: kNsName, key: {_id: 1, aKey: 1}});
+assert.commandFailedWithCode(result, ErrorCodes.InvalidOptions);
+assert(result.errmsg.includes("Index key is partial."));
// Should fail because only a multikey index exists for new shard key {_id: 1, aKey: 1}.
dropAndReshardColl({_id: 1});
assert.commandWorked(mongos.getCollection(kNsName).createIndex({_id: 1, aKey: 1}));
assert.commandWorked(mongos.getCollection(kNsName).insert({aKey: [1, 2, 3, 4, 5]}));
-assert.commandFailedWithCode(
- mongos.adminCommand({refineCollectionShardKey: kNsName, key: {_id: 1, aKey: 1}}),
- ErrorCodes.InvalidOptions);
+result = mongos.adminCommand({refineCollectionShardKey: kNsName, key: {_id: 1, aKey: 1}});
+assert.commandFailedWithCode(result, ErrorCodes.InvalidOptions);
+assert(result.errmsg.includes("Index key is multikey."));
// Should fail because current shard key {a: 1} is unique, new shard key is {a: 1, b: 1}, and an
// index only exists on {a: 1, b: 1, c: 1}.
dropAndReshardCollUnique({a: 1});
assert.commandWorked(mongos.getCollection(kNsName).createIndex({a: 1, b: 1, c: 1}));
-assert.commandFailedWithCode(
- mongos.adminCommand({refineCollectionShardKey: kNsName, key: {a: 1, b: 1}}),
- ErrorCodes.InvalidOptions);
+result = mongos.adminCommand({refineCollectionShardKey: kNsName, key: {a: 1, b: 1}});
+assert.commandFailedWithCode(result, ErrorCodes.InvalidOptions);
// Should work because current shard key {_id: 1} is not unique, new shard key is {_id: 1, aKey:
// 1}, and an index exists on {_id: 1, aKey: 1, bKey: 1}.
@@ -459,6 +471,43 @@ assert.commandFailedWithCode(
mongos.adminCommand({refineCollectionShardKey: kNsName, key: {aKey: 1, bKey: 1}}),
ErrorCodes.InvalidOptions);
+// Should fail because index key is sparse and index has non-simple collation.
+dropAndReshardColl({_id: 1});
+assert.commandWorked(mongos.getCollection(kNsName).createIndex({_id: 1, aKey: 1}, {
+ sparse: true,
+ collation: {
+ locale: "en",
+ }
+}));
+result = mongos.adminCommand({refineCollectionShardKey: kNsName, key: {_id: 1, aKey: 1}});
+assert.commandFailedWithCode(result, ErrorCodes.InvalidOptions);
+assert(result.errmsg.includes("Index key is sparse.") &&
+ result.errmsg.includes("Index has a non-simple collation."));
+
+// Should fail because index key is multikey and is partial.
+dropAndReshardColl({_id: 1});
+assert.commandWorked(mongos.getCollection(kNsName).createIndex(
+ {_id: 1, aKey: 1}, {name: "index_1_part", partialFilterExpression: {aKey: {$gt: 0}}}));
+assert.commandWorked(
+ mongos.getCollection(kNsName).createIndex({_id: 1, aKey: 1}, {name: "index_2"}));
+assert.commandWorked(mongos.getCollection(kNsName).insert({aKey: [1, 2, 3, 4, 5]}));
+
+result = mongos.adminCommand({refineCollectionShardKey: kNsName, key: {_id: 1, aKey: 1}});
+assert.commandFailedWithCode(result, ErrorCodes.InvalidOptions);
+assert(result.errmsg.includes("Index key is multikey.") &&
+ result.errmsg.includes("Index key is partial."));
+
+// Should fail because both indexes have keys that are incompatible: partial; sparse
+dropAndReshardColl({_id: 1});
+assert.commandWorked(mongos.getCollection(kNsName).createIndex(
+ {_id: 1, aKey: 1}, {name: "index_1_part", partialFilterExpression: {aKey: {$gt: 0}}}));
+assert.commandWorked(mongos.getCollection(kNsName).createIndex(
+ {_id: 1, aKey: 1}, {name: "index_2_sparse", sparse: true}));
+result = mongos.adminCommand({refineCollectionShardKey: kNsName, key: {_id: 1, aKey: 1}});
+assert.commandFailedWithCode(result, ErrorCodes.InvalidOptions);
+assert(result.errmsg.includes("Index key is partial.") &&
+ result.errmsg.includes("Index key is sparse."));
+
// Should work because a 'useful' index exists for new shard key {_id: 1, aKey: 1}.
dropAndReshardColl({_id: 1});
assert.commandWorked(mongos.getCollection(kNsName).createIndex({_id: 1, aKey: 1}));
diff --git a/jstests/sharding/resharding_abort_while_monitoring_to_commit.js b/jstests/sharding/resharding_abort_while_monitoring_to_commit.js
new file mode 100644
index 00000000000..cf1eb4e65e1
--- /dev/null
+++ b/jstests/sharding/resharding_abort_while_monitoring_to_commit.js
@@ -0,0 +1,67 @@
+/**
+ * Test that the resharding operation is aborted if any of the recipient shards encounters
+ * an error during the applying phase.
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/discover_topology.js");
+load("jstests/sharding/libs/resharding_test_fixture.js");
+
+const reshardingTest = new ReshardingTest({numDonors: 2, numRecipients: 1});
+reshardingTest.setup();
+
+const donorShardNames = reshardingTest.donorShardNames;
+const sourceCollection = reshardingTest.createShardedCollection({
+ ns: "reshardingDb.coll",
+ shardKeyPattern: {oldKey: 1},
+ chunks: [
+ {min: {oldKey: MinKey}, max: {oldKey: 0}, shard: donorShardNames[0]},
+ {min: {oldKey: 0}, max: {oldKey: MaxKey}, shard: donorShardNames[1]},
+ ],
+});
+
+const mongos = sourceCollection.getMongo();
+const topology = DiscoverTopology.findConnectedNodes(mongos);
+
+const donor0 = new Mongo(topology.shards[donorShardNames[0]].primary);
+
+const recipientShardNames = reshardingTest.recipientShardNames;
+const recipient = new Mongo(topology.shards[recipientShardNames[0]].primary);
+
+// We have the recipient shard fail the _shardsvrReshardingOperationTime command to verify the
+// ReshardingCoordinator can successfully abort the resharding operation even when the commit
+// monitor doesn't see the recipient shard as being caught up enough to engage the critical section
+// on the donor shards.
+const shardsvrReshardingOperationTimeFailpoint = configureFailPoint(recipient, "failCommand", {
+ failInternalCommands: true,
+ errorCode: ErrorCodes.Interrupted,
+ failCommands: ["_shardsvrReshardingOperationTime"],
+});
+
+reshardingTest.withReshardingInBackground(
+ {
+ newShardKeyPattern: {newKey: 1},
+ newChunks: [{min: {newKey: MinKey}, max: {newKey: MaxKey}, shard: recipientShardNames[0]}],
+ },
+ () => {
+ // We wait until cloneTimestamp has been chosen to guarantee that any subsequent writes will
+ // be applied by the ReshardingOplogApplier.
+ reshardingTest.awaitCloneTimestampChosen();
+
+        // We connect directly to one of the donor shards to perform an operation which will later
+ // cause the recipient shard to error during its resharding oplog application. Connecting
+ // directly to the shard bypasses any synchronization which might otherwise occur from the
+ // Sharding DDL Coordinator.
+ const donor0Collection = donor0.getCollection(sourceCollection.getFullName());
+ assert.commandWorked(donor0Collection.runCommand("collMod"));
+ },
+ {
+ expectedErrorCode: ErrorCodes.OplogOperationUnsupported,
+ });
+
+shardsvrReshardingOperationTimeFailpoint.off();
+
+reshardingTest.teardown();
+})();
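For reference, the 'failCommand' fail point used above is the generic hook for injecting errors into specific commands. A condensed sketch of the same configuration, where 'targetConn' is a placeholder for the node that should fail the command:

    load("jstests/libs/fail_point_util.js");  // For configureFailPoint().

    const fp = configureFailPoint(targetConn, "failCommand", {
        failInternalCommands: true,  // Also trigger for commands sent by other cluster nodes.
        errorCode: ErrorCodes.Interrupted,
        failCommands: ["_shardsvrReshardingOperationTime"],
    });

    // ... run the workload that should observe the injected error ...

    fp.off();  // Disable the fail point once the scenario has been exercised.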
diff --git a/jstests/sharding/resharding_metrics.js b/jstests/sharding/resharding_metrics.js
index 2378240d2b9..0a920fb91e2 100644
--- a/jstests/sharding/resharding_metrics.js
+++ b/jstests/sharding/resharding_metrics.js
@@ -108,7 +108,7 @@ allNodes.forEach((hostName) => {
assert(serverStatus.hasOwnProperty('countFailed'), debugStr());
assert(serverStatus.hasOwnProperty('active'), debugStr());
- assert(serverStatus.active.hasOwnProperty('documentsProcessed'), debugStr());
+ assert(serverStatus.active.hasOwnProperty('documentsCopied'), debugStr());
assert(serverStatus.hasOwnProperty('oldestActive'), debugStr());
assert(
diff --git a/jstests/sharding/resharding_metrics_increment.js b/jstests/sharding/resharding_metrics_increment.js
index 05feacbdadd..a8b188f64e9 100644
--- a/jstests/sharding/resharding_metrics_increment.js
+++ b/jstests/sharding/resharding_metrics_increment.js
@@ -125,7 +125,7 @@ const topology = DiscoverTopology.findConnectedNodes(mongos);
jsTest.log(`Resharding stats for ${mongo}: ${tojson(sub)}`);
verifyDict(sub.active, {
- "documentsProcessed": e.documents,
+ "documentsCopied": e.documents,
"oplogEntriesFetched": e.fetched,
"oplogEntriesApplied": e.applied,
"insertsApplied": e.opcounters.insert,
@@ -134,7 +134,7 @@ const topology = DiscoverTopology.findConnectedNodes(mongos);
});
// bytesCopied is harder to pin down but it should be >0.
- assert.betweenIn(1, sub.active['bytesWritten'], 1024, 'bytesWritten');
+ assert.betweenIn(1, sub.active['bytesCopied'], 1024, 'bytesCopied');
});
reshardingTest.teardown();
diff --git a/jstests/sharding/run_restore.js b/jstests/sharding/run_restore.js
index d8ed2fba027..729132efcab 100644
--- a/jstests/sharding/run_restore.js
+++ b/jstests/sharding/run_restore.js
@@ -16,7 +16,7 @@ const s =
let mongos = s.s0;
let db = s.getDB("test");
-if (!FeatureFlagUtil.isEnabled(db, "SelectiveBackup")) {
+if (!FeatureFlagUtil.isEnabled(s.configRS.getPrimary().getDB("test"), "SelectiveBackup")) {
jsTestLog("Skipping as featureFlagSelectiveBackup is not enabled");
s.stop();
return;
diff --git a/jstests/sharding/safe_secondary_reads_drop_recreate.js b/jstests/sharding/safe_secondary_reads_drop_recreate.js
index d6ad842361f..cbb3c4af39a 100644
--- a/jstests/sharding/safe_secondary_reads_drop_recreate.js
+++ b/jstests/sharding/safe_secondary_reads_drop_recreate.js
@@ -147,6 +147,7 @@ let testCases = {
connPoolSync: {skip: "does not return user data"},
connectionStatus: {skip: "does not return user data"},
convertToCapped: {skip: "primary only"},
+ coordinateCommitTransaction: {skip: "unimplemented. Serves only as a stub."},
count: {
setUp: function(mongosConn) {
assert.commandWorked(mongosConn.getCollection(nss).insert({x: 1}));
@@ -213,8 +214,6 @@ let testCases = {
fsync: {skip: "does not return user data"},
fsyncUnlock: {skip: "does not return user data"},
getAuditConfig: {skip: "does not return user data"},
- getChangeStreamOptions:
- {skip: "does not return user data"}, // TODO SERVER-65353 remove in 6.1.
getClusterParameter: {skip: "does not return user data"},
getCmdLineOpts: {skip: "does not return user data"},
getDefaultRWConcern: {skip: "does not return user data"},
@@ -324,7 +323,6 @@ let testCases = {
serverStatus: {skip: "does not return user data"},
setAllowMigrations: {skip: "primary only"},
setAuditConfig: {skip: "does not return user data"},
- setChangeStreamOptions: {skip: "primary only"}, // TODO SERVER-65353 remove in 6.1.
setCommittedSnapshot: {skip: "does not return user data"},
setDefaultRWConcern: {skip: "primary only"},
setIndexCommitQuorum: {skip: "primary only"},
diff --git a/jstests/sharding/safe_secondary_reads_single_migration_suspend_range_deletion.js b/jstests/sharding/safe_secondary_reads_single_migration_suspend_range_deletion.js
index 3e01be669a4..6317bb85e40 100644
--- a/jstests/sharding/safe_secondary_reads_single_migration_suspend_range_deletion.js
+++ b/jstests/sharding/safe_secondary_reads_single_migration_suspend_range_deletion.js
@@ -163,6 +163,7 @@ let testCases = {
connPoolSync: {skip: "does not return user data"},
connectionStatus: {skip: "does not return user data"},
convertToCapped: {skip: "primary only"},
+ coordinateCommitTransaction: {skip: "unimplemented. Serves only as a stub."},
count: {
setUp: function(mongosConn) {
assert.commandWorked(mongosConn.getCollection(nss).insert({x: 1}));
@@ -246,8 +247,6 @@ let testCases = {
fsync: {skip: "does not return user data"},
fsyncUnlock: {skip: "does not return user data"},
getAuditConfig: {skip: "does not return user data"},
- getChangeStreamOptions:
- {skip: "does not return user data"}, // TODO SERVER-65353 remove in 6.1.
getClusterParameter: {skip: "does not return user data"},
getCmdLineOpts: {skip: "does not return user data"},
getDefaultRWConcern: {skip: "does not return user data"},
@@ -395,7 +394,6 @@ let testCases = {
serverStatus: {skip: "does not return user data"},
setAllowMigrations: {skip: "primary only"},
setAuditConfig: {skip: "does not return user data"},
- setChangeStreamOptions: {skip: "primary only"}, // TODO SERVER-65353 remove in 6.1.
setCommittedSnapshot: {skip: "does not return user data"},
setDefaultRWConcern: {skip: "primary only"},
setIndexCommitQuorum: {skip: "primary only"},
diff --git a/jstests/sharding/safe_secondary_reads_single_migration_waitForDelete.js b/jstests/sharding/safe_secondary_reads_single_migration_waitForDelete.js
index 64ea7271f2e..302c98f91c7 100644
--- a/jstests/sharding/safe_secondary_reads_single_migration_waitForDelete.js
+++ b/jstests/sharding/safe_secondary_reads_single_migration_waitForDelete.js
@@ -150,6 +150,7 @@ let testCases = {
connPoolSync: {skip: "does not return user data"},
connectionStatus: {skip: "does not return user data"},
convertToCapped: {skip: "primary only"},
+ coordinateCommitTransaction: {skip: "unimplemented. Serves only as a stub."},
count: {
setUp: function(mongosConn) {
assert.commandWorked(mongosConn.getCollection(nss).insert({x: 1}));
@@ -218,8 +219,6 @@ let testCases = {
fsync: {skip: "does not return user data"},
fsyncUnlock: {skip: "does not return user data"},
getAuditConfig: {skip: "does not return user data"},
- getChangeStreamOptions:
- {skip: "does not return user data"}, // TODO SERVER-65353 remove in 6.1.
getClusterParameter: {skip: "does not return user data"},
getCmdLineOpts: {skip: "does not return user data"},
getDefaultRWConcern: {skip: "does not return user data"},
@@ -331,7 +330,6 @@ let testCases = {
serverStatus: {skip: "does not return user data"},
setAllowMigrations: {skip: "primary only"},
setAuditConfig: {skip: "does not return user data"},
- setChangeStreamOptions: {skip: "primary only"}, // TODO SERVER-65353 remove in 6.1.
setCommittedSnapshot: {skip: "does not return user data"},
setDefaultRWConcern: {skip: "primary only"},
setIndexCommitQuorum: {skip: "primary only"},
@@ -514,7 +512,7 @@ for (let command of commands) {
{"command.shardVersion.0": {"$exists": true}},
{
"command.shardVersion.0":
- {$ne: ShardVersioningUtil.kIgnoredShardVersion[0]}
+ {$ne: ShardVersioningUtil.kIgnoredShardVersion.v}
},
]
},
@@ -523,7 +521,7 @@ for (let command of commands) {
{"command.shardVersion.v": {"$exists": true}},
{
"command.shardVersion.v":
- {$ne: ShardVersioningUtil.kIgnoredShardVersion[0]}
+ {$ne: ShardVersioningUtil.kIgnoredShardVersion.v}
},
]
},
@@ -536,7 +534,7 @@ for (let command of commands) {
{"command.shardVersion.1": {"$exists": true}},
{
"command.shardVersion.1":
- {$ne: ShardVersioningUtil.kIgnoredShardVersion[1]}
+ {$ne: ShardVersioningUtil.kIgnoredShardVersion.e}
},
]
},
@@ -545,7 +543,7 @@ for (let command of commands) {
{"command.shardVersion.e": {"$exists": true}},
{
"command.shardVersion.e":
- {$ne: ShardVersioningUtil.kIgnoredShardVersion[1]}
+ {$ne: ShardVersioningUtil.kIgnoredShardVersion.e}
},
]
},
diff --git a/jstests/sharding/set_cluster_parameter.js b/jstests/sharding/set_cluster_parameter.js
index 5ee591831a9..754245b02df 100644
--- a/jstests/sharding/set_cluster_parameter.js
+++ b/jstests/sharding/set_cluster_parameter.js
@@ -5,9 +5,6 @@
* inMemory variants
*
* @tags: [
- * # Requires all nodes to be running the latest binary.
- * requires_fcv_60,
- * featureFlagClusterWideConfig,
* does_not_support_stepdowns,
* requires_persistence,
* ]
diff --git a/jstests/sharding/sharding_balance2.js b/jstests/sharding/shard_max_size.js
index bfb57e1dec6..35d4fa70e22 100644
--- a/jstests/sharding/sharding_balance2.js
+++ b/jstests/sharding/shard_max_size.js
@@ -1,5 +1,7 @@
/**
* Test the maxSize setting for the addShard command.
+ *
+ * @tags: [does_not_support_stepdowns]
*/
(function() {
'use strict';
@@ -19,13 +21,6 @@ var s = new ShardingTest({
}
});
-// TODO SERVER-66754 review tests disabled because expecting initial chunks split
-if (FeatureFlagUtil.isEnabled(s.configRS.getPrimary().getDB('admin'), 'NoMoreAutoSplitter')) {
- jsTestLog("Skipping as featureFlagNoMoreAutoSplitter is enabled");
- s.stop();
- return;
-}
-
var db = s.getDB("test");
var names = s.getConnNames();
@@ -49,7 +44,6 @@ while (inserted < (40 * 1024 * 1024)) {
assert.commandWorked(bulk.execute());
assert.commandWorked(s.s0.adminCommand({shardcollection: "test.foo", key: {_id: 1}}));
-assert.gt(findChunksUtil.countChunksForNs(s.config, "test.foo"), 10);
var getShardSize = function(conn) {
var listDatabases = conn.getDB('admin').runCommand({listDatabases: 1});
diff --git a/jstests/sharding/sharding_balance3.js b/jstests/sharding/sharding_balance3.js
deleted file mode 100644
index b13751bd537..00000000000
--- a/jstests/sharding/sharding_balance3.js
+++ /dev/null
@@ -1,84 +0,0 @@
-// Waits for the balancer to run once, then stops it and checks that it is no longer running.
-
-(function() {
-
-load("jstests/sharding/libs/find_chunks_util.js");
-load("jstests/libs/feature_flag_util.js");
-
-var s = new ShardingTest({
- name: "slow_sharding_balance3",
- shards: 2,
- mongos: 1,
- other: {
- chunkSize: 1,
- enableBalancer: true,
- shardOptions:
- {setParameter: {internalQueryMaxBlockingSortMemoryUsageBytes: 32 * 1024 * 1024}}
- }
-});
-
-// TODO SERVER-66754 review tests disabled because expecting initial chunks split
-if (FeatureFlagUtil.isEnabled(s.configRS.getPrimary().getDB('admin'), 'NoMoreAutoSplitter')) {
- jsTestLog("Skipping as featureFlagNoMoreAutoSplitter is enabled");
- s.stop();
- return;
-}
-
-s.adminCommand({enablesharding: "test"});
-s.ensurePrimaryShard('test', s.shard1.shardName);
-
-s.config.settings.find().forEach(printjson);
-
-db = s.getDB("test");
-
-bigString = "";
-while (bigString.length < 10000)
- bigString += "asdasdasdasdadasdasdasdasdasdasdasdasda";
-
-inserted = 0;
-num = 0;
-var bulk = db.foo.initializeUnorderedBulkOp();
-while (inserted < (40 * 1024 * 1024)) {
- bulk.insert({_id: num++, s: bigString});
- inserted += bigString.length;
-}
-assert.commandWorked(bulk.execute());
-
-s.adminCommand({shardcollection: "test.foo", key: {_id: 1}});
-assert.lt(20, findChunksUtil.countChunksForNs(s.config, "test.foo"), "setup2");
-
-function diff1() {
- var x = s.chunkCounts("foo");
- printjson(x);
- return Math.max(x[s.shard0.shardName], x[s.shard1.shardName]) -
- Math.min(x[s.shard0.shardName], x[s.shard1.shardName]);
-}
-
-assert.lt(10, diff1());
-
-// Wait for balancer to kick in.
-var initialDiff = diff1();
-assert.soon(function() {
- return diff1() != initialDiff;
-}, "Balancer did not kick in", 5 * 60 * 1000, 1000);
-
-print("* A");
-print("disabling the balancer");
-s.stopBalancer();
-s.config.settings.find().forEach(printjson);
-print("* B");
-
-print(diff1());
-
-var currDiff = diff1();
-var waitTime = 0;
-var startTime = Date.now();
-while (waitTime < (1000 * 60)) {
- // Wait for 60 seconds to ensure balancer did not run
- assert.eq(currDiff, diff1(), "balance with stopped flag should not have happened");
- sleep(5000);
- waitTime = Date.now() - startTime;
-}
-
-s.stop();
-})();
diff --git a/jstests/sharding/sharding_migrate_cursor1.js b/jstests/sharding/sharding_migrate_cursor1.js
deleted file mode 100644
index c0ef8d391fd..00000000000
--- a/jstests/sharding/sharding_migrate_cursor1.js
+++ /dev/null
@@ -1,98 +0,0 @@
-/**
- * SERVER-2068
- *
- * This test is labeled resource intensive because its total io_write is 131MB compared to a median
- * of 5MB across all sharding tests in wiredTiger.
- * @tags: [resource_intensive]
- */
-(function() {
-
-load("jstests/libs/feature_flag_util.js");
-
-var chunkSize = 25;
-
-var s = new ShardingTest(
- {name: "migrate_cursor1", shards: 2, mongos: 1, other: {chunkSize: chunkSize}});
-// TODO SERVER-66754 review tests disabled because expecting initial chunks split
-if (FeatureFlagUtil.isEnabled(s.configRS.getPrimary().getDB('admin'), 'NoMoreAutoSplitter')) {
- jsTestLog("Skipping as featureFlagNoMoreAutoSplitter is enabled");
- s.stop();
- return;
-}
-
-s.adminCommand({enablesharding: "test"});
-db = s.getDB("test");
-s.ensurePrimaryShard('test', s.shard1.shardName);
-t = db.foo;
-
-bigString = "";
-stringSize = 1024;
-
-while (bigString.length < stringSize)
- bigString += "asdasdas";
-
-stringSize = bigString.length;
-docsPerChunk = Math.ceil((chunkSize * 1024 * 1024) / (stringSize - 12));
-numChunks = 5;
-numDocs = 20 * docsPerChunk;
-
-print("stringSize: " + stringSize + " docsPerChunk: " + docsPerChunk + " numDocs: " + numDocs);
-
-var bulk = t.initializeUnorderedBulkOp();
-for (var i = 0; i < numDocs; i++) {
- bulk.insert({_id: i, s: bigString});
-}
-assert.commandWorked(bulk.execute());
-
-s.adminCommand({shardcollection: "test.foo", key: {_id: 1}});
-
-assert.lt(numChunks, s.config.chunks.find().count(), "initial 1");
-
-primary = s.getPrimaryShard("test").getDB("test").foo;
-secondaryName = s.getOther(primary.name);
-secondary = secondaryName.getDB("test").foo;
-
-assert.eq(numDocs, primary.count(), "initial 2");
-assert.eq(0, secondary.count(), "initial 3");
-assert.eq(numDocs, t.count(), "initial 4");
-
-x = primary.find({_id: {$lt: 500}}).batchSize(2);
-x.next(); // 1. Create an open cursor
-
-print("start moving chunks...");
-
-// 2. Move chunk from s0 to s1 without waiting for deletion.
-// Command returns, but the deletion on s0 will block due to the open cursor.
-s.adminCommand({moveChunk: "test.foo", find: {_id: 0}, to: secondaryName.name});
-
-// 3. Start second moveChunk command from s0 to s1.
-// This moveChunk should not observe the above deletion as a 'mod', transfer it to s1 and cause
-// deletion on s1.
-// This moveChunk will wait for deletion.
-join = startParallelShell(
- "db.x.insert( {x:1} ); db.adminCommand( { moveChunk : 'test.foo' , find : { _id : " +
- docsPerChunk * 3 + " } , to : '" + secondaryName.name + "', _waitForDelete: true } )");
-assert.soon(function() {
- return db.x.count() > 0;
-}, "XXX", 30000, 1);
-
-// 4. Close the cursor to enable chunk deletion.
-print("itcount: " + x.itcount());
-
-x = null;
-for (i = 0; i < 5; i++)
- gc();
-
-print("cursor should be gone");
-
-// 5. Waiting for the second moveChunk to finish its deletion.
-// Note the deletion for the first moveChunk may not be finished.
-join();
-
-// assert.soon( function(){ return numDocs == t.count(); } , "at end 1" )
-// 6. Check the total number of docs on both shards to make sure no doc is lost.
-// Use itcount() to ignore orphan docments.
-assert.eq(numDocs, t.find().itcount(), "at end 2");
-
-s.stop();
-})();
diff --git a/jstests/sharding/ssv_config_check.js b/jstests/sharding/ssv_config_check.js
deleted file mode 100644
index 0163e521c65..00000000000
--- a/jstests/sharding/ssv_config_check.js
+++ /dev/null
@@ -1,34 +0,0 @@
-/**
- * Test that setShardVersion fails if sent to the config server.
- */
-(function() {
-"use strict";
-
-var st = new ShardingTest({shards: 1});
-
-var testDB = st.s.getDB('test');
-testDB.adminCommand({enableSharding: 'test'});
-testDB.adminCommand({shardCollection: 'test.user', key: {x: 1}});
-
-testDB.user.insert({x: 1});
-
-var directConn = new Mongo(st.rs0.getPrimary().host);
-var adminDB = directConn.getDB('admin');
-
-var configStr = adminDB.runCommand({getShardVersion: 'test.user'}).configServer;
-
-var configAdmin = st.c0.getDB('admin');
-
-jsTest.log("Verify that setShardVersion fails on the config server");
-// Even if shardName sent is 'config' and connstring sent is config server's actual connstring.
-assert.commandFailedWithCode(configAdmin.runCommand({
- setShardVersion: '',
- init: true,
- authoritative: true,
- configdb: configStr,
- shard: 'config'
-}),
- ErrorCodes.NoShardingEnabled);
-
-st.stop();
-})();
diff --git a/jstests/sharding/timeseries_coll_mod.js b/jstests/sharding/timeseries_coll_mod.js
index 2627fa21cf4..a4aa23bcc01 100644
--- a/jstests/sharding/timeseries_coll_mod.js
+++ b/jstests/sharding/timeseries_coll_mod.js
@@ -22,7 +22,7 @@ const viewNss = `${dbName}.${collName}`;
const bucketNss = `${dbName}.system.buckets.${collName}`;
const controlTimeField = `control.min.${timeField}`;
-function runBasicTest(failPoint) {
+function runBasicTest() {
const st = new ShardingTest({shards: 2, rs: {nodes: 2}});
const mongos = st.s0;
const db = mongos.getDB(dbName);
@@ -37,14 +37,6 @@ function runBasicTest(failPoint) {
assert.commandWorked(
db.createCollection(collName, {timeseries: {timeField: timeField, metaField: metaField}}));
- // Setting collModPrimaryDispatching failpoint to make sure the fallback logic of dispatching
- // collMod command at primary shard works.
- if (failPoint) {
- const primary = st.getPrimaryShard(dbName);
- assert.commandWorked(
- primary.adminCommand({configureFailPoint: failPoint, mode: 'alwaysOn'}));
- }
-
// Updates for timeField and metaField are disabled.
assert.commandFailedWithCode(db.runCommand({collMod: collName, timeseries: {timeField: 'x'}}),
40415 /* Failed to parse */);
@@ -74,23 +66,14 @@ function runBasicTest(failPoint) {
assert.commandWorked(
db.runCommand({collMod: collName, index: {name: indexName, hidden: false}}));
- if (failPoint) {
- // Granularity update disabled for sharded time-series collection, when we're using primary
- // dispatching logic.
- assert.commandFailedWithCode(
- db.runCommand({collMod: collName, timeseries: {granularity: 'hours'}}),
- ErrorCodes.NotImplemented);
- } else {
- // Granularity update works for sharded time-series collection, when we're using DDL
- // coordinator logic.
- const getGranularity = () => db.getSiblingDB('config')
- .collections.findOne({_id: bucketNss})
- .timeseriesFields.granularity;
- assert.eq(getGranularity(), 'minutes');
- assert.commandWorked(
- db.runCommand({collMod: collName, timeseries: {granularity: 'hours'}}));
- assert.eq(getGranularity(), 'hours');
- }
+ // Granularity update works for sharded time-series collection, when we're using DDL
+ // coordinator logic.
+ const getGranularity = () => db.getSiblingDB('config')
+ .collections.findOne({_id: bucketNss})
+ .timeseriesFields.granularity;
+ assert.eq(getGranularity(), 'minutes');
+ assert.commandWorked(db.runCommand({collMod: collName, timeseries: {granularity: 'hours'}}));
+ assert.eq(getGranularity(), 'hours');
st.stop();
}
@@ -175,8 +158,6 @@ function runReadAfterWriteTest() {
st.stop();
}
-runBasicTest('collModPrimaryDispatching');
-runBasicTest('collModCoordinatorPre60Compatible');
runBasicTest();
runReadAfterWriteTest();
diff --git a/jstests/sharding/timeseries_multiple_mongos.js b/jstests/sharding/timeseries_multiple_mongos.js
index dbf88f6fa56..e5d75cdc384 100644
--- a/jstests/sharding/timeseries_multiple_mongos.js
+++ b/jstests/sharding/timeseries_multiple_mongos.js
@@ -116,7 +116,7 @@ function runTest({shardKey, cmdObj, numProfilerEntries}) {
{"command.shardVersion.0": {"$exists": true}},
{
"command.shardVersion.0":
- {$ne: ShardVersioningUtil.kIgnoredShardVersion[0]}
+ {$ne: ShardVersioningUtil.kIgnoredShardVersion.v}
},
]
},
@@ -125,7 +125,7 @@ function runTest({shardKey, cmdObj, numProfilerEntries}) {
{"command.shardVersion.v": {"$exists": true}},
{
"command.shardVersion.v":
- {$ne: ShardVersioningUtil.kIgnoredShardVersion[0]}
+ {$ne: ShardVersioningUtil.kIgnoredShardVersion.v}
},
]
},
@@ -144,8 +144,8 @@ function runTest({shardKey, cmdObj, numProfilerEntries}) {
filter = {
[queryField]: cmdCollName,
"$or": [
- {"command.shardVersion.0": ShardVersioningUtil.kIgnoredShardVersion[0]},
- {"command.shardVersion.v": ShardVersioningUtil.kIgnoredShardVersion[0]},
+ {"command.shardVersion.0": ShardVersioningUtil.kIgnoredShardVersion.v},
+ {"command.shardVersion.v": ShardVersioningUtil.kIgnoredShardVersion.v},
]
};
}
diff --git a/jstests/sharding/topology_changes_bump_topology_time.js b/jstests/sharding/topology_changes_bump_topology_time.js
new file mode 100644
index 00000000000..2e644d4f3fb
--- /dev/null
+++ b/jstests/sharding/topology_changes_bump_topology_time.js
@@ -0,0 +1,70 @@
+/*
+1) Add shard
+2) topology time must increase
+3) remove shard
+4) topology time must increase
+*/
+
+function assertTopologyGt(topologyTime1, topologyTime2, msg) {
+ let msgError = `[${tojson(topologyTime1)} <= ${tojson(topologyTime2)}] ${msg}`;
+
+ assert.gt(timestampCmp(topologyTime1, topologyTime2), 0, msgError);
+}
+
+function cmdAsInternalClient(st, cmd) {
+ const command =
+ {[cmd]: 1, internalClient: {minWireVersion: NumberInt(0), maxWireVersion: NumberInt(7)}};
+ const connInternal = new Mongo(st.configRS.getPrimary().host);
+ const res = assert.commandWorked(connInternal.adminCommand(command));
+ connInternal.close();
+ return res;
+}
+
+function getTopologyTime(st) {
+ let res = cmdAsInternalClient(st, "hello");
+ return res.$topologyTime;
+}
+
+function printConfigShards(st, msg) {
+ print(msg, tojson(st.s.getDB("config").shards.find().toArray()));
+}
+
+(function() {
+
+'use strict';
+
+var st = new ShardingTest({shards: 1, rs: {nodes: 1}, config: 3});
+
+let initialTopology = getTopologyTime(st);
+
+// AddShard
+let rs = new ReplSetTest({name: "rs1", nodes: 1});
+rs.startSet({shardsvr: ""});
+rs.initiate();
+rs.awaitReplication();
+
+assert.commandWorked(st.s.getDB("admin").runCommand({addShard: rs.getURL(), name: "rs1"}));
+
+let topologyTimeAfterAddShard = getTopologyTime(st);
+
+// topology time must increase
+assertTopologyGt(topologyTimeAfterAddShard,
+ initialTopology,
+ "Current topologyTime should change after add shard, but it did not");
+
+assert.commandWorked(st.s.adminCommand({removeShard: "rs1"}));
+printConfigShards(st, "config.shards after first remove shard ");
+
+assert.commandWorked(st.s.adminCommand({removeShard: "rs1"}));
+printConfigShards(st, "config.shards after second remove shard ");
+
+let topologyTimeAfterRemoveShard = getTopologyTime(st);
+
+// topology time should change
+assertTopologyGt(topologyTimeAfterRemoveShard,
+ topologyTimeAfterAddShard,
+ "Current topologyTime should change after remove shard, but it did not");
+
+rs.stopSet();
+st.stop();
+})();
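The double removeShard call above reflects that shard removal is asynchronous: the first call starts draining and subsequent calls report progress until the shard is gone. When the timing is less predictable than in this test, a common pattern is to poll until the reported state is 'completed' (sketch below; 'st' is assumed to be the ShardingTest from above):

    assert.soon(() => {
        const res = assert.commandWorked(st.s.adminCommand({removeShard: "rs1"}));
        return res.state === "completed";
    }, "shard 'rs1' was never fully removed");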
diff --git a/jstests/sharding/txn_two_phase_commit_wait_for_majority_commit_after_stepup.js b/jstests/sharding/txn_two_phase_commit_wait_for_majority_commit_after_stepup.js
index 607389418b9..d82ad7047c0 100644
--- a/jstests/sharding/txn_two_phase_commit_wait_for_majority_commit_after_stepup.js
+++ b/jstests/sharding/txn_two_phase_commit_wait_for_majority_commit_after_stepup.js
@@ -104,7 +104,7 @@ stopServerReplication([coordPrimary, coordSecondary]);
// Induce the coordinator primary to step down.
// The amount of time the node has to wait before becoming primary again.
-const stepDownSecs = 1;
+const stepDownSecs = 2;
assert.commandWorked(coordPrimary.adminCommand({replSetStepDown: stepDownSecs, force: true}));
failPoint.off();
diff --git a/jstests/sharding/version1.js b/jstests/sharding/version1.js
deleted file mode 100644
index dfe9a733156..00000000000
--- a/jstests/sharding/version1.js
+++ /dev/null
@@ -1,75 +0,0 @@
-(function() {
-'use strict';
-
-load("jstests/sharding/libs/find_chunks_util.js");
-
-var s = new ShardingTest({name: "version1", shards: 1});
-
-assert.commandWorked(s.s0.adminCommand({enablesharding: "alleyinsider"}));
-assert.commandWorked(s.s0.adminCommand({shardcollection: "alleyinsider.foo", key: {num: 1}}));
-
-var a = s.shard0.getDB("admin");
-
-assert.commandFailed(a.runCommand({setShardVersion: "alleyinsider.foo", configdb: s._configDB}));
-
-assert.commandFailed(
- a.runCommand({setShardVersion: "alleyinsider.foo", configdb: s._configDB, version: "a"}));
-
-assert.commandFailed(a.runCommand(
- {setShardVersion: "alleyinsider.foo", configdb: s._configDB, authoritative: true}));
-
-assert.commandFailed(
- a.runCommand(
- {setShardVersion: "alleyinsider.foo", configdb: s._configDB, version: new Timestamp(2, 0)}),
- "should have failed b/c no auth");
-
-assert.commandFailed(a.runCommand({
- setShardVersion: "alleyinsider.foo",
- configdb: s._configDB,
- version: {e: epoch, t: timestamp, v: new Timestamp(2, 0)},
- authoritative: true
-}),
- "should have failed because first setShardVersion needs shard info");
-
-assert.commandFailed(a.runCommand({
- setShardVersion: "alleyinsider.foo",
- configdb: s._configDB,
- version: {e: epoch, t: timestamp, v: new Timestamp(2, 0)},
- authoritative: true,
- shard: "s.shard0.shardName",
- shardHost: s.s.host
-}),
- "should have failed because version is config is 1|0");
-
-var epoch = s.getDB('config').collections.findOne({_id: "alleyinsider.foo"}).lastmodEpoch;
-var timestamp = s.getDB('config').collections.findOne({_id: "alleyinsider.foo"}).timestamp;
-assert.commandWorked(a.runCommand({
- setShardVersion: "alleyinsider.foo",
- configdb: s._configDB,
- version: {e: epoch, t: timestamp, v: new Timestamp(1, 0)},
- authoritative: true,
- shard: s.shard0.shardName,
- shardHost: s.s.host
-}),
- "should have worked");
-
-assert.commandFailed(a.runCommand({
- setShardVersion: "alleyinsider.foo",
- configdb: "a",
- version: {e: epoch, t: timestamp, v: new Timestamp(0, 2)},
-}));
-
-assert.commandFailed(a.runCommand({
- setShardVersion: "alleyinsider.foo",
- configdb: s._configDB,
- version: {e: epoch, t: timestamp, v: new Timestamp(0, 2)},
-}));
-
-assert.commandFailed(a.runCommand({
- setShardVersion: "alleyinsider.foo",
- configdb: s._configDB,
- version: {e: epoch, t: timestamp, v: new Timestamp(0, 1)},
-}));
-
-s.stop();
-})();
diff --git a/jstests/sharding/version2.js b/jstests/sharding/version2.js
deleted file mode 100644
index 80e00dd74db..00000000000
--- a/jstests/sharding/version2.js
+++ /dev/null
@@ -1,44 +0,0 @@
-(function() {
-'use strict';
-
-load("jstests/sharding/libs/find_chunks_util.js");
-
-/**
- * One-shard cluster test do not need to be tested in the multiversion suites.
- * @tags: [multiversion_incompatible]
- */
-var s = new ShardingTest({name: "version2", shards: 1});
-
-assert.commandWorked(s.s0.adminCommand({enablesharding: "alleyinsider"}));
-assert.commandWorked(s.s0.adminCommand({shardcollection: "alleyinsider.foo", key: {num: 1}}));
-assert.commandWorked(s.s0.adminCommand({shardcollection: "alleyinsider.bar", key: {num: 1}}));
-
-var a = s.shard0.getDB("admin");
-
-// Setup from one client
-assert.eq(a.runCommand({"getShardVersion": "alleyinsider.foo", configdb: s._configDB}).global.i, 0);
-
-var fooEpoch = s.getDB('config').collections.findOne({_id: "alleyinsider.foo"}).lastmodEpoch;
-var fooTimestamp = s.getDB('config').collections.findOne({_id: "alleyinsider.foo"}).timestamp;
-assert.commandWorked(a.runCommand({
- setShardVersion: "alleyinsider.foo",
- configdb: s._configDB,
- authoritative: true,
- version: {e: fooEpoch, t: fooTimestamp, v: new Timestamp(1, 0)},
- shard: s.shard0.shardName,
- shardHost: s.s.host,
-}));
-
-printjson(s.config.chunks.findOne());
-
-assert.eq(a.runCommand({"getShardVersion": "alleyinsider.foo", configdb: s._configDB}).global.t, 1);
-
-// From a different client
-var a2 = connect(`mongodb://${s.rs0.getPrimary().name}/admin`);
-
-assert.eq(a2.runCommand({"getShardVersion": "alleyinsider.foo", configdb: s._configDB}).global.t,
- 1,
- "a2 global 1");
-
-s.stop();
-})();
diff --git a/site_scons/site_tools/build_metrics.py b/site_scons/site_tools/build_metrics.py
new file mode 100644
index 00000000000..0ab660a45e9
--- /dev/null
+++ b/site_scons/site_tools/build_metrics.py
@@ -0,0 +1,95 @@
+# Copyright 2020 MongoDB Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+"""Configure the build to track build performance."""
+
+import time
+import os
+import sys
+import atexit
+import json
+
+import psutil
+
+_SEC_TO_NANOSEC_FACTOR = 1000000000.0
+
+_BUILD_METRIC_DATA = {}
+
+
+def get_build_metric_dict():
+ global _BUILD_METRIC_DATA
+ return _BUILD_METRIC_DATA
+
+
+
+# This section is an excerpt of the original
+# https://stackoverflow.com/a/63029332/1644736
+class CaptureAtexits:
+ def __init__(self):
+ self.captured = []
+
+ def __eq__(self, other):
+ self.captured.append(other)
+ return False
+
+
+def finalize_build_metrics(env):
+ metrics = get_build_metric_dict()
+ metrics['end_time'] = time.time_ns()
+
+ build_metrics_file = env.GetOption('build-metrics')
+ if build_metrics_file == '-':
+ json.dump(metrics, sys.stdout, indent=4, sort_keys=True)
+ else:
+ with open(build_metrics_file, 'w') as f:
+ json.dump(metrics, f, indent=4, sort_keys=True)
+
+
+def add_meta_data(env, key, value):
+ get_build_metric_dict()[key] = value
+
+
+def generate(env, **kwargs):
+
+    # This will force our atexit handler to the bottom of the atexit stack, ensuring
+    # that it is the last thing called when exiting.
+ c = CaptureAtexits()
+ atexit.unregister(c)
+ for func in c.captured:
+ atexit.unregister(func)
+ atexit.register(finalize_build_metrics, env)
+ for func in c.captured:
+ atexit.register(func)
+
+ env.AddMethod(get_build_metric_dict, "GetBuildMetricDictionary")
+ env.AddMethod(add_meta_data, "AddBuildMetricsMetaData")
+
+ metrics = get_build_metric_dict()
+ p = psutil.Process(os.getpid())
+
+ metrics['start_time'] = int(p.create_time() * _SEC_TO_NANOSEC_FACTOR)
+ metrics['scons_command'] = " ".join([sys.executable] + sys.argv)
+
+
+def exists(env):
+ return True
diff --git a/site_scons/site_tools/build_metrics/__init__.py b/site_scons/site_tools/build_metrics/__init__.py
new file mode 100644
index 00000000000..0f3bd6c078d
--- /dev/null
+++ b/site_scons/site_tools/build_metrics/__init__.py
@@ -0,0 +1,96 @@
+# Copyright 2020 MongoDB Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+"""Configure the build to track build performance."""
+
+import atexit
+import json
+import os
+import sys
+import time
+
+from jsonschema import validate
+import psutil
+
+_SEC_TO_NANOSEC_FACTOR = 1000000000.0
+
+_BUILD_METRIC_DATA = {}
+
+
+def get_build_metric_dict():
+ global _BUILD_METRIC_DATA
+ return _BUILD_METRIC_DATA
+
+
+# This section is an excerpt of the original
+# https://stackoverflow.com/a/63029332/1644736
+class CaptureAtexits:
+ def __init__(self):
+ self.captured = []
+
+ def __eq__(self, other):
+ self.captured.append(other)
+ return False
+
+
+def finalize_build_metrics(env):
+ metrics = get_build_metric_dict()
+ metrics['end_time'] = time.time_ns()
+
+ with open(os.path.join(os.path.dirname(__file__), "build_metrics_format.schema")) as f:
+ validate(metrics, json.load(f))
+
+ build_metrics_file = env.GetOption('build-metrics')
+ if build_metrics_file == '-':
+ json.dump(metrics, sys.stdout, indent=4, sort_keys=True)
+ else:
+ with open(build_metrics_file, 'w') as f:
+ json.dump(metrics, f, indent=4, sort_keys=True)
+
+
+def add_meta_data(env, key, value):
+ get_build_metric_dict()[key] = value
+
+
+def generate(env, **kwargs):
+
+    # This will force our atexit handler to the bottom of the atexit stack, ensuring
+    # that it is the last thing called when exiting.
+ c = CaptureAtexits()
+ atexit.unregister(c)
+ for func in c.captured:
+ atexit.unregister(func)
+ atexit.register(finalize_build_metrics, env)
+ for func in c.captured:
+ atexit.register(func)
+
+ env.AddMethod(get_build_metric_dict, "GetBuildMetricDictionary")
+ env.AddMethod(add_meta_data, "AddBuildMetricsMetaData")
+
+ metrics = get_build_metric_dict()
+ p = psutil.Process(os.getpid())
+
+ metrics['start_time'] = int(p.create_time() * _SEC_TO_NANOSEC_FACTOR)
+ metrics['scons_command'] = " ".join([sys.executable] + sys.argv)
+
+
+def exists(env):
+ return True
diff --git a/site_scons/site_tools/build_metrics/build_metrics_format.schema b/site_scons/site_tools/build_metrics/build_metrics_format.schema
new file mode 100644
index 00000000000..5b05b1491c9
--- /dev/null
+++ b/site_scons/site_tools/build_metrics/build_metrics_format.schema
@@ -0,0 +1,10 @@
+{
+ "type" : "object",
+ "properties" : {
+ "start_time" : {"type" : "integer"},
+ "end_time" : {"type" : "integer"},
+ "evg_id" : {"type" : "string"},
+ "variant" : {"type" : "string"},
+ "scons_command" : {"type" : "string"}
+ }
+}
\ No newline at end of file
diff --git a/src/mongo/base/data_range.h b/src/mongo/base/data_range.h
index a366513ce3d..316a97fa3e3 100644
--- a/src/mongo/base/data_range.h
+++ b/src/mongo/base/data_range.h
@@ -189,6 +189,10 @@ public:
return !(lhs == rhs);
}
+ std::ptrdiff_t debug_offset() const {
+ return _debug_offset;
+ }
+
protected:
// Shared implementation of split() logic between DataRange and ConstDataRange.
template <typename RangeT,
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml
index 523c52065cf..1a9b676eae9 100644
--- a/src/mongo/base/error_codes.yml
+++ b/src/mongo/base/error_codes.yml
@@ -490,6 +490,8 @@ error_codes:
- {code: 374, name: TransactionAPIMustRetryTransaction, categories: [InternalOnly]}
- {code: 375, name: TransactionAPIMustRetryCommit, categories: [InternalOnly]}
+ - {code: 376, name: ChangeStreamNotEnabled}
+ - {code: 377, name: FLEMaxTagLimitExceeded }
# Error codes 4000-8999 are reserved.
diff --git a/src/mongo/bson/bson_validate.cpp b/src/mongo/bson/bson_validate.cpp
index 34b81338e7f..8725dfbc645 100644
--- a/src/mongo/bson/bson_validate.cpp
+++ b/src/mongo/bson/bson_validate.cpp
@@ -103,7 +103,10 @@ public:
Cursor cursor = {_data, _data + _maxLength};
int32_t len = cursor.template read<int32_t>();
uassert(InvalidBSON, "BSON data has to be at least 5 bytes", len >= 5);
- uassert(InvalidBSON, "Incorrect BSON length", static_cast<size_t>(len) <= _maxLength);
+ uassert(InvalidBSON,
+ str::stream() << "Incorrect BSON length " << static_cast<size_t>(len)
+ << " should be less or equal to " << _maxLength,
+ static_cast<size_t>(len) <= _maxLength);
const char* end = _currFrame->end = _data + len;
uassert(InvalidBSON, "BSON object not terminated with EOO", end[-1] == 0);
_validateIterative(Cursor{cursor.ptr, end});
diff --git a/src/mongo/client/SConscript b/src/mongo/client/SConscript
index 1e613f942a0..b04eca892a9 100644
--- a/src/mongo/client/SConscript
+++ b/src/mongo/client/SConscript
@@ -165,29 +165,20 @@ env.Library(
],
)
-env.Library(
- target='client_query',
- source=[
- 'query.cpp',
- ],
- LIBDEPS=[
- '$BUILD_DIR/mongo/base',
- 'read_preference',
- ],
-)
-
clientDriverEnv = env.Clone()
clientDriverEnv.InjectThirdParty('asio')
clientDriverEnv.Library(
target='clientdriver_minimal',
source=[
+ 'client_api_version_parameters.idl',
+ 'client_deprecated.cpp',
'dbclient_base.cpp',
'dbclient_cursor.cpp',
'index_spec.cpp',
- 'client_api_version_parameters.idl',
],
LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
'$BUILD_DIR/mongo/db/dbmessage',
'$BUILD_DIR/mongo/db/pipeline/aggregation_request_helper',
'$BUILD_DIR/mongo/db/query/command_request_response',
@@ -196,8 +187,8 @@ clientDriverEnv.Library(
'$BUILD_DIR/mongo/rpc/command_status',
'$BUILD_DIR/mongo/rpc/rpc',
'authentication',
- 'client_query',
'connection_string',
+ 'read_preference',
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/idl/idl_parser',
diff --git a/src/mongo/client/async_client.cpp b/src/mongo/client/async_client.cpp
index 3a568fa7e31..568b93b7e81 100644
--- a/src/mongo/client/async_client.cpp
+++ b/src/mongo/client/async_client.cpp
@@ -319,7 +319,7 @@ Future<executor::RemoteCommandResponse> AsyncDBClient::runCommandRequest(
auto startTimer = Timer();
auto opMsgRequest = OpMsgRequest::fromDBAndBody(
std::move(request.dbname), std::move(request.cmdObj), std::move(request.metadata));
- opMsgRequest.securityToken = request.securityToken;
+ opMsgRequest.validatedTenancyScope = request.validatedTenancyScope;
return runCommand(std::move(opMsgRequest), baton, request.options.fireAndForget)
.then([this, startTimer = std::move(startTimer)](rpc::UniqueReply response) {
return executor::RemoteCommandResponse(*response, startTimer.elapsed());
@@ -358,7 +358,7 @@ Future<executor::RemoteCommandResponse> AsyncDBClient::beginExhaustCommandReques
executor::RemoteCommandRequest request, const BatonHandle& baton) {
auto opMsgRequest = OpMsgRequest::fromDBAndBody(
std::move(request.dbname), std::move(request.cmdObj), std::move(request.metadata));
- opMsgRequest.securityToken = request.securityToken;
+ opMsgRequest.validatedTenancyScope = request.validatedTenancyScope;
return runExhaustCommand(std::move(opMsgRequest), baton);
}
diff --git a/src/mongo/client/client_deprecated.cpp b/src/mongo/client/client_deprecated.cpp
new file mode 100644
index 00000000000..2df1d1be3ac
--- /dev/null
+++ b/src/mongo/client/client_deprecated.cpp
@@ -0,0 +1,216 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/client/client_deprecated.h"
+
+#include "mongo/base/status.h"
+#include "mongo/base/status_with.h"
+#include "mongo/bson/util/builder.h"
+#include "mongo/db/query/query_request_helper.h"
+
+namespace mongo {
+namespace client_deprecated {
+
+namespace {
+bool isComplexQueryObj(const BSONObj& obj, bool* hasDollar) {
+ if (obj.hasElement("query")) {
+ if (hasDollar)
+ *hasDollar = false;
+ return true;
+ }
+
+ if (obj.hasElement("$query")) {
+ if (hasDollar)
+ *hasDollar = true;
+ return true;
+ }
+
+ return false;
+}
+
+BSONObj filterFromOpQueryObj(const BSONObj& obj) {
+ bool hasDollar;
+ if (!isComplexQueryObj(obj, &hasDollar)) {
+ return obj;
+ }
+
+ return obj.getObjectField(hasDollar ? "$query" : "query");
+}
+
+void initFindFromOptions(int options, FindCommandRequest* findCommand) {
+ bool tailable = (options & QueryOption_CursorTailable) != 0;
+ bool awaitData = (options & QueryOption_AwaitData) != 0;
+ if (awaitData) {
+ findCommand->setAwaitData(true);
+ }
+ if (tailable) {
+ findCommand->setTailable(true);
+ }
+
+ if ((options & QueryOption_NoCursorTimeout) != 0) {
+ findCommand->setNoCursorTimeout(true);
+ }
+ if ((options & QueryOption_PartialResults) != 0) {
+ findCommand->setAllowPartialResults(true);
+ }
+}
+
+/**
+ * Fills out the 'findCommand' output parameter based on the contents of 'querySettings'. Here,
+ * 'querySettings' has the same format as the "query" field of the no-longer-supported OP_QUERY wire
+ * protocol message. It can look something like this for example:
+ *
+ * {$query: ..., $hint: ..., $min: ..., $max: ...}
+ *
+ * Note that this does not set the filter itself on the 'FindCommandRequest' -- this function only
+ * deals with options that can be packed into the filter object.
+ *
+ * Although the OP_QUERY wire protocol message is no longer ever sent over the wire by the internal
+ * client, this supports old callers that still specify the operation they want to perform using
+ * an OP_QUERY-inspired format.
+ */
+Status initFindFromOpQueryObj(const BSONObj& querySettings, FindCommandRequest* findCommand) {
+ for (auto&& e : querySettings) {
+ StringData name = e.fieldNameStringData();
+
+ if (name == "$orderby" || name == "orderby") {
+ if (Object == e.type()) {
+ findCommand->setSort(e.embeddedObject().getOwned());
+ } else if (Array == e.type()) {
+ findCommand->setSort(e.embeddedObject());
+
+ // TODO: Is this ever used? I don't think so.
+ // Quote:
+ // This is for languages whose "objects" are not well ordered (JSON is well
+ // ordered).
+ // [ { a : ... } , { b : ... } ] -> { a : ..., b : ... }
+ // note: this is slow, but that is ok as order will have very few pieces
+ BSONObjBuilder b;
+ char p[2] = "0";
+
+ while (1) {
+ BSONObj j = findCommand->getSort().getObjectField(p);
+ if (j.isEmpty()) {
+ break;
+ }
+ BSONElement e = j.firstElement();
+ if (e.eoo()) {
+ return Status(ErrorCodes::BadValue, "bad order array");
+ }
+ if (!e.isNumber()) {
+ return Status(ErrorCodes::BadValue, "bad order array [2]");
+ }
+ b.append(e);
+ (*p)++;
+ if (!(*p <= '9')) {
+ return Status(ErrorCodes::BadValue, "too many ordering elements");
+ }
+ }
+
+ findCommand->setSort(b.obj());
+ } else {
+ return Status(ErrorCodes::BadValue, "sort must be object or array");
+ }
+ } else if (name == "term") {
+ findCommand->setTerm(e.safeNumberLong());
+ } else if (name == "readConcern") {
+ if (e.type() != BSONType::Object) {
+ return Status(ErrorCodes::BadValue, "readConcern must be an object");
+ }
+ findCommand->setReadConcern(e.embeddedObject().getOwned());
+ } else if (name.startsWith("$")) {
+ name = name.substr(1); // chop first char
+ if (name == "min") {
+ if (!e.isABSONObj()) {
+ return Status(ErrorCodes::BadValue, "$min must be a BSONObj");
+ }
+ findCommand->setMin(e.embeddedObject().getOwned());
+ } else if (name == "max") {
+ if (!e.isABSONObj()) {
+ return Status(ErrorCodes::BadValue, "$max must be a BSONObj");
+ }
+ findCommand->setMax(e.embeddedObject().getOwned());
+ } else if (name == "hint") {
+ if (e.isABSONObj()) {
+ findCommand->setHint(e.embeddedObject().getOwned());
+ } else if (String == e.type()) {
+ findCommand->setHint(e.wrap());
+ } else {
+ return Status(ErrorCodes::BadValue,
+ "$hint must be either a string or nested object");
+ }
+ } else if (name == "returnKey") {
+ // Won't throw.
+ if (e.trueValue()) {
+ findCommand->setReturnKey(true);
+ }
+ } else if (name == "showDiskLoc") {
+ // Won't throw.
+ if (e.trueValue()) {
+ findCommand->setShowRecordId(true);
+ query_request_helper::addShowRecordIdMetaProj(findCommand);
+ }
+ } else if (name == "maxTimeMS") {
+ StatusWith<int> maxTimeMS = parseMaxTimeMS(e);
+ if (!maxTimeMS.isOK()) {
+ return maxTimeMS.getStatus();
+ }
+ findCommand->setMaxTimeMS(maxTimeMS.getValue());
+ } else if (name == "readOnce") {
+ if (e.booleanSafe()) {
+ findCommand->setReadOnce(true);
+ }
+ } else if (name == "_requestResumeToken") {
+ if (e.booleanSafe()) {
+ findCommand->setRequestResumeToken(true);
+ }
+ } else if (name == "_resumeAfter") {
+ findCommand->setResumeAfter(e.embeddedObjectUserCheck().getOwned());
+ }
+ }
+ }
+
+ return Status::OK();
+}
+
+} // namespace
+
+void initFindFromLegacyOptions(BSONObj bsonOptions, int options, FindCommandRequest* findCommand) {
+ invariant(findCommand);
+ BSONObj filter = filterFromOpQueryObj(bsonOptions);
+ if (!filter.isEmpty()) {
+ findCommand->setFilter(filter.getOwned());
+ }
+
+ uassertStatusOK(initFindFromOpQueryObj(bsonOptions, findCommand));
+ initFindFromOptions(options, findCommand);
+}
+
+} // namespace client_deprecated
+} // namespace mongo
diff --git a/src/mongo/client/client_deprecated.h b/src/mongo/client/client_deprecated.h
new file mode 100644
index 00000000000..fa4509c62f8
--- /dev/null
+++ b/src/mongo/client/client_deprecated.h
@@ -0,0 +1,55 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/bson/json.h"
+#include "mongo/client/read_preference.h"
+#include "mongo/db/query/find_command_gen.h"
+#include "mongo/rpc/message.h"
+
+namespace mongo {
+/**
+ * WARNING: Do not add new uses of anything in this namespace! This exists only to support code
+ * paths that still use an OP_QUERY-derived query representation. Additional callers should not be
+ * added because OP_QUERY is no longer supported by the shell or server.
+ */
+namespace client_deprecated {
+/**
+ * WARNING: This function exists only to support special code paths that use an OP_QUERY-style query
+ * representation (even though the OP_QUERY wire protocol message itself is no longer supported). Do
+ * not add new callers.
+ *
+ * Sets the relevant fields in 'findCommand' based on the 'bsonOptions' object and the 'options' bit
+ * vector. 'bsonOptions' is formatted like the query object of an OP_QUERY wire protocol message.
+ * Similarly, 'options' is a bit vector which is interpreted like the OP_QUERY flags field.
+ */
+void initFindFromLegacyOptions(BSONObj bsonOptions, int options, FindCommandRequest* findCommand);
+} // namespace client_deprecated
+} // namespace mongo
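
For reference, a minimal caller-side sketch of the shim declared above. The helper name
'upconvertLegacyQuery', the "test.coll" namespace, and the settings values are invented for
illustration; only the initFindFromLegacyOptions() call itself comes from this patch.

    #include "mongo/client/client_deprecated.h"
    #include "mongo/db/dbmessage.h"
    #include "mongo/db/jsobj.h"

    namespace mongo {
    // Translate a legacy OP_QUERY-style settings object plus flags bit vector into a
    // FindCommandRequest using the client_deprecated shim.
    FindCommandRequest upconvertLegacyQuery() {
        FindCommandRequest findCmd{NamespaceString("test.coll")};

        // Legacy settings object: the filter lives under "$query"; other options are
        // $-prefixed fields alongside it.
        BSONObj legacySettings = BSON("$query" << BSON("x" << 1) << "$orderby" << BSON("y" << -1)
                                               << "$maxTimeMS" << 5000);

        // Legacy flags bit vector, interpreted like the OP_QUERY flags field.
        int legacyOptions = QueryOption_NoCursorTimeout;

        // Populates filter, sort, maxTimeMS, and noCursorTimeout on 'findCmd'; throws via
        // uassertStatusOK() if the settings object is malformed.
        client_deprecated::initFindFromLegacyOptions(legacySettings, legacyOptions, &findCmd);
        return findCmd;
    }
    }  // namespace mongo
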
diff --git a/src/mongo/client/dbclient_base.cpp b/src/mongo/client/dbclient_base.cpp
index e0022ab5d03..2c2bcf36412 100644
--- a/src/mongo/client/dbclient_base.cpp
+++ b/src/mongo/client/dbclient_base.cpp
@@ -49,7 +49,7 @@
#include "mongo/client/dbclient_cursor.h"
#include "mongo/config.h"
#include "mongo/db/api_parameters_gen.h"
-#include "mongo/db/auth/security_token.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/commands.h"
#include "mongo/db/json.h"
#include "mongo/db/namespace_string.h"
@@ -190,8 +190,8 @@ void appendMetadata(OperationContext* opCtx,
request.body = bob.obj();
if (opCtx) {
- if (auto securityToken = auth::getSecurityToken(opCtx)) {
- request.securityToken = securityToken->toBSON();
+ if (auto validatedTenancyScope = auth::ValidatedTenancyScope::get(opCtx)) {
+ request.validatedTenancyScope = validatedTenancyScope;
}
}
}
@@ -584,33 +584,11 @@ bool DBClientBase::exists(const string& ns) {
const uint64_t DBClientBase::INVALID_SOCK_CREATION_TIME = std::numeric_limits<uint64_t>::max();
-unique_ptr<DBClientCursor> DBClientBase::query_DEPRECATED(const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- int limit,
- int nToSkip,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int batchSize,
- boost::optional<BSONObj> readConcernObj) {
- unique_ptr<DBClientCursor> c(new DBClientCursor(this,
- nsOrUuid,
- filter,
- querySettings,
- limit,
- nToSkip,
- fieldsToReturn,
- queryOptions,
- batchSize,
- readConcernObj));
- if (c->init())
- return c;
- return nullptr;
-}
-
std::unique_ptr<DBClientCursor> DBClientBase::find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) {
- auto cursor = std::make_unique<DBClientCursor>(this, std::move(findRequest), readPref);
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode) {
+ auto cursor = std::make_unique<DBClientCursor>(
+ this, std::move(findRequest), readPref, exhaustMode == ExhaustMode::kOn);
if (cursor->init()) {
return cursor;
}
@@ -619,8 +597,9 @@ std::unique_ptr<DBClientCursor> DBClientBase::find(FindCommandRequest findReques
void DBClientBase::find(FindCommandRequest findRequest,
const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode,
std::function<void(const BSONObj&)> callback) {
- auto cursor = this->find(std::move(findRequest), readPref);
+ auto cursor = this->find(std::move(findRequest), readPref, exhaustMode);
while (cursor->more()) {
callback(cursor->nextSafe());
}
@@ -632,7 +611,7 @@ BSONObj DBClientBase::findOne(FindCommandRequest findRequest,
"caller cannot provide a limit when calling DBClientBase::findOne()",
!findRequest.getLimit());
findRequest.setLimit(1);
- auto cursor = this->find(std::move(findRequest), readPref);
+ auto cursor = this->find(std::move(findRequest), readPref, ExhaustMode::kOff);
uassert(5951201, "DBClientBase::findOne() could not produce cursor", cursor);
@@ -647,46 +626,12 @@ BSONObj DBClientBase::findOne(const NamespaceStringOrUUID& nssOrUuid, BSONObj fi
unique_ptr<DBClientCursor> DBClientBase::getMore(const string& ns, long long cursorId) {
unique_ptr<DBClientCursor> c(
- new DBClientCursor(this, NamespaceString(ns), cursorId, 0 /* limit */, 0 /* options */));
+ new DBClientCursor(this, NamespaceString(ns), cursorId, false /*isExhaust*/));
if (c->init())
return c;
return nullptr;
}
-unsigned long long DBClientBase::query_DEPRECATED(
- std::function<void(DBClientCursorBatchIterator&)> f,
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int batchSize,
- boost::optional<BSONObj> readConcernObj) {
- // mask options
- queryOptions &= (int)(QueryOption_NoCursorTimeout | QueryOption_SecondaryOk);
-
- unique_ptr<DBClientCursor> c(this->query_DEPRECATED(nsOrUuid,
- filter,
- querySettings,
- 0,
- 0,
- fieldsToReturn,
- queryOptions,
- batchSize,
- readConcernObj));
- // query_DEPRECATED() throws on network error so OK to uassert with numeric code here.
- uassert(16090, "socket error for mapping query", c.get());
-
- unsigned long long n = 0;
-
- while (c->more()) {
- DBClientCursorBatchIterator i(*c);
- f(i);
- n += i.n();
- }
- return n;
-}
-
namespace {
OpMsgRequest createInsertRequest(const string& ns,
const vector<BSONObj>& v,
diff --git a/src/mongo/client/dbclient_base.h b/src/mongo/client/dbclient_base.h
index 28dda9ada30..3c933f6d3d3 100644
--- a/src/mongo/client/dbclient_base.h
+++ b/src/mongo/client/dbclient_base.h
@@ -39,7 +39,6 @@
#include "mongo/client/dbclient_cursor.h"
#include "mongo/client/index_spec.h"
#include "mongo/client/mongo_uri.h"
-#include "mongo/client/query.h"
#include "mongo/client/read_preference.h"
#include "mongo/config.h"
#include "mongo/db/dbmessage.h"
@@ -73,6 +72,15 @@ std::string nsGetDB(const std::string& ns);
std::string nsGetCollection(const std::string& ns);
/**
+ * Allows callers of the internal client 'find()' API below to request an exhaust cursor.
+ *
+ * Such cursors use a special OP_MSG facility under the hood. When exhaust is requested, the server
+ * writes the full results of the query into the socket (split into getMore batches), without
+ * waiting for explicit getMore requests from the client.
+ */
+enum class ExhaustMode { kOn, kOff };
+
+/**
* Abstract class that implements the core db operations.
*/
class DBClientBase {
@@ -507,15 +515,21 @@ public:
* Issues a find command described by 'findRequest', and returns the resulting cursor.
*/
virtual std::unique_ptr<DBClientCursor> find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref);
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode);
/**
- * Identical to the 'find()' overload above, but uses a default value of "primary" for the read
- * preference.
+ * Convenience overloads. Identical to the 'find()' overload above, but default values of
+ * "primary" read preference and 'ExhaustMode::kOff' are used when not supplied by the caller.
*/
std::unique_ptr<DBClientCursor> find(FindCommandRequest findRequest) {
ReadPreferenceSetting defaultReadPref{};
- return find(std::move(findRequest), defaultReadPref);
+ return find(std::move(findRequest), defaultReadPref, ExhaustMode::kOff);
+ }
+
+ std::unique_ptr<DBClientCursor> find(FindCommandRequest findRequest,
+ const ReadPreferenceSetting& readPref) {
+ return find(std::move(findRequest), readPref, ExhaustMode::kOff);
}
/**
@@ -523,8 +537,16 @@ public:
* returning a cursor to the caller, iterates the cursor under the hood and calls the provided
* 'callback' function against each of the documents produced by the cursor.
*/
+ void find(FindCommandRequest findRequest, std::function<void(const BSONObj&)> callback) {
+ find(std::move(findRequest),
+ ReadPreferenceSetting{},
+ ExhaustMode::kOff,
+ std::move(callback));
+ }
+
void find(FindCommandRequest findRequest,
const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode,
std::function<void(const BSONObj&)> callback);
/**
@@ -555,29 +577,6 @@ public:
BSONObj findOne(const NamespaceStringOrUUID& nssOrUuid, BSONObj filter);
/**
- * Legacy find API. Do not add new callers! Use the 'find*()' methods above instead.
- */
- virtual std::unique_ptr<DBClientCursor> query_DEPRECATED(
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings = Query(),
- int limit = 0,
- int nToSkip = 0,
- const BSONObj* fieldsToReturn = nullptr,
- int queryOptions = 0,
- int batchSize = 0,
- boost::optional<BSONObj> readConcernObj = boost::none);
- virtual unsigned long long query_DEPRECATED(
- std::function<void(DBClientCursorBatchIterator&)> f,
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings = Query(),
- const BSONObj* fieldsToReturn = nullptr,
- int queryOptions = QueryOption_Exhaust,
- int batchSize = 0,
- boost::optional<BSONObj> readConcernObj = boost::none);
-
- /**
* Don't use this - called automatically by DBClientCursor for you.
* 'cursorId': Id of cursor to retrieve.
* Returns an handle to a previously allocated cursor.
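
To make the new exhaust-aware API concrete, the sketch below streams every matching document
through the callback-based find() overload with ExhaustMode::kOn. The function name
'dumpActiveDocs', the namespace, and the filter are illustrative assumptions; the overload and
enum are the ones declared in this header.

    #include <iostream>

    #include "mongo/client/dbclient_base.h"

    namespace mongo {
    void dumpActiveDocs(DBClientBase& conn) {
        FindCommandRequest findRequest{NamespaceString("test.coll")};
        findRequest.setFilter(BSON("status" << "active"));

        // ExhaustMode::kOn asks the server to stream subsequent batches without explicit
        // getMore requests; the callback runs once per returned document.
        conn.find(std::move(findRequest),
                  ReadPreferenceSetting{},
                  ExhaustMode::kOn,
                  [](const BSONObj& doc) { std::cout << doc.toString() << std::endl; });
    }
    }  // namespace mongo
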
diff --git a/src/mongo/client/dbclient_connection.cpp b/src/mongo/client/dbclient_connection.cpp
index 1c84cb370bf..1b87829c1cd 100644
--- a/src/mongo/client/dbclient_connection.cpp
+++ b/src/mongo/client/dbclient_connection.cpp
@@ -625,67 +625,6 @@ uint64_t DBClientConnection::getSockCreationMicroSec() const {
}
}
-unsigned long long DBClientConnection::query_DEPRECATED(
- std::function<void(DBClientCursorBatchIterator&)> f,
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int batchSize,
- boost::optional<BSONObj> readConcernObj) {
- if (!(queryOptions & QueryOption_Exhaust)) {
- return DBClientBase::query_DEPRECATED(f,
- nsOrUuid,
- filter,
- querySettings,
- fieldsToReturn,
- queryOptions,
- batchSize,
- readConcernObj);
- }
-
- // mask options
- queryOptions &=
- (int)(QueryOption_NoCursorTimeout | QueryOption_SecondaryOk | QueryOption_Exhaust);
-
- unique_ptr<DBClientCursor> c(this->query_DEPRECATED(nsOrUuid,
- filter,
- querySettings,
- 0,
- 0,
- fieldsToReturn,
- queryOptions,
- batchSize,
- readConcernObj));
- // Note that this->query will throw for network errors, so it is OK to return a numeric
- // error code here.
- uassert(13386, "socket error for mapping query", c.get());
-
- unsigned long long n = 0;
-
- try {
- while (1) {
- while (c->moreInCurrentBatch()) {
- DBClientCursorBatchIterator i(*c);
- f(i);
- n += i.n();
- }
-
- if (!c->more())
- break;
- }
- } catch (std::exception&) {
- /* connection CANNOT be used anymore as more data may be on the way from the server.
- we have to reconnect.
- */
- _markFailed(kEndSession);
- throw;
- }
-
- return n;
-}
-
DBClientConnection::DBClientConnection(bool _autoReconnect,
double so_timeout,
MongoURI uri,
diff --git a/src/mongo/client/dbclient_connection.h b/src/mongo/client/dbclient_connection.h
index 9187852758d..61096ba59b3 100644
--- a/src/mongo/client/dbclient_connection.h
+++ b/src/mongo/client/dbclient_connection.h
@@ -37,7 +37,6 @@
#include "mongo/client/dbclient_base.h"
#include "mongo/client/index_spec.h"
#include "mongo/client/mongo_uri.h"
-#include "mongo/client/query.h"
#include "mongo/client/read_preference.h"
#include "mongo/config.h"
#include "mongo/db/dbmessage.h"
@@ -63,7 +62,6 @@ struct RemoteCommandResponse;
}
class DBClientCursor;
-class DBClientCursorBatchIterator;
/**
* A basic connection to the database.
@@ -143,38 +141,6 @@ public:
*/
void logout(const std::string& dbname, BSONObj& info) override;
- std::unique_ptr<DBClientCursor> query_DEPRECATED(
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings = Query(),
- int limit = 0,
- int nToSkip = 0,
- const BSONObj* fieldsToReturn = nullptr,
- int queryOptions = 0,
- int batchSize = 0,
- boost::optional<BSONObj> readConcernObj = boost::none) override {
- checkConnection();
- return DBClientBase::query_DEPRECATED(nsOrUuid,
- filter,
- querySettings,
- limit,
- nToSkip,
- fieldsToReturn,
- queryOptions,
- batchSize,
- readConcernObj);
- }
-
- unsigned long long query_DEPRECATED(
- std::function<void(DBClientCursorBatchIterator&)>,
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int batchSize = 0,
- boost::optional<BSONObj> readConcernObj = boost::none) override;
-
using DBClientBase::runCommandWithTarget;
std::pair<rpc::UniqueReply, DBClientBase*> runCommandWithTarget(OpMsgRequest request) override;
std::pair<rpc::UniqueReply, std::shared_ptr<DBClientBase>> runCommandWithTarget(
diff --git a/src/mongo/client/dbclient_cursor.cpp b/src/mongo/client/dbclient_cursor.cpp
index 735c8a28d00..dc6a32acd07 100644
--- a/src/mongo/client/dbclient_cursor.cpp
+++ b/src/mongo/client/dbclient_cursor.cpp
@@ -27,13 +27,6 @@
* it in the license file.
*/
-/**
- * Connect to a Mongo database as a database, from C++.
- */
-
-
-#include "mongo/platform/basic.h"
-
#include "mongo/client/dbclient_cursor.h"
#include <memory>
@@ -79,291 +72,32 @@ BSONObj addMetadata(DBClientBase* client, BSONObj command) {
}
}
-Message assembleCommandRequest(DBClientBase* cli,
+Message assembleCommandRequest(DBClientBase* client,
StringData database,
- int legacyQueryOptions,
- BSONObj legacyQuery) {
- auto request = rpc::upconvertRequest(database, std::move(legacyQuery), legacyQueryOptions);
- request.body = addMetadata(cli, std::move(request.body));
- return request.serialize();
-}
-
-Message assembleFromFindCommandRequest(DBClientBase* client,
- StringData database,
- const FindCommandRequest& request,
- const ReadPreferenceSetting& readPref) {
- BSONObj findCmd = request.toBSON(BSONObj());
-
+ BSONObj commandObj,
+ const ReadPreferenceSetting& readPref) {
// Add the $readPreference field to the request.
{
- BSONObjBuilder builder{findCmd};
+ BSONObjBuilder builder{commandObj};
readPref.toContainingBSON(&builder);
- findCmd = builder.obj();
+ commandObj = builder.obj();
}
- findCmd = addMetadata(client, std::move(findCmd));
- auto opMsgRequest = OpMsgRequest::fromDBAndBody(database, findCmd);
+ commandObj = addMetadata(client, std::move(commandObj));
+ auto opMsgRequest = OpMsgRequest::fromDBAndBody(database, commandObj);
return opMsgRequest.serialize();
}
-
-/**
- * Initializes options based on the value of the 'options' bit vector.
- *
- * This contains flags such as tailable, exhaust, and noCursorTimeout.
- */
-void initFromInt(int options, FindCommandRequest* findCommand) {
- bool tailable = (options & QueryOption_CursorTailable) != 0;
- bool awaitData = (options & QueryOption_AwaitData) != 0;
- if (awaitData) {
- findCommand->setAwaitData(true);
- }
- if (tailable) {
- findCommand->setTailable(true);
- }
-
- if ((options & QueryOption_NoCursorTimeout) != 0) {
- findCommand->setNoCursorTimeout(true);
- }
- if ((options & QueryOption_PartialResults) != 0) {
- findCommand->setAllowPartialResults(true);
- }
-}
-
-/**
- * Fills out the 'findCommand' output parameter based on the contents of 'querySettings'. Here,
- * 'querySettings' has the same format as the "query" field of the no-longer-supported OP_QUERY wire
- * protocol message. It can look something like this for example:
- *
- * {$query: ..., $hint: ..., $min: ..., $max: ...}
- *
- * Although the OP_QUERY wire protocol message is no longer ever sent over the wire by the internal
- * client, callers of the internal client may still specify the operation they want to perform using
- * an OP_QUERY-inspired format until DBClientCursor's legacy API is removed.
- */
-Status initFullQuery(const BSONObj& querySettings, FindCommandRequest* findCommand) {
- for (auto&& e : querySettings) {
- StringData name = e.fieldNameStringData();
-
- if (name == "$orderby" || name == "orderby") {
- if (Object == e.type()) {
- findCommand->setSort(e.embeddedObject().getOwned());
- } else if (Array == e.type()) {
- findCommand->setSort(e.embeddedObject());
-
- // TODO: Is this ever used? I don't think so.
- // Quote:
- // This is for languages whose "objects" are not well ordered (JSON is well
- // ordered).
- // [ { a : ... } , { b : ... } ] -> { a : ..., b : ... }
- // note: this is slow, but that is ok as order will have very few pieces
- BSONObjBuilder b;
- char p[2] = "0";
-
- while (1) {
- BSONObj j = findCommand->getSort().getObjectField(p);
- if (j.isEmpty()) {
- break;
- }
- BSONElement e = j.firstElement();
- if (e.eoo()) {
- return Status(ErrorCodes::BadValue, "bad order array");
- }
- if (!e.isNumber()) {
- return Status(ErrorCodes::BadValue, "bad order array [2]");
- }
- b.append(e);
- (*p)++;
- if (!(*p <= '9')) {
- return Status(ErrorCodes::BadValue, "too many ordering elements");
- }
- }
-
- findCommand->setSort(b.obj());
- } else {
- return Status(ErrorCodes::BadValue, "sort must be object or array");
- }
- } else if (name.startsWith("$")) {
- name = name.substr(1); // chop first char
- if (name == "min") {
- if (!e.isABSONObj()) {
- return Status(ErrorCodes::BadValue, "$min must be a BSONObj");
- }
- findCommand->setMin(e.embeddedObject().getOwned());
- } else if (name == "max") {
- if (!e.isABSONObj()) {
- return Status(ErrorCodes::BadValue, "$max must be a BSONObj");
- }
- findCommand->setMax(e.embeddedObject().getOwned());
- } else if (name == "hint") {
- if (e.isABSONObj()) {
- findCommand->setHint(e.embeddedObject().getOwned());
- } else if (String == e.type()) {
- findCommand->setHint(e.wrap());
- } else {
- return Status(ErrorCodes::BadValue,
- "$hint must be either a string or nested object");
- }
- } else if (name == "returnKey") {
- // Won't throw.
- if (e.trueValue()) {
- findCommand->setReturnKey(true);
- }
- } else if (name == "showDiskLoc") {
- // Won't throw.
- if (e.trueValue()) {
- findCommand->setShowRecordId(true);
- query_request_helper::addShowRecordIdMetaProj(findCommand);
- }
- } else if (name == "maxTimeMS") {
- StatusWith<int> maxTimeMS = parseMaxTimeMS(e);
- if (!maxTimeMS.isOK()) {
- return maxTimeMS.getStatus();
- }
- findCommand->setMaxTimeMS(maxTimeMS.getValue());
- }
- }
- }
-
- return Status::OK();
-}
-
-
-Status initFindCommandRequest(int ntoskip,
- int queryOptions,
- const BSONObj& filter,
- const Query& querySettings,
- const BSONObj& proj,
- FindCommandRequest* findCommand) {
- if (!proj.isEmpty()) {
- findCommand->setProjection(proj.getOwned());
- }
- if (ntoskip) {
- findCommand->setSkip(ntoskip);
- }
-
- // Initialize flags passed as 'queryOptions' bit vector.
- initFromInt(queryOptions, findCommand);
-
- findCommand->setFilter(filter.getOwned());
- Status status = initFullQuery(querySettings.getFullSettingsDeprecated(), findCommand);
- if (!status.isOK()) {
- return status;
- }
-
- // It's not possible to specify readConcern in a legacy query message, so initialize it to
- // an empty readConcern object, ie. equivalent to `readConcern: {}`. This ensures that
- // mongos passes this empty readConcern to shards.
- findCommand->setReadConcern(BSONObj());
-
- return query_request_helper::validateFindCommandRequest(*findCommand);
-}
-
-StatusWith<std::unique_ptr<FindCommandRequest>> fromLegacyQuery(NamespaceStringOrUUID nssOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- const BSONObj& proj,
- int ntoskip,
- int queryOptions) {
- auto findCommand = std::make_unique<FindCommandRequest>(std::move(nssOrUuid));
-
- Status status = initFindCommandRequest(
- ntoskip, queryOptions, filter, querySettings, proj, findCommand.get());
- if (!status.isOK()) {
- return status;
- }
-
- return std::move(findCommand);
-}
-
-int queryOptionsFromFindCommand(const FindCommandRequest& findCmd,
- const ReadPreferenceSetting& readPref) {
- int queryOptions = 0;
- if (readPref.canRunOnSecondary()) {
- queryOptions = queryOptions & QueryOption_SecondaryOk;
- }
- if (findCmd.getTailable()) {
- queryOptions = queryOptions & QueryOption_CursorTailable;
- }
- if (findCmd.getNoCursorTimeout()) {
- queryOptions = queryOptions & QueryOption_NoCursorTimeout;
- }
- if (findCmd.getAwaitData()) {
- queryOptions = queryOptions & QueryOption_AwaitData;
- }
- if (findCmd.getAllowPartialResults()) {
- queryOptions = queryOptions & QueryOption_PartialResults;
- }
- return queryOptions;
-}
-
} // namespace
-Message DBClientCursor::initFromLegacyRequest() {
- auto findCommand = fromLegacyQuery(_nsOrUuid,
- _filter,
- _querySettings,
- _fieldsToReturn ? *_fieldsToReturn : BSONObj(),
- _nToSkip,
- _opts);
- // If there was a problem building the query request, report that.
- uassertStatusOK(findCommand.getStatus());
-
- if (_limit) {
- findCommand.getValue()->setLimit(_limit);
- }
- if (_batchSize) {
- findCommand.getValue()->setBatchSize(_batchSize);
- }
-
- const BSONObj querySettings = _querySettings.getFullSettingsDeprecated();
- if (querySettings.getBoolField("$readOnce")) {
- // Legacy queries don't handle readOnce.
- findCommand.getValue()->setReadOnce(true);
- }
- if (querySettings.getBoolField(FindCommandRequest::kRequestResumeTokenFieldName)) {
- // Legacy queries don't handle requestResumeToken.
- findCommand.getValue()->setRequestResumeToken(true);
- }
- if (querySettings.hasField(FindCommandRequest::kResumeAfterFieldName)) {
- // Legacy queries don't handle resumeAfter.
- findCommand.getValue()->setResumeAfter(
- querySettings.getObjectField(FindCommandRequest::kResumeAfterFieldName));
- }
- if (auto replTerm = querySettings[FindCommandRequest::kTermFieldName]) {
- // Legacy queries don't handle term.
- findCommand.getValue()->setTerm(replTerm.numberLong());
- }
- // Legacy queries don't handle readConcern.
- // We prioritize the readConcern parsed from the query object over '_readConcernObj'.
- if (auto readConcern = querySettings[repl::ReadConcernArgs::kReadConcernFieldName]) {
- findCommand.getValue()->setReadConcern(readConcern.Obj());
- } else if (_readConcernObj) {
- findCommand.getValue()->setReadConcern(_readConcernObj);
- }
- BSONObj cmd = findCommand.getValue()->toBSON(BSONObj());
- if (auto readPref = querySettings["$readPreference"]) {
- // FindCommandRequest doesn't handle $readPreference.
- cmd = BSONObjBuilder(std::move(cmd)).append(readPref).obj();
- }
-
- return assembleCommandRequest(_client, _ns.db(), _opts, std::move(cmd));
-}
-
Message DBClientCursor::assembleInit() {
if (_cursorId) {
return assembleGetMore();
}
// We haven't gotten a cursorId yet so we need to issue the initial find command.
- if (_findRequest) {
- // The caller described their find command using the modern 'FindCommandRequest' API.
- return assembleFromFindCommandRequest(_client, _ns.db(), *_findRequest, _readPref);
- } else {
- // The caller used a legacy API to describe the find operation, which may include $-prefixed
- // directives in the format previously expected for an OP_QUERY. We need to upconvert this
- // OP_QUERY-inspired format to a find command.
- return initFromLegacyRequest();
- }
+ invariant(_findRequest);
+ BSONObj findCmd = _findRequest->toBSON(BSONObj());
+ return assembleCommandRequest(_client, _ns.db(), std::move(findCmd), _readPref);
}
Message DBClientCursor::assembleGetMore() {
@@ -378,10 +112,10 @@ Message DBClientCursor::assembleGetMore() {
getMoreRequest.setTerm(static_cast<std::int64_t>(*_term));
}
getMoreRequest.setLastKnownCommittedOpTime(_lastKnownCommittedOpTime);
- auto msg = assembleCommandRequest(_client, _ns.db(), _opts, getMoreRequest.toBSON({}));
+ auto msg = assembleCommandRequest(_client, _ns.db(), getMoreRequest.toBSON({}), _readPref);
// Set the exhaust flag if needed.
- if (_opts & QueryOption_Exhaust && msg.operation() == dbMsg) {
+ if (_isExhaust) {
OpMsg::setFlag(&msg, OpMsg::kExhaustSupported);
}
return msg;
@@ -412,8 +146,7 @@ bool DBClientCursor::init() {
void DBClientCursor::requestMore() {
// For exhaust queries, once the stream has been initiated we get data blasted to us
// from the remote server, without a need to send any more 'getMore' requests.
- const auto isExhaust = _opts & QueryOption_Exhaust;
- if (isExhaust && _connectionHasPendingReplies) {
+ if (_isExhaust && _connectionHasPendingReplies) {
return exhaustReceiveMore();
}
@@ -438,7 +171,7 @@ void DBClientCursor::requestMore() {
}
/**
- * With QueryOption_Exhaust, the server just blasts data at us. The end of a stream is marked with a
+ * For exhaust cursors, the server just blasts data at us. The end of a stream is marked with a
* cursor id of 0.
*/
void DBClientCursor::exhaustReceiveMore() {
@@ -456,9 +189,9 @@ BSONObj DBClientCursor::commandDataReceived(const Message& reply) {
invariant(op == opReply || op == dbMsg);
// Check if the reply indicates that it is part of an exhaust stream.
- const auto isExhaust = OpMsg::isFlagSet(reply, OpMsg::kMoreToCome);
- _connectionHasPendingReplies = isExhaust;
- if (isExhaust) {
+ const auto isExhaustReply = OpMsg::isFlagSet(reply, OpMsg::kMoreToCome);
+ _connectionHasPendingReplies = isExhaustReply;
+ if (isExhaustReply) {
_lastRequestId = reply.header().getId();
}
@@ -592,96 +325,33 @@ void DBClientCursor::attach(AScopedConnection* conn) {
DBClientCursor::DBClientCursor(DBClientBase* client,
const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- int limit,
- int nToSkip,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int batchSize,
- boost::optional<BSONObj> readConcernObj)
- : DBClientCursor(client,
- nsOrUuid,
- filter,
- querySettings,
- 0, // cursorId
- limit,
- nToSkip,
- fieldsToReturn,
- queryOptions,
- batchSize,
- {},
- readConcernObj,
- boost::none) {}
-
-DBClientCursor::DBClientCursor(DBClientBase* client,
- const NamespaceStringOrUUID& nsOrUuid,
long long cursorId,
- int limit,
- int queryOptions,
+ bool isExhaust,
std::vector<BSONObj> initialBatch,
boost::optional<Timestamp> operationTime,
boost::optional<BSONObj> postBatchResumeToken)
- : DBClientCursor(client,
- nsOrUuid,
- BSONObj(), // filter
- Query(), // querySettings
- cursorId,
- limit,
- 0, // nToSkip
- nullptr, // fieldsToReturn
- queryOptions,
- 0,
- std::move(initialBatch), // batchSize
- boost::none,
- operationTime,
- postBatchResumeToken) {}
-
-DBClientCursor::DBClientCursor(DBClientBase* client,
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- long long cursorId,
- int limit,
- int nToSkip,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int batchSize,
- std::vector<BSONObj> initialBatch,
- boost::optional<BSONObj> readConcernObj,
- boost::optional<Timestamp> operationTime,
- boost::optional<BSONObj> postBatchResumeToken)
: _batch{std::move(initialBatch)},
_client(client),
_originalHost(_client->getServerAddress()),
_nsOrUuid(nsOrUuid),
_ns(nsOrUuid.nss() ? *nsOrUuid.nss() : NamespaceString(nsOrUuid.dbname())),
_cursorId(cursorId),
- _batchSize(batchSize == 1 ? 2 : batchSize),
- _limit(limit),
- _filter(filter),
- _querySettings(querySettings),
- _nToSkip(nToSkip),
- _fieldsToReturn(fieldsToReturn),
- _readConcernObj(readConcernObj),
- _opts(queryOptions),
+ _isExhaust(isExhaust),
_operationTime(operationTime),
- _postBatchResumeToken(postBatchResumeToken) {
- tassert(5746103, "DBClientCursor limit must be non-negative", _limit >= 0);
-}
+ _postBatchResumeToken(postBatchResumeToken) {}
DBClientCursor::DBClientCursor(DBClientBase* client,
FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref)
+ const ReadPreferenceSetting& readPref,
+ bool isExhaust)
: _client(client),
_originalHost(_client->getServerAddress()),
_nsOrUuid(findRequest.getNamespaceOrUUID()),
_ns(_nsOrUuid.nss() ? *_nsOrUuid.nss() : NamespaceString(_nsOrUuid.dbname())),
_batchSize(findRequest.getBatchSize().value_or(0)),
- _limit(findRequest.getLimit().value_or(0)),
_findRequest(std::move(findRequest)),
_readPref(readPref),
- _opts(queryOptionsFromFindCommand(*_findRequest, _readPref)) {
+ _isExhaust(isExhaust) {
// Internal clients should always pass an explicit readConcern. If the caller did not already
// pass a readConcern than we must explicitly initialize an empty readConcern so that it ends up
// in the serialized version of the find command which will be sent across the wire.
@@ -725,8 +395,7 @@ StatusWith<std::unique_ptr<DBClientCursor>> DBClientCursor::fromAggregationReque
return {std::make_unique<DBClientCursor>(client,
aggRequest.getNamespace(),
cursorId,
- 0,
- useExhaust ? QueryOption_Exhaust : 0,
+ useExhaust,
firstBatch,
operationTime,
postBatchResumeToken)};
@@ -754,5 +423,4 @@ void DBClientCursor::kill() {
_cursorId = 0;
}
-
} // namespace mongo
diff --git a/src/mongo/client/dbclient_cursor.h b/src/mongo/client/dbclient_cursor.h
index 520b1c1236c..f13f861d96c 100644
--- a/src/mongo/client/dbclient_cursor.h
+++ b/src/mongo/client/dbclient_cursor.h
@@ -31,10 +31,8 @@
#include <stack>
-#include "mongo/client/query.h"
+#include "mongo/client/read_preference.h"
#include "mongo/db/dbmessage.h"
-#include "mongo/db/jsobj.h"
-#include "mongo/db/json.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/query/find_command_gen.h"
#include "mongo/rpc/message.h"
@@ -61,30 +59,26 @@ public:
bool secondaryOk,
bool useExhaust);
+ /**
+ * Constructs a 'DBClientCursor' that will be opened by issuing the find command described by
+ * 'findRequest'.
+ */
DBClientCursor(DBClientBase* client,
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- int limit,
- int nToSkip,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int bs,
- boost::optional<BSONObj> readConcernObj = boost::none);
+ FindCommandRequest findRequest,
+ const ReadPreferenceSetting& readPref,
+ bool isExhaust);
+ /**
+ * Constructs a 'DBClientCursor' from a pre-existing cursor id.
+ */
DBClientCursor(DBClientBase* client,
const NamespaceStringOrUUID& nsOrUuid,
long long cursorId,
- int limit,
- int options,
+ bool isExhaust,
std::vector<BSONObj> initialBatch = {},
boost::optional<Timestamp> operationTime = boost::none,
boost::optional<BSONObj> postBatchResumeToken = boost::none);
- DBClientCursor(DBClientBase* client,
- FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref);
-
virtual ~DBClientCursor();
/**
@@ -169,11 +163,11 @@ public:
}
bool tailable() const {
- return (_opts & QueryOption_CursorTailable) != 0;
+ return _findRequest && _findRequest->getTailable();
}
bool tailableAwaitData() const {
- return tailable() && (_opts & QueryOption_AwaitData);
+ return tailable() && _findRequest->getAwaitData();
}
/**
@@ -276,21 +270,6 @@ protected:
Batch _batch;
private:
- DBClientCursor(DBClientBase* client,
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- long long cursorId,
- int limit,
- int nToSkip,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int bs,
- std::vector<BSONObj> initialBatch,
- boost::optional<BSONObj> readConcernObj,
- boost::optional<Timestamp> operationTime,
- boost::optional<BSONObj> postBatchResumeToken = boost::none);
-
void dataReceived(const Message& reply) {
bool retry;
std::string lazyHost;
@@ -311,13 +290,6 @@ private:
Message assembleInit();
Message assembleGetMore();
- /**
- * Constructs the initial find commmand request based on a legacy OP_QUERY-style description of
- * the find operation. Only used if the caller constructed the 'DBClientCursor' with the legacy
- * API.
- */
- Message initFromLegacyRequest();
-
DBClientBase* _client;
std::string _originalHost;
NamespaceStringOrUUID _nsOrUuid;
@@ -335,32 +307,16 @@ private:
bool _connectionHasPendingReplies = false;
int _lastRequestId = 0;
- int _batchSize;
- int _limit = 0;
+ int _batchSize = 0;
- // If the caller describes the find command being executed by this cursor as a
- // 'FindCommandRequest', then that request object and the associated read preference are set
- // here. Otherwise, if the caller uses the legacy OP_QUERY-inspired API, these members are
- // default-initialized but never used.
+ // A description of the find command provided by the caller which is used to open the cursor.
+ //
+ // Has a value of boost::none if the caller constructed this cursor using a pre-existing cursor
+ // id.
boost::optional<FindCommandRequest> _findRequest;
- ReadPreferenceSetting _readPref;
- // These data members are only used if the cursor was constructed using the legacy
- // OP_QUERY-inspired API. If the cursor was constructed using the 'FindCommandRequest'-based
- // API, these are initialized to their default values but never used.
- BSONObj _filter;
- Query _querySettings;
- int _nToSkip = 0;
- const BSONObj* _fieldsToReturn = nullptr;
- boost::optional<BSONObj> _readConcernObj;
-
- // This has the same meaning as the flags bit vector from the no-longer-supported OP_QUERY wire
- // protocol message. However, it is initialized even if the caller constructed the cursor using
- // the 'FindCommandRequest`-based API.
- //
- // We should eventually stop using the OP_QUERY flags bit vector in server code, since OP_QUERY
- // is no longer supported.
- int _opts;
+ ReadPreferenceSetting _readPref;
+ bool _isExhaust;
Milliseconds _awaitDataTimeout = Milliseconds{0};
boost::optional<long long> _term;
@@ -369,38 +325,4 @@ private:
boost::optional<BSONObj> _postBatchResumeToken;
};
-/** iterate over objects in current batch only - will not cause a network call
- */
-class DBClientCursorBatchIterator {
-public:
- DBClientCursorBatchIterator(DBClientCursor& c) : _c(c), _n() {}
- bool moreInCurrentBatch() {
- return _c.moreInCurrentBatch();
- }
- BSONObj nextSafe() {
- massert(13383, "BatchIterator empty", moreInCurrentBatch());
- ++_n;
- return _c.nextSafe();
- }
- int n() const {
- return _n;
- }
- // getNamespaceString() will return the NamespaceString returned by the 'find' command.
- const NamespaceString& getNamespaceString() {
- return _c.getNamespaceString();
- }
-
- long long getCursorId() const {
- return _c.getCursorId();
- }
-
- boost::optional<BSONObj> getPostBatchResumeToken() const {
- return _c.getPostBatchResumeToken();
- }
-
-private:
- DBClientCursor& _c;
- int _n;
-};
-
} // namespace mongo
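
The unit tests below exercise the find-request constructor; for completeness, here is a sketch of
the cursor-id constructor, which mirrors how DBClientBase::getMore() uses it. The function name,
connection, namespace, and cursor id are placeholders.

    #include "mongo/client/dbclient_base.h"

    namespace mongo {
    // Attach a DBClientCursor to a cursor that was already opened elsewhere (for example, a
    // cursor id returned by an aggregate command) and drain the remaining results.
    void drainExistingCursor(DBClientBase& conn, long long existingCursorId) {
        DBClientCursor cursor(&conn,
                              NamespaceStringOrUUID(NamespaceString("test.coll")),
                              existingCursorId,
                              false /*isExhaust*/);
        if (!cursor.init()) {
            return;  // The initial getMore could not be issued.
        }
        while (cursor.more()) {
            BSONObj doc = cursor.nextSafe();
            // ... process 'doc' ...
        }
    }
    }  // namespace mongo
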
diff --git a/src/mongo/client/dbclient_cursor_test.cpp b/src/mongo/client/dbclient_cursor_test.cpp
index 10a23615ab0..21e9cfcf082 100644
--- a/src/mongo/client/dbclient_cursor_test.cpp
+++ b/src/mongo/client/dbclient_cursor_test.cpp
@@ -150,8 +150,8 @@ TEST_F(DBClientCursorTest, DBClientCursorCallsMetaDataReaderOncePerBatch) {
// Set up the DBClientCursor and a mock client connection.
DBClientConnectionForTest conn;
const NamespaceString nss("test", "coll");
- DBClientCursor cursor(
- &conn, NamespaceStringOrUUID(nss), BSONObj{}, Query(), 0, 0, nullptr, 0, 0);
+ FindCommandRequest findCmd{nss};
+ DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, false);
cursor.setBatchSize(2);
// Set up mock 'find' response.
@@ -197,15 +197,8 @@ TEST_F(DBClientCursorTest, DBClientCursorHandlesOpMsgExhaustCorrectly) {
// Set up the DBClientCursor and a mock client connection.
DBClientConnectionForTest conn;
const NamespaceString nss("test", "coll");
- DBClientCursor cursor(&conn,
- NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- QueryOption_Exhaust,
- 0);
+ FindCommandRequest findCmd{nss};
+ DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, true /*isExhaust*/);
cursor.setBatchSize(0);
// Set up mock 'find' response.
@@ -268,15 +261,8 @@ TEST_F(DBClientCursorTest, DBClientCursorResendsGetMoreIfMoreToComeFlagIsOmitted
// Set up the DBClientCursor and a mock client connection.
DBClientConnectionForTest conn;
const NamespaceString nss("test", "coll");
- DBClientCursor cursor(&conn,
- NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- QueryOption_Exhaust,
- 0);
+ FindCommandRequest findCmd{nss};
+ DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, true /*isExhaust*/);
cursor.setBatchSize(0);
// Set up mock 'find' response.
@@ -356,19 +342,11 @@ TEST_F(DBClientCursorTest, DBClientCursorResendsGetMoreIfMoreToComeFlagIsOmitted
}
TEST_F(DBClientCursorTest, DBClientCursorMoreThrowsExceptionOnNonOKResponse) {
-
// Set up the DBClientCursor and a mock client connection.
DBClientConnectionForTest conn;
const NamespaceString nss("test", "coll");
- DBClientCursor cursor(&conn,
- NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- QueryOption_Exhaust,
- 0);
+ FindCommandRequest findCmd{nss};
+ DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, true /*isExhaust*/);
cursor.setBatchSize(0);
// Set up mock 'find' response.
@@ -395,19 +373,11 @@ TEST_F(DBClientCursorTest, DBClientCursorMoreThrowsExceptionOnNonOKResponse) {
}
TEST_F(DBClientCursorTest, DBClientCursorMoreThrowsExceptionWhenMoreToComeFlagSetWithZeroCursorId) {
-
// Set up the DBClientCursor and a mock client connection.
DBClientConnectionForTest conn;
const NamespaceString nss("test", "coll");
- DBClientCursor cursor(&conn,
- NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- QueryOption_Exhaust,
- 0);
+ FindCommandRequest findCmd{nss};
+ DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, true /*isExhaust*/);
cursor.setBatchSize(0);
// Set up mock 'find' response.
@@ -436,19 +406,12 @@ TEST_F(DBClientCursorTest, DBClientCursorMoreThrowsExceptionWhenMoreToComeFlagSe
}
TEST_F(DBClientCursorTest, DBClientCursorPassesReadOnceFlag) {
-
// Set up the DBClientCursor and a mock client connection.
DBClientConnectionForTest conn;
const NamespaceString nss("test", "coll");
- DBClientCursor cursor(&conn,
- NamespaceStringOrUUID(nss),
- BSONObj{},
- Query().readOnce(true),
- 0,
- 0,
- nullptr,
- /*QueryOption*/ 0,
- 0);
+ FindCommandRequest findCmd{nss};
+ findCmd.setReadOnce(true);
+ DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, false);
cursor.setBatchSize(0);
// Set up mock 'find' response.
@@ -473,15 +436,10 @@ TEST_F(DBClientCursorTest, DBClientCursorPassesResumeFields) {
// Set up the DBClientCursor and a mock client connection.
DBClientConnectionForTest conn;
const NamespaceString nss("test", "coll");
- DBClientCursor cursor(&conn,
- NamespaceStringOrUUID(nss),
- BSONObj{},
- Query().requestResumeToken(true).resumeAfter(BSON("$recordId" << 5LL)),
- 0,
- 0,
- nullptr,
- /*QueryOption*/ 0,
- 0);
+ FindCommandRequest findCmd{nss};
+ findCmd.setRequestResumeToken(true);
+ findCmd.setResumeAfter(BSON("$recordId" << 5LL));
+ DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, false);
cursor.setBatchSize(0);
// Set up mock 'find' response.
@@ -514,15 +472,9 @@ TEST_F(DBClientCursorTest, DBClientCursorTailable) {
// Set up the DBClientCursor and a mock client connection.
DBClientConnectionForTest conn;
const NamespaceString nss("test", "coll");
- DBClientCursor cursor(&conn,
- NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- QueryOption_CursorTailable,
- 0);
+ FindCommandRequest findCmd{nss};
+ findCmd.setTailable(true);
+ DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, false);
cursor.setBatchSize(0);
// Set up mock 'find' response.
@@ -614,15 +566,10 @@ TEST_F(DBClientCursorTest, DBClientCursorTailableAwaitData) {
// Set up the DBClientCursor and a mock client connection.
DBClientConnectionForTest conn;
const NamespaceString nss("test", "coll");
- DBClientCursor cursor(&conn,
- NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- QueryOption_CursorTailable | QueryOption_AwaitData,
- 0);
+ FindCommandRequest findCmd{nss};
+ findCmd.setTailable(true);
+ findCmd.setAwaitData(true);
+ DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, false);
cursor.setBatchSize(0);
// Set up mock 'find' response.
@@ -681,15 +628,10 @@ TEST_F(DBClientCursorTest, DBClientCursorTailableAwaitDataExhaust) {
// Set up the DBClientCursor and a mock client connection.
DBClientConnectionForTest conn;
const NamespaceString nss("test", "coll");
- DBClientCursor cursor(&conn,
- NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- QueryOption_CursorTailable | QueryOption_AwaitData | QueryOption_Exhaust,
- 0);
+ FindCommandRequest findCmd{nss};
+ findCmd.setTailable(true);
+ findCmd.setAwaitData(true);
+ DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, true /*isExhaust*/);
cursor.setBatchSize(0);
// Set up mock 'find' response.
@@ -835,15 +777,15 @@ TEST_F(DBClientCursorTest, DBClientCursorOplogQuery) {
const long long maxTimeMS = 5000LL;
const long long term = 5;
- DBClientCursor cursor(&conn,
- NamespaceStringOrUUID(nss),
- filterObj,
- Query().readConcern(readConcernObj).maxTimeMS(maxTimeMS).term(term),
- 0,
- 0,
- nullptr,
- QueryOption_CursorTailable | QueryOption_AwaitData,
- 0);
+ FindCommandRequest findCmd{nss};
+ findCmd.setFilter(filterObj);
+ findCmd.setReadConcern(readConcernObj);
+ findCmd.setMaxTimeMS(maxTimeMS);
+ findCmd.setTerm(term);
+ findCmd.setTailable(true);
+ findCmd.setAwaitData(true);
+
+ DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, false);
cursor.setBatchSize(0);
// Set up mock 'find' response.
diff --git a/src/mongo/client/dbclient_mockcursor.cpp b/src/mongo/client/dbclient_mockcursor.cpp
index 7082f55517e..0e33d4360d1 100644
--- a/src/mongo/client/dbclient_mockcursor.cpp
+++ b/src/mongo/client/dbclient_mockcursor.cpp
@@ -42,7 +42,7 @@ DBClientMockCursor::DBClientMockCursor(mongo::DBClientBase* client,
const BSONArray& mockCollection,
const bool provideResumeToken,
unsigned long batchSize)
- : mongo::DBClientCursor(client, NamespaceString(), 0, 0, 0),
+ : mongo::DBClientCursor(client, NamespaceString(), 0 /*cursorId*/, false /*isExhaust*/),
_collectionArray(mockCollection),
_iter(_collectionArray),
_provideResumeToken(provideResumeToken),
diff --git a/src/mongo/client/dbclient_mockcursor.h b/src/mongo/client/dbclient_mockcursor.h
index 1138ee41286..7430a1aa3cb 100644
--- a/src/mongo/client/dbclient_mockcursor.h
+++ b/src/mongo/client/dbclient_mockcursor.h
@@ -35,9 +35,6 @@
namespace mongo {
-// DBClientMockCursor supports only a small subset of DBClientCursor operations.
-// It supports only iteration, including use of DBClientCursorBatchIterator. If a batchsize
-// is given, iteration is broken up into multiple batches at batchSize boundaries.
class DBClientMockCursor : public DBClientCursor {
public:
DBClientMockCursor(mongo::DBClientBase* client,
diff --git a/src/mongo/client/dbclient_rs.cpp b/src/mongo/client/dbclient_rs.cpp
index 4b130c36142..a48fe50a2fa 100644
--- a/src/mongo/client/dbclient_rs.cpp
+++ b/src/mongo/client/dbclient_rs.cpp
@@ -27,15 +27,13 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/client/dbclient_rs.h"
#include <memory>
#include <utility>
#include "mongo/bson/util/builder.h"
+#include "mongo/client/client_deprecated.h"
#include "mongo/client/connpool.h"
#include "mongo/client/dbclient_cursor.h"
#include "mongo/client/global_conn_pool.h"
@@ -87,42 +85,6 @@ public:
*/
const size_t MAX_RETRY = 3;
-/**
- * Extracts the read preference settings from the query document. Note that this method
- * assumes that the query is ok for secondaries so it defaults to
- * ReadPreference::SecondaryPreferred when nothing is specified. Supports the following
- * format:
- *
- * Format A (official format):
- * { query: <actual query>, $readPreference: <read pref obj> }
- *
- * Format B (unofficial internal format from mongos):
- * { <actual query>, $queryOptions: { $readPreference: <read pref obj> }}
- *
- * @param query the raw query document
- *
- * @return the read preference setting if a read preference exists, otherwise the default read
- * preference of Primary_Only. If the tags field was not present, it will contain one
- * empty tag document {} which matches any tag.
- *
- * @throws AssertionException if the read preference object is malformed
- */
-std::unique_ptr<ReadPreferenceSetting> _extractReadPref(const Query& querySettings,
- int queryOptions) {
- // Default read pref is primary only or secondary preferred with secondaryOK
- const auto defaultReadPref = queryOptions & QueryOption_SecondaryOk
- ? ReadPreference::SecondaryPreferred
- : ReadPreference::PrimaryOnly;
-
- BSONObj readPrefContainingObj = querySettings.getFullSettingsDeprecated();
- if (auto elem = readPrefContainingObj["$queryOptions"]) {
- // The readPreference is embedded in the $queryOptions field.
- readPrefContainingObj = elem.Obj();
- }
- return std::make_unique<ReadPreferenceSetting>(uassertStatusOK(
- ReadPreferenceSetting::fromContainingBSON(readPrefContainingObj, defaultReadPref)));
-}
-
} // namespace
// --------------------------------
@@ -538,7 +500,8 @@ void DBClientReplicaSet::remove(const string& ns,
}
std::unique_ptr<DBClientCursor> DBClientReplicaSet::find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) {
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode) {
invariant(findRequest.getNamespaceOrUUID().nss());
const std::string nss = findRequest.getNamespaceOrUUID().nss()->ns();
if (_isSecondaryQuery(nss, findRequest.toBSON(BSONObj{}), readPref)) {
@@ -562,7 +525,8 @@ std::unique_ptr<DBClientCursor> DBClientReplicaSet::find(FindCommandRequest find
break;
}
- std::unique_ptr<DBClientCursor> cursor = conn->find(findRequest, readPref);
+ std::unique_ptr<DBClientCursor> cursor =
+ conn->find(findRequest, readPref, exhaustMode);
return checkSecondaryQueryResult(std::move(cursor));
} catch (const DBException& ex) {
@@ -587,90 +551,7 @@ std::unique_ptr<DBClientCursor> DBClientReplicaSet::find(FindCommandRequest find
"dbclient_rs query to primary node",
"replicaSet"_attr = _getMonitor()->getName());
- return checkPrimary()->find(std::move(findRequest), readPref);
-}
-
-unique_ptr<DBClientCursor> DBClientReplicaSet::query_DEPRECATED(
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- int limit,
- int nToSkip,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int batchSize,
- boost::optional<BSONObj> readConcernObj) {
- shared_ptr<ReadPreferenceSetting> readPref(_extractReadPref(querySettings, queryOptions));
- invariant(nsOrUuid.nss());
- const string ns = nsOrUuid.nss()->ns();
- if (_isSecondaryQuery(ns, filter, *readPref)) {
- LOGV2_DEBUG(20133,
- 3,
- "dbclient_rs query using secondary or tagged node selection in {replicaSet}, "
- "read pref is {readPref} "
- "(primary : {primary}, lastTagged : {lastTagged})",
- "dbclient_rs query using secondary or tagged node selection",
- "replicaSet"_attr = _getMonitor()->getName(),
- "readPref"_attr = readPref->toString(),
- "primary"_attr =
- (_primary.get() != nullptr ? _primary->getServerAddress() : "[not cached]"),
- "lastTagged"_attr = (_lastSecondaryOkConn.get() != nullptr
- ? _lastSecondaryOkConn->getServerAddress()
- : "[not cached]"));
-
- string lastNodeErrMsg;
-
- for (size_t retry = 0; retry < MAX_RETRY; retry++) {
- try {
- DBClientConnection* conn = selectNodeUsingTags(readPref);
-
- if (conn == nullptr) {
- break;
- }
-
- unique_ptr<DBClientCursor> cursor = conn->query_DEPRECATED(nsOrUuid,
- filter,
- querySettings,
- limit,
- nToSkip,
- fieldsToReturn,
- queryOptions,
- batchSize,
- readConcernObj);
-
- return checkSecondaryQueryResult(std::move(cursor));
- } catch (const DBException& ex) {
- const Status status = ex.toStatus(str::stream() << "can't query replica set node "
- << _lastSecondaryOkHost);
- lastNodeErrMsg = status.reason();
- _invalidateLastSecondaryOkCache(status);
- }
- }
-
- StringBuilder assertMsg;
- assertMsg << "Failed to do query, no good nodes in " << _getMonitor()->getName();
- if (!lastNodeErrMsg.empty()) {
- assertMsg << ", last error: " << lastNodeErrMsg;
- }
-
- uasserted(16370, assertMsg.str());
- }
-
- LOGV2_DEBUG(20134,
- 3,
- "dbclient_rs query to primary node in {replicaSet}",
- "dbclient_rs query to primary node",
- "replicaSet"_attr = _getMonitor()->getName());
-
- return checkPrimary()->query_DEPRECATED(nsOrUuid,
- filter,
- querySettings,
- limit,
- nToSkip,
- fieldsToReturn,
- queryOptions,
- batchSize,
- readConcernObj);
+ return checkPrimary()->find(std::move(findRequest), readPref, exhaustMode);
}
void DBClientReplicaSet::killCursor(const NamespaceString& ns, long long cursorID) {
@@ -817,70 +698,6 @@ void DBClientReplicaSet::say(Message& toSend, bool isRetry, string* actualServer
if (!isRetry)
_lastClient = nullptr;
- const int lastOp = toSend.operation();
-
- if (lastOp == dbQuery) {
- // TODO: might be possible to do this faster by changing api
- DbMessage dm(toSend);
- QueryMessage qm(dm);
-
- shared_ptr<ReadPreferenceSetting> readPref(
- _extractReadPref(Query::fromBSONDeprecated(qm.query), qm.queryOptions));
- if (_isSecondaryQuery(qm.ns, qm.query, *readPref)) {
- LOGV2_DEBUG(20141,
- 3,
- "dbclient_rs say using secondary or tagged node selection in {replicaSet}, "
- "read pref is {readPref} "
- "(primary : {primary}, lastTagged : {lastTagged})",
- "dbclient_rs say using secondary or tagged node selection",
- "replicaSet"_attr = _getMonitor()->getName(),
- "readPref"_attr = readPref->toString(),
- "primary"_attr = (_primary.get() != nullptr ? _primary->getServerAddress()
- : "[not cached]"),
- "lastTagged"_attr = (_lastSecondaryOkConn.get() != nullptr
- ? _lastSecondaryOkConn->getServerAddress()
- : "[not cached]"));
-
- string lastNodeErrMsg;
-
- for (size_t retry = 0; retry < MAX_RETRY; retry++) {
- try {
- DBClientConnection* conn = selectNodeUsingTags(readPref);
-
- if (conn == nullptr) {
- break;
- }
-
- if (actualServer != nullptr) {
- *actualServer = conn->getServerAddress();
- }
-
- conn->say(toSend);
-
- _lastClient = conn;
- } catch (const DBException& ex) {
- const Status status =
- ex.toStatus(str::stream() << "can't callLazy replica set node "
- << _lastSecondaryOkHost.toString());
- lastNodeErrMsg = status.reason();
- _invalidateLastSecondaryOkCache(status);
-
- continue;
- }
-
- return;
- }
-
- StringBuilder assertMsg;
- assertMsg << "Failed to call say, no good nodes in " << _getMonitor()->getName();
- if (!lastNodeErrMsg.empty()) {
- assertMsg << ", last error: " << lastNodeErrMsg;
- }
-
- uasserted(16380, assertMsg.str());
- }
- }
-
LOGV2_DEBUG(20142,
3,
"dbclient_rs say to primary node in {replicaSet}",
@@ -982,60 +799,6 @@ bool DBClientReplicaSet::call(Message& toSend,
Message& response,
bool assertOk,
string* actualServer) {
- const char* ns = nullptr;
-
- if (toSend.operation() == dbQuery) {
- // TODO: might be possible to do this faster by changing api
- DbMessage dm(toSend);
- QueryMessage qm(dm);
- ns = qm.ns;
-
- shared_ptr<ReadPreferenceSetting> readPref(
- _extractReadPref(Query::fromBSONDeprecated(qm.query), qm.queryOptions));
- if (_isSecondaryQuery(ns, qm.query, *readPref)) {
- LOGV2_DEBUG(
- 20145,
- 3,
- "dbclient_rs call using secondary or tagged node selection in {replicaSet}, "
- "read pref is {readPref} "
- "(primary : {primary}, lastTagged : {lastTagged})",
- "dbclient_rs call using secondary or tagged node selection",
- "replicaSet"_attr = _getMonitor()->getName(),
- "readPref"_attr = readPref->toString(),
- "primary"_attr =
- (_primary.get() != nullptr ? _primary->getServerAddress() : "[not cached]"),
- "lastTagged"_attr = (_lastSecondaryOkConn.get() != nullptr
- ? _lastSecondaryOkConn->getServerAddress()
- : "[not cached]"));
-
- for (size_t retry = 0; retry < MAX_RETRY; retry++) {
- try {
- DBClientConnection* conn = selectNodeUsingTags(readPref);
-
- if (conn == nullptr) {
- return false;
- }
-
- if (actualServer != nullptr) {
- *actualServer = conn->getServerAddress();
- }
-
- return conn->call(toSend, response, assertOk, nullptr);
- } catch (const DBException& ex) {
- if (actualServer)
- *actualServer = "";
-
- const Status status = ex.toStatus();
- _invalidateLastSecondaryOkCache(status.withContext(
- str::stream() << "can't call replica set node " << _lastSecondaryOkHost));
- }
- }
-
- // Was not able to successfully send after max retries
- return false;
- }
- }
-
LOGV2_DEBUG(20146,
3,
"dbclient_rs call to primary node in {replicaSet}",
@@ -1049,20 +812,6 @@ bool DBClientReplicaSet::call(Message& toSend,
if (!m->call(toSend, response, assertOk, nullptr))
return false;
- if (ns) {
- QueryResult::View res = response.singleData().view2ptr();
- if (res.getNReturned() == 1) {
- BSONObj x(res.data());
- if (str::contains(ns, "$cmd")) {
- if (isNotPrimaryErrorString(x["errmsg"]))
- isNotPrimary();
- } else {
- if (isNotPrimaryErrorString(getErrField(x)))
- isNotPrimary();
- }
- }
- }
-
return true;
}
diff --git a/src/mongo/client/dbclient_rs.h b/src/mongo/client/dbclient_rs.h
index d31ff67f5b6..fa796039f2c 100644
--- a/src/mongo/client/dbclient_rs.h
+++ b/src/mongo/client/dbclient_rs.h
@@ -57,7 +57,7 @@ typedef std::shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorPtr;
*/
class DBClientReplicaSet : public DBClientBase {
public:
- using DBClientBase::query_DEPRECATED;
+ using DBClientBase::find;
/** Call connect() after constructing. autoReconnect is always on for DBClientReplicaSet
* connections. */
@@ -89,19 +89,8 @@ public:
// ----------- simple functions --------------
std::unique_ptr<DBClientCursor> find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) override;
-
- /** throws userassertion "no primary found" */
- std::unique_ptr<DBClientCursor> query_DEPRECATED(
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- int limit = 0,
- int nToSkip = 0,
- const BSONObj* fieldsToReturn = nullptr,
- int queryOptions = 0,
- int batchSize = 0,
- boost::optional<BSONObj> readConcernObj = boost::none) override;
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode) override;
void insert(const std::string& ns,
BSONObj obj,
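With the signature above, replica-set callers pick exhaust behaviour explicitly; a hedged usage sketch, assuming ExhaustMode is the kOn/kOff enum used by DBClientBase::find and that the two-argument overload exercised in the tests below comes in via the 'using DBClientBase::find;' declaration (set name and seed hosts are illustrative):

    DBClientReplicaSet replConn(setName, seedHosts, StringData());
    FindCommandRequest findCmd{NamespaceString("test", "coll")};
    // Explicit exhaust mode on the virtual override:
    auto cursor = replConn.find(
        findCmd, ReadPreferenceSetting{ReadPreference::SecondaryPreferred}, ExhaustMode::kOff);
    // Convenience overload inherited from DBClientBase:
    auto cursor2 = replConn.find(std::move(findCmd), ReadPreferenceSetting{});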
diff --git a/src/mongo/client/dbclient_rs_test.cpp b/src/mongo/client/dbclient_rs_test.cpp
index c04a2ddd7aa..7053d8fe623 100644
--- a/src/mongo/client/dbclient_rs_test.cpp
+++ b/src/mongo/client/dbclient_rs_test.cpp
@@ -152,57 +152,16 @@ void assertNodeSelected(MockReplicaSet* replSet, ReadPreference rp, StringData h
assertOneOfNodesSelected(replSet, rp, std::vector<std::string>{host.toString()});
}
-/**
- * Runs a find operation against 'replConn' using both the modern 'find()' API and the deprecated
- * API. In both cases, verifies the results by passing the resulting cursor to 'assertionFunc'.
- *
- * The operation is a simple find command against the given NamespaceString with no arguments other
- * than 'readPref'.
- */
-void assertWithBothQueryApis(DBClientReplicaSet& replConn,
- const NamespaceString& nss,
- ReadPreference readPref,
- std::function<void(std::unique_ptr<DBClientCursor>)> assertionFunc) {
- std::unique_ptr<DBClientCursor> cursor =
- replConn.find(FindCommandRequest{nss}, ReadPreferenceSetting{readPref});
- assertionFunc(std::move(cursor));
-
- Query readPrefHolder;
- readPrefHolder.readPref(readPref, BSONArray{});
- cursor = replConn.query_DEPRECATED(nss, BSONObj{}, readPrefHolder);
- assertionFunc(std::move(cursor));
-}
-
-/**
- * Runs a find operation against 'replConn' using both the modern 'find()' API and the deprecated
- * API. In both cases, verifies that the find operation throws an exception.
- *
- * The operation is a simple find command against the given NamespaceString with no arguments other
- * than 'readPref'.
- */
-void assertBothQueryApisThrow(DBClientReplicaSet& replConn,
- const NamespaceString& nss,
- ReadPreference readPref) {
- ASSERT_THROWS(replConn.find(FindCommandRequest{nss}, ReadPreferenceSetting{readPref}),
- AssertionException);
-
- Query readPrefHolder;
- readPrefHolder.readPref(readPref, BSONArray{});
- ASSERT_THROWS(replConn.query_DEPRECATED(nss, BSONObj{}, readPrefHolder), AssertionException);
-}
-
TEST_F(BasicRS, QueryPrimary) {
MockReplicaSet* replSet = getReplSet();
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
// Note: IdentityNS contains the name of the server.
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::PrimaryOnly,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor =
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::PrimaryOnly});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
}
TEST_F(BasicRS, CommandPrimary) {
@@ -214,14 +173,11 @@ TEST_F(BasicRS, QuerySecondaryOnly) {
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
// Note: IdentityNS contains the name of the server.
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::SecondaryOnly,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getSecondaries().front(),
- doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor =
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::SecondaryOnly});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getSecondaries().front(), doc[HostField.name()].str());
}
TEST_F(BasicRS, CommandSecondaryOnly) {
@@ -234,13 +190,11 @@ TEST_F(BasicRS, QueryPrimaryPreferred) {
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
// Note: IdentityNS contains the name of the server.
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::PrimaryPreferred,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor =
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::PrimaryPreferred});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
}
TEST_F(BasicRS, CommandPrimaryPreferred) {
@@ -252,14 +206,11 @@ TEST_F(BasicRS, QuerySecondaryPreferred) {
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
// Note: IdentityNS contains the name of the server.
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::SecondaryPreferred,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getSecondaries().front(),
- doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor = replConn.find(std::move(findCmd),
+ ReadPreferenceSetting{ReadPreference::SecondaryPreferred});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getSecondaries().front(), doc[HostField.name()].str());
}
TEST_F(BasicRS, CommandSecondaryPreferred) {
@@ -319,7 +270,10 @@ TEST_F(AllNodesDown, QueryPrimary) {
MockReplicaSet* replSet = getReplSet();
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
- assertBothQueryApisThrow(replConn, NamespaceString{IdentityNS}, ReadPreference::PrimaryOnly);
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ ASSERT_THROWS(
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::PrimaryOnly}),
+ AssertionException);
}
TEST_F(AllNodesDown, CommandPrimary) {
@@ -330,7 +284,10 @@ TEST_F(AllNodesDown, QuerySecondaryOnly) {
MockReplicaSet* replSet = getReplSet();
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
- assertBothQueryApisThrow(replConn, NamespaceString{IdentityNS}, ReadPreference::SecondaryOnly);
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ ASSERT_THROWS(
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::SecondaryOnly}),
+ AssertionException);
}
TEST_F(AllNodesDown, CommandSecondaryOnly) {
@@ -341,8 +298,10 @@ TEST_F(AllNodesDown, QueryPrimaryPreferred) {
MockReplicaSet* replSet = getReplSet();
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
- assertBothQueryApisThrow(
- replConn, NamespaceString{IdentityNS}, ReadPreference::PrimaryPreferred);
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ ASSERT_THROWS(
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::PrimaryPreferred}),
+ AssertionException);
}
TEST_F(AllNodesDown, CommandPrimaryPreferred) {
@@ -353,8 +312,10 @@ TEST_F(AllNodesDown, QuerySecondaryPreferred) {
MockReplicaSet* replSet = getReplSet();
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
- assertBothQueryApisThrow(
- replConn, NamespaceString{IdentityNS}, ReadPreference::SecondaryPreferred);
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ ASSERT_THROWS(replConn.find(std::move(findCmd),
+ ReadPreferenceSetting{ReadPreference::SecondaryPreferred}),
+ AssertionException);
}
TEST_F(AllNodesDown, CommandSecondaryPreferred) {
@@ -365,7 +326,9 @@ TEST_F(AllNodesDown, QueryNearest) {
MockReplicaSet* replSet = getReplSet();
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
- assertBothQueryApisThrow(replConn, NamespaceString{IdentityNS}, ReadPreference::Nearest);
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ ASSERT_THROWS(replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::Nearest}),
+ AssertionException);
}
TEST_F(AllNodesDown, CommandNearest) {
@@ -409,7 +372,10 @@ TEST_F(PrimaryDown, QueryPrimary) {
MockReplicaSet* replSet = getReplSet();
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
- assertBothQueryApisThrow(replConn, NamespaceString{IdentityNS}, ReadPreference::PrimaryOnly);
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ ASSERT_THROWS(
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::PrimaryOnly}),
+ AssertionException);
}
TEST_F(PrimaryDown, CommandPrimary) {
@@ -421,14 +387,11 @@ TEST_F(PrimaryDown, QuerySecondaryOnly) {
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
// Note: IdentityNS contains the name of the server.
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::SecondaryOnly,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getSecondaries().front(),
- doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor =
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::SecondaryOnly});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getSecondaries().front(), doc[HostField.name()].str());
}
TEST_F(PrimaryDown, CommandSecondaryOnly) {
@@ -441,14 +404,11 @@ TEST_F(PrimaryDown, QueryPrimaryPreferred) {
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
// Note: IdentityNS contains the name of the server.
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::PrimaryPreferred,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getSecondaries().front(),
- doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor =
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::PrimaryPreferred});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getSecondaries().front(), doc[HostField.name()].str());
}
TEST_F(PrimaryDown, CommandPrimaryPreferred) {
@@ -461,14 +421,11 @@ TEST_F(PrimaryDown, QuerySecondaryPreferred) {
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
// Note: IdentityNS contains the name of the server.
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::SecondaryPreferred,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getSecondaries().front(),
- doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor = replConn.find(std::move(findCmd),
+ ReadPreferenceSetting{ReadPreference::SecondaryPreferred});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getSecondaries().front(), doc[HostField.name()].str());
}
TEST_F(PrimaryDown, CommandSecondaryPreferred) {
@@ -480,14 +437,10 @@ TEST_F(PrimaryDown, Nearest) {
MockReplicaSet* replSet = getReplSet();
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::Nearest,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getSecondaries().front(),
- doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor = replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::Nearest});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getSecondaries().front(), doc[HostField.name()].str());
}
/**
@@ -529,13 +482,11 @@ TEST_F(SecondaryDown, QueryPrimary) {
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
// Note: IdentityNS contains the name of the server.
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::PrimaryOnly,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor =
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::PrimaryOnly});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
}
TEST_F(SecondaryDown, CommandPrimary) {
@@ -546,7 +497,10 @@ TEST_F(SecondaryDown, QuerySecondaryOnly) {
MockReplicaSet* replSet = getReplSet();
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
- assertBothQueryApisThrow(replConn, NamespaceString{IdentityNS}, ReadPreference::SecondaryOnly);
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ ASSERT_THROWS(
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::SecondaryOnly}),
+ AssertionException);
}
TEST_F(SecondaryDown, CommandSecondaryOnly) {
@@ -558,13 +512,11 @@ TEST_F(SecondaryDown, QueryPrimaryPreferred) {
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
// Note: IdentityNS contains the name of the server.
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::PrimaryPreferred,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor =
+ replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::PrimaryPreferred});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
}
TEST_F(SecondaryDown, CommandPrimaryPreferred) {
@@ -575,13 +527,11 @@ TEST_F(SecondaryDown, QuerySecondaryPreferred) {
MockReplicaSet* replSet = getReplSet();
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::SecondaryPreferred,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor = replConn.find(std::move(findCmd),
+ ReadPreferenceSetting{ReadPreference::SecondaryPreferred});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
}
TEST_F(SecondaryDown, CommandSecondaryPreferred) {
@@ -592,13 +542,10 @@ TEST_F(SecondaryDown, QueryNearest) {
MockReplicaSet* replSet = getReplSet();
DBClientReplicaSet replConn(replSet->getSetName(), replSet->getHosts(), StringData());
- assertWithBothQueryApis(replConn,
- NamespaceString{IdentityNS},
- ReadPreference::Nearest,
- [&](std::unique_ptr<DBClientCursor> cursor) {
- BSONObj doc = cursor->next();
- ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
- });
+ FindCommandRequest findCmd{NamespaceString{IdentityNS}};
+ auto cursor = replConn.find(std::move(findCmd), ReadPreferenceSetting{ReadPreference::Nearest});
+ BSONObj doc = cursor->next();
+ ASSERT_EQUALS(replSet->getPrimary(), doc[HostField.name()].str());
}
TEST_F(SecondaryDown, CommandNearest) {
diff --git a/src/mongo/client/query.cpp b/src/mongo/client/query.cpp
deleted file mode 100644
index 39d7e1316e2..00000000000
--- a/src/mongo/client/query.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/client/query.h"
-
-#include "mongo/base/status.h"
-#include "mongo/base/status_with.h"
-#include "mongo/bson/util/builder.h"
-
-namespace mongo {
-
-using std::string;
-
-const BSONField<BSONObj> Query::ReadPrefField("$readPreference");
-const BSONField<string> Query::ReadPrefModeField("mode");
-const BSONField<BSONArray> Query::ReadPrefTagsField("tags");
-
-void Query::makeComplex() {
- if (isComplex())
- return;
- BSONObjBuilder b;
- b.append("query", obj);
- obj = b.obj();
-}
-
-Query& Query::sort(const BSONObj& s) {
- appendComplex("orderby", s);
- return *this;
-}
-
-Query& Query::hint(BSONObj keyPattern) {
- appendComplex("$hint", keyPattern);
- return *this;
-}
-
-bool Query::isComplex(const BSONObj& obj, bool* hasDollar) {
- if (obj.hasElement("query")) {
- if (hasDollar)
- *hasDollar = false;
- return true;
- }
-
- if (obj.hasElement("$query")) {
- if (hasDollar)
- *hasDollar = true;
- return true;
- }
-
- return false;
-}
-
-BSONObj Query::getFilter() const {
- bool hasDollar;
- if (!isComplex(&hasDollar))
- return obj;
-
- return obj.getObjectField(hasDollar ? "$query" : "query");
-}
-
-Query& Query::readPref(ReadPreference pref, const BSONArray& tags) {
- appendComplex(ReadPrefField.name().c_str(),
- ReadPreferenceSetting(pref, TagSet(tags)).toInnerBSON());
- return *this;
-}
-
-bool Query::isComplex(bool* hasDollar) const {
- return isComplex(obj, hasDollar);
-}
-
-Query& Query::appendElements(BSONObj elements) {
- makeComplex();
- BSONObjBuilder b(std::move(obj));
- b.appendElements(elements);
- obj = b.obj();
- return *this;
-}
-
-Query& Query::requestResumeToken(bool enable) {
- appendComplex("$_requestResumeToken", enable);
- return *this;
-}
-
-Query& Query::resumeAfter(BSONObj point) {
- appendComplex("$_resumeAfter", point);
- return *this;
-}
-
-Query& Query::maxTimeMS(long long timeout) {
- appendComplex("$maxTimeMS", timeout);
- return *this;
-}
-
-Query& Query::term(long long value) {
- appendComplex("term", value);
- return *this;
-}
-
-Query& Query::readConcern(BSONObj rc) {
- appendComplex("readConcern", rc);
- return *this;
-}
-
-Query& Query::readOnce(bool enable) {
- appendComplex("$readOnce", enable);
- return *this;
-}
-} // namespace mongo
diff --git a/src/mongo/client/query.h b/src/mongo/client/query.h
deleted file mode 100644
index af85022fb07..00000000000
--- a/src/mongo/client/query.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/bson/json.h"
-#include "mongo/client/read_preference.h"
-#include "mongo/rpc/message.h"
-
-namespace mongo {
-
-/**
- * Represents a subset of query settings, such as sort, hint, etc. It is only used in the context of
- * the deprecated query API in 'DBClientBase', which has been superseded by `DBClientBase::find()`
- * and friends. Additional uses of this class should not be added to the code base!
- */
-class Query {
-public:
- static const BSONField<BSONObj> ReadPrefField;
- static const BSONField<std::string> ReadPrefModeField;
- static const BSONField<BSONArray> ReadPrefTagsField;
-
- /**
- * Creating a Query object from raw BSON is on its way out. Please don't add new callers under
- * any circumstances.
- */
- static Query fromBSONDeprecated(const BSONObj& b) {
- Query q;
- q.obj = b;
- return q;
- }
-
- Query() : obj(BSONObj()) {}
-
- /** Add a sort (ORDER BY) criteria to the query expression.
- @param sortPattern the sort order template. For example to order by name ascending, time
- descending:
- { name : 1, ts : -1 }
- i.e.
- BSON( "name" << 1 << "ts" << -1 )
- or
- fromjson(" name : 1, ts : -1 ")
- */
- Query& sort(const BSONObj& sortPattern);
-
- /** Provide a hint to the query.
- @param keyPattern Key pattern for the index to use.
- Example:
- hint("{ts:1}")
- */
- Query& hint(BSONObj keyPattern);
-
- /**
- * Sets the read preference for this query.
- *
- * @param pref the read preference mode for this query.
- * @param tags the set of tags to use for this query.
- */
- Query& readPref(ReadPreference pref, const BSONArray& tags);
-
- BSONObj getFilter() const;
-
- /**
- * A temporary accessor that returns a reference to the internal BSON object. No new callers
- * should be introduced!
- * NB: must be implemented in the header because db/query/query_request cannot link against
- * client/client_query.
- */
- const BSONObj& getFullSettingsDeprecated() const {
- return obj;
- }
-
- /**
- * The setters below were added to make the contents of the Query's settings internal BSON
- * explicit. They will be reviewed and deprecated/removed as appropriate.
- */
- Query& appendElements(BSONObj elements);
- Query& requestResumeToken(bool enable);
- Query& resumeAfter(BSONObj point);
- Query& maxTimeMS(long long timeout);
- Query& term(long long value);
- Query& readConcern(BSONObj rc);
- Query& readOnce(bool enable);
-
-private:
- BSONObj obj;
-
- /**
- * @return true if this query has an orderby, hint, or some other field
- */
- bool isComplex(bool* hasDollar = nullptr) const;
- static bool isComplex(const BSONObj& obj, bool* hasDollar = nullptr);
-
- void makeComplex();
- template <class T>
- void appendComplex(const char* fieldName, const T& val) {
- makeComplex();
- BSONObjBuilder b(std::move(obj));
- b.append(fieldName, val);
- obj = b.obj();
- }
-};
-
-inline std::ostream& operator<<(std::ostream& s, const Query& q) {
- return s << q.getFullSettingsDeprecated().toString();
-}
-
-} // namespace mongo
diff --git a/src/mongo/crypto/encryption_fields.idl b/src/mongo/crypto/encryption_fields.idl
index 903a1d4f415..d40c9153ee6 100644
--- a/src/mongo/crypto/encryption_fields.idl
+++ b/src/mongo/crypto/encryption_fields.idl
@@ -45,6 +45,11 @@ feature_flags:
default: true
version: 6.0
+ featureFlagFLE2Range:
+ description: "Enable support for range indexes in Queryable Encryption"
+ cpp_varname: gFeatureFlagFLE2Range
+ default: false
+
structs:
QueryTypeConfig:
@@ -58,7 +63,7 @@ structs:
contention:
description: "Contention factor for the field; 0 means the field has an extremely high number of distinct values"
type: exactInt64
- default: 0
+ default: 4
unstable: true
validator: { gte: 0 }
diff --git a/src/mongo/crypto/fle_crypto.cpp b/src/mongo/crypto/fle_crypto.cpp
index 38800792351..f55db25f970 100644
--- a/src/mongo/crypto/fle_crypto.cpp
+++ b/src/mongo/crypto/fle_crypto.cpp
@@ -53,6 +53,7 @@
#include "mongo/base/error_codes.h"
#include "mongo/base/status.h"
#include "mongo/bson/bson_depth.h"
+#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/bsontypes.h"
@@ -153,6 +154,8 @@ PrfBlock blockToArray(const SHA256Block& block) {
return data;
}
+} // namespace
+
PrfBlock PrfBlockfromCDR(ConstDataRange block) {
uassert(6373501, "Invalid prf length", block.length() == sizeof(PrfBlock));
@@ -161,6 +164,7 @@ PrfBlock PrfBlockfromCDR(ConstDataRange block) {
return ret;
}
+namespace {
ConstDataRange hmacKey(const KeyMaterial& keyMaterial) {
static_assert(kHmacKeyOffset + crypto::sym256KeySize <= crypto::kFieldLevelEncryptionKeySize);
invariant(crypto::kFieldLevelEncryptionKeySize == keyMaterial->size());
@@ -212,15 +216,18 @@ ConstDataRange binDataToCDR(const BSONElement element) {
return ConstDataRange(data, data + len);
}
-ConstDataRange binDataToCDR(const Value& value) {
- uassert(6334103, "Expected binData Value type", value.getType() == BinData);
-
- auto binData = value.getBinData();
+ConstDataRange binDataToCDR(const BSONBinData binData) {
int len = binData.length;
const char* data = static_cast<const char*>(binData.data);
return ConstDataRange(data, data + len);
}
+ConstDataRange binDataToCDR(const Value& value) {
+ uassert(6334103, "Expected binData Value type", value.getType() == BinData);
+
+ return binDataToCDR(value.getBinData());
+}
+
template <typename T>
void toBinData(StringData field, T t, BSONObjBuilder* builder) {
BSONObj obj = t.toBSON();
@@ -292,7 +299,7 @@ void toEncryptedBinData(StringData field,
std::pair<EncryptedBinDataType, ConstDataRange> fromEncryptedBinData(BSONElement element) {
uassert(
- 6373502, "Expected binData with subtype Encrypt", element.isBinData(BinDataType::Encrypt));
+ 6672414, "Expected binData with subtype Encrypt", element.isBinData(BinDataType::Encrypt));
return fromEncryptedConstDataRange(binDataToCDR(element));
}
@@ -965,7 +972,6 @@ void parseAndVerifyInsertUpdatePayload(std::vector<EDCServerPayloadInfo>* pField
void collectEDCServerInfo(std::vector<EDCServerPayloadInfo>* pFields,
ConstDataRange cdr,
-
StringData fieldPath) {
// TODO - validate field is actually indexed in the schema?
@@ -1163,6 +1169,28 @@ uint64_t generateRandomContention(uint64_t cm) {
} // namespace
+std::pair<EncryptedBinDataType, ConstDataRange> fromEncryptedBinData(const Value& value) {
+ uassert(6672416, "Expected binData with subtype Encrypt", value.getType() == BinData);
+
+ auto binData = value.getBinData();
+
+ uassert(6672415, "Expected binData with subtype Encrypt", binData.type == BinDataType::Encrypt);
+
+ return fromEncryptedConstDataRange(binDataToCDR(binData));
+}
+
+BSONBinData toBSONBinData(const std::vector<uint8_t>& buf) {
+ return BSONBinData(buf.data(), buf.size(), Encrypt);
+}
+
+std::vector<uint8_t> toEncryptedVector(EncryptedBinDataType dt, const PrfBlock& block) {
+ std::vector<uint8_t> buf(block.size() + 1);
+ buf[0] = static_cast<uint8_t>(dt);
+
+ std::copy(block.data(), block.data() + block.size(), buf.data() + 1);
+
+ return buf;
+}
CollectionsLevel1Token FLELevel1TokenGenerator::generateCollectionsLevel1Token(
FLEIndexKey indexKey) {
@@ -1364,6 +1392,8 @@ std::pair<BSONType, std::vector<uint8_t>> FLEClientCrypto::decrypt(ConstDataRang
return {EOO, vectorFromCDR(pair.second)};
} else if (pair.first == EncryptedBinDataType::kFLE2InsertUpdatePayload) {
return {EOO, vectorFromCDR(pair.second)};
+ } else if (pair.first == EncryptedBinDataType::kFLE2TransientRaw) {
+ return {EOO, vectorFromCDR(pair.second)};
} else {
uasserted(6373507, "Not supported");
}
@@ -1720,6 +1750,8 @@ FLE2FindEqualityPayload FLEClientCrypto::serializeFindPayload(FLEIndexKeyAndId i
auto value = ConstDataRange(element.value(), element.value() + element.valuesize());
auto collectionToken = FLELevel1TokenGenerator::generateCollectionsLevel1Token(indexKey.key);
+ auto serverToken =
+ FLELevel1TokenGenerator::generateServerDataEncryptionLevel1Token(indexKey.key);
auto edcToken = FLECollectionTokenGenerator::generateEDCToken(collectionToken);
auto escToken = FLECollectionTokenGenerator::generateESCToken(collectionToken);
@@ -1738,6 +1770,7 @@ FLE2FindEqualityPayload FLEClientCrypto::serializeFindPayload(FLEIndexKeyAndId i
payload.setEscDerivedToken(escDatakey.toCDR());
payload.setEccDerivedToken(eccDatakey.toCDR());
payload.setMaxCounter(maxContentionFactor);
+ payload.setServerEncryptionToken(serverToken.toCDR());
return payload;
}
@@ -2019,7 +2052,8 @@ ESCDerivedFromDataTokenAndContentionFactorToken EDCServerPayloadInfo::getESCToke
}
void EDCServerCollection::validateEncryptedFieldInfo(BSONObj& obj,
- const EncryptedFieldConfig& efc) {
+ const EncryptedFieldConfig& efc,
+ bool bypassDocumentValidation) {
stdx::unordered_set<std::string> indexedFields;
for (auto f : efc.getFields()) {
if (f.getQueries().has_value()) {
@@ -2036,6 +2070,11 @@ void EDCServerCollection::validateEncryptedFieldInfo(BSONObj& obj,
indexedFields.contains(fieldPath.toString()));
}
});
+
+ // We should ensure that the user is not manually modifying the safe content array.
+ uassert(6666200,
+ str::stream() << "Cannot modify " << kSafeContent << " field in document.",
+ !obj.hasField(kSafeContent) || bypassDocumentValidation);
}
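A short sketch of how the new bypassDocumentValidation argument interacts with the check above; getTestEncryptedFieldConfig() is a hypothetical helper standing in for a real schema, and kSafeContent is assumed to name the __safeContent__ array:

    EncryptedFieldConfig efc = getTestEncryptedFieldConfig();  // hypothetical schema helper
    BSONObj tampered = BSON("plainText"
                            << "sample"
                            << "__safeContent__" << BSON_ARRAY(1));
    // Without the bypass, a client-supplied __safeContent__ trips uassert 6666200.
    ASSERT_THROWS_CODE(EDCServerCollection::validateEncryptedFieldInfo(tampered, efc, false),
                       DBException,
                       6666200);
    // When bypassDocumentValidation is set on the request, the check is skipped.
    EDCServerCollection::validateEncryptedFieldInfo(tampered, efc, true);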
@@ -2076,6 +2115,44 @@ PrfBlock EDCServerCollection::generateTag(const FLE2IndexedEqualityEncryptedValu
return generateTag(edcTwiceDerived, indexedValue.count);
}
+
+StatusWith<FLE2IndexedEqualityEncryptedValue> EDCServerCollection::decryptAndParse(
+ ServerDataEncryptionLevel1Token token, ConstDataRange serializedServerValue) {
+ auto pair = fromEncryptedConstDataRange(serializedServerValue);
+ uassert(6672412,
+ "Wrong encrypted field type",
+ pair.first == EncryptedBinDataType::kFLE2EqualityIndexedValue);
+
+ return FLE2IndexedEqualityEncryptedValue::decryptAndParse(token, pair.second);
+}
+
+StatusWith<FLE2IndexedEqualityEncryptedValue> EDCServerCollection::decryptAndParse(
+ ConstDataRange token, ConstDataRange serializedServerValue) {
+ auto serverToken = FLETokenFromCDR<FLETokenType::ServerDataEncryptionLevel1Token>(token);
+
+ return FLE2IndexedEqualityEncryptedValue::decryptAndParse(serverToken, serializedServerValue);
+}
+
+std::vector<EDCDerivedFromDataTokenAndContentionFactorToken> EDCServerCollection::generateEDCTokens(
+ EDCDerivedFromDataToken token, uint64_t maxContentionFactor) {
+ std::vector<EDCDerivedFromDataTokenAndContentionFactorToken> tokens;
+ tokens.reserve(maxContentionFactor);
+
+ for (uint64_t i = 0; i <= maxContentionFactor; ++i) {
+ tokens.push_back(FLEDerivedFromDataTokenAndContentionFactorTokenGenerator::
+ generateEDCDerivedFromDataTokenAndContentionFactorToken(token, i));
+ }
+
+ return tokens;
+}
+
+std::vector<EDCDerivedFromDataTokenAndContentionFactorToken> EDCServerCollection::generateEDCTokens(
+ ConstDataRange rawToken, uint64_t maxContentionFactor) {
+ auto token = FLETokenFromCDR<FLETokenType::EDCDerivedFromDataToken>(rawToken);
+
+ return generateEDCTokens(token, maxContentionFactor);
+}
+
BSONObj EDCServerCollection::finalizeForInsert(
const BSONObj& doc, const std::vector<EDCServerPayloadInfo>& serverPayload) {
std::vector<TagInfo> tags;
@@ -2305,6 +2382,7 @@ EncryptedFieldConfig EncryptionInformationHelpers::getAndValidateSchema(
return efc;
}
+
std::pair<EncryptedBinDataType, ConstDataRange> fromEncryptedConstDataRange(ConstDataRange cdr) {
ConstDataRangeCursor cdrc(cdr);
@@ -2377,6 +2455,12 @@ ParsedFindPayload::ParsedFindPayload(ConstDataRange cdr) {
escToken = FLETokenFromCDR<FLETokenType::ESCDerivedFromDataToken>(payload.getEscDerivedToken());
eccToken = FLETokenFromCDR<FLETokenType::ECCDerivedFromDataToken>(payload.getEccDerivedToken());
edcToken = FLETokenFromCDR<FLETokenType::EDCDerivedFromDataToken>(payload.getEdcDerivedToken());
+
+ if (payload.getServerEncryptionToken().has_value()) {
+ serverToken = FLETokenFromCDR<FLETokenType::ServerDataEncryptionLevel1Token>(
+ payload.getServerEncryptionToken().value());
+ }
+
maxCounter = payload.getMaxCounter();
}
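Given the optional member added to ParsedFindPayload (see the header hunk further down), a consumer can branch on the presence of the server token; a minimal sketch where payloadCdr stands for a ConstDataRange over a serialized find payload:

    ParsedFindPayload parsed(payloadCdr);
    if (parsed.serverToken) {
        // Newer payloads carry the ServerDataEncryptionLevel1Token alongside the
        // EDC/ESC/ECC tokens, e.g. for EDCServerCollection::decryptAndParse().
        auto serverToken = parsed.serverToken.value();
        // ... use serverToken ...
    }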
diff --git a/src/mongo/crypto/fle_crypto.h b/src/mongo/crypto/fle_crypto.h
index 5d8285f5790..5feac8ca2d3 100644
--- a/src/mongo/crypto/fle_crypto.h
+++ b/src/mongo/crypto/fle_crypto.h
@@ -41,6 +41,7 @@
#include "mongo/base/status_with.h"
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonelement.h"
+#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsontypes.h"
#include "mongo/crypto/aead_encryption.h"
@@ -1009,13 +1010,21 @@ public:
/**
* Validate that payload is compatible with schema
*/
- static void validateEncryptedFieldInfo(BSONObj& obj, const EncryptedFieldConfig& efc);
+ static void validateEncryptedFieldInfo(BSONObj& obj,
+ const EncryptedFieldConfig& efc,
+ bool bypassDocumentValidation);
/**
* Get information about all FLE2InsertUpdatePayload payloads
*/
static std::vector<EDCServerPayloadInfo> getEncryptedFieldInfo(BSONObj& obj);
+ static StatusWith<FLE2IndexedEqualityEncryptedValue> decryptAndParse(
+ ServerDataEncryptionLevel1Token token, ConstDataRange serializedServerValue);
+
+ static StatusWith<FLE2IndexedEqualityEncryptedValue> decryptAndParse(
+ ConstDataRange token, ConstDataRange serializedServerValue);
+
/**
* Generate a search tag
*
@@ -1026,6 +1035,14 @@ public:
static PrfBlock generateTag(const FLE2IndexedEqualityEncryptedValue& indexedValue);
/**
+ * Generate all the EDC tokens
+ */
+ static std::vector<EDCDerivedFromDataTokenAndContentionFactorToken> generateEDCTokens(
+ EDCDerivedFromDataToken token, uint64_t maxContentionFactor);
+ static std::vector<EDCDerivedFromDataTokenAndContentionFactorToken> generateEDCTokens(
+ ConstDataRange rawToken, uint64_t maxContentionFactor);
+
+ /**
* Consumes a payload from a MongoDB client for insert.
*
* Converts FLE2InsertUpdatePayload to a final insert payload and updates __safeContent__ with
@@ -1163,6 +1180,7 @@ struct ParsedFindPayload {
ESCDerivedFromDataToken escToken;
ECCDerivedFromDataToken eccToken;
EDCDerivedFromDataToken edcToken;
+ boost::optional<ServerDataEncryptionLevel1Token> serverToken;
boost::optional<std::int64_t> maxCounter;
explicit ParsedFindPayload(BSONElement fleFindPayload);
@@ -1170,4 +1188,15 @@ struct ParsedFindPayload {
explicit ParsedFindPayload(ConstDataRange cdr);
};
+/**
+ * Utility functions for manipulating buffers
+ */
+PrfBlock PrfBlockfromCDR(ConstDataRange block);
+
+std::vector<uint8_t> toEncryptedVector(EncryptedBinDataType dt, const PrfBlock& block);
+
+BSONBinData toBSONBinData(const std::vector<uint8_t>& buf);
+
+std::pair<EncryptedBinDataType, ConstDataRange> fromEncryptedBinData(const Value& value);
+
} // namespace mongo
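The buffer helpers declared above compose into a simple round trip; a hedged sketch, assuming PrfBlock is the fixed-size byte array used throughout this header and that Value is constructible from BSONBinData:

    PrfBlock block = {};  // e.g. a tag from EDCServerCollection::generateTag()
    std::vector<uint8_t> buf = toEncryptedVector(EncryptedBinDataType::kFLE2TransientRaw, block);
    BSONBinData bin = toBSONBinData(buf);  // BinData with subtype Encrypt, backed by 'buf'
    auto [type, cdr] = fromEncryptedBinData(Value(bin));
    invariant(type == EncryptedBinDataType::kFLE2TransientRaw);
    PrfBlock roundTripped = PrfBlockfromCDR(cdr);  // cdr excludes the leading type byte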
diff --git a/src/mongo/crypto/fle_crypto_test.cpp b/src/mongo/crypto/fle_crypto_test.cpp
index 75c1976c097..4c4355ebb9f 100644
--- a/src/mongo/crypto/fle_crypto_test.cpp
+++ b/src/mongo/crypto/fle_crypto_test.cpp
@@ -33,6 +33,7 @@
#include "mongo/crypto/fle_crypto.h"
#include <algorithm>
+#include <cstdint>
#include <iostream>
#include <limits>
#include <stack>
@@ -696,7 +697,8 @@ std::vector<char> generatePlaceholder(
BSONElement value,
Operation operation,
mongo::Fle2AlgorithmInt algorithm = mongo::Fle2AlgorithmInt::kEquality,
- boost::optional<UUID> key = boost::none) {
+ boost::optional<UUID> key = boost::none,
+ uint64_t contention = 0) {
FLE2EncryptionPlaceholder ep;
if (operation == Operation::kFind) {
@@ -709,7 +711,7 @@ std::vector<char> generatePlaceholder(
ep.setUserKeyId(userKeyId);
ep.setIndexKeyId(key.value_or(indexKeyId));
ep.setValue(value);
- ep.setMaxContentionCounter(0);
+ ep.setMaxContentionCounter(contention);
BSONObj obj = ep.toBSON();
@@ -726,7 +728,7 @@ BSONObj encryptDocument(BSONObj obj,
auto result = FLEClientCrypto::transformPlaceholders(obj, keyVault);
if (nullptr != efc) {
- EDCServerCollection::validateEncryptedFieldInfo(result, *efc);
+ EDCServerCollection::validateEncryptedFieldInfo(result, *efc, false);
}
// Start Server Side
@@ -832,6 +834,41 @@ void roundTripMultiencrypted(BSONObj doc1,
assertPayload(finalDoc["encrypted2"], operation2);
}
+// Used to generate the test data for the ExpressionFLETest in expression_test.cpp
+TEST(FLE_EDC, PrintTest) {
+ auto doc = BSON("value" << 1);
+ auto element = doc.firstElement();
+
+ TestKeyVault keyVault;
+
+ auto inputDoc = BSON("plainText"
+ << "sample"
+ << "encrypted" << element);
+
+ {
+ auto buf = generatePlaceholder(element, Operation::kInsert, Fle2AlgorithmInt::kEquality);
+ BSONObjBuilder builder;
+ builder.append("plainText", "sample");
+ builder.appendBinData("encrypted", buf.size(), BinDataType::Encrypt, buf.data());
+
+ auto finalDoc = encryptDocument(builder.obj(), &keyVault);
+
+ std::cout << finalDoc.jsonString() << std::endl;
+ }
+
+ {
+ auto buf = generatePlaceholder(
+ element, Operation::kInsert, Fle2AlgorithmInt::kEquality, boost::none, 50);
+ BSONObjBuilder builder;
+ builder.append("plainText", "sample");
+ builder.appendBinData("encrypted", buf.size(), BinDataType::Encrypt, buf.data());
+
+ auto finalDoc = encryptDocument(builder.obj(), &keyVault);
+
+ std::cout << finalDoc.jsonString() << std::endl;
+ }
+}
+
TEST(FLE_EDC, Allowed_Types) {
const std::vector<std::pair<BSONObj, BSONType>> universallyAllowedObjects{
{BSON("sample"
@@ -1928,4 +1965,25 @@ TEST(CompactionHelpersTest, countDeletedTest) {
ASSERT_EQ(CompactionHelpers::countDeleted(input), 20);
}
+TEST(EDCServerCollectionTest, GenerateEDCTokens) {
+
+ auto doc = BSON("sample" << 123456);
+ auto element = doc.firstElement();
+
+ auto value = ConstDataRange(element.value(), element.value() + element.valuesize());
+
+ auto collectionToken = FLELevel1TokenGenerator::generateCollectionsLevel1Token(getIndexKey());
+ auto edcToken = FLECollectionTokenGenerator::generateEDCToken(collectionToken);
+
+ EDCDerivedFromDataToken edcDatakey =
+ FLEDerivedFromDataTokenGenerator::generateEDCDerivedFromDataToken(edcToken, value);
+
+
+ ASSERT_EQ(EDCServerCollection::generateEDCTokens(edcDatakey, 0).size(), 1);
+ ASSERT_EQ(EDCServerCollection::generateEDCTokens(edcDatakey, 1).size(), 2);
+ ASSERT_EQ(EDCServerCollection::generateEDCTokens(edcDatakey, 2).size(), 3);
+ ASSERT_EQ(EDCServerCollection::generateEDCTokens(edcDatakey, 3).size(), 4);
+}
+
+
} // namespace mongo
diff --git a/src/mongo/crypto/fle_field_schema.idl b/src/mongo/crypto/fle_field_schema.idl
index 030fa36ef3f..d9e2b54b890 100644
--- a/src/mongo/crypto/fle_field_schema.idl
+++ b/src/mongo/crypto/fle_field_schema.idl
@@ -51,6 +51,10 @@ enums:
kFLE2UnindexedEncryptedValue : 6 # see FLE2IndexedEqualityEncryptedValue
kFLE2EqualityIndexedValue : 7
+ # Transient encrypted data used in query rewrites; not persisted.
+ # Same as BinDataGeneral, but redacted.
+ kFLE2TransientRaw : 8
+
FleVersion:
description: "The version / type of field-level encryption in use."
type: int
diff --git a/src/mongo/crypto/fle_tags.cpp b/src/mongo/crypto/fle_tags.cpp
index a0de37b2f42..4737ff13144 100644
--- a/src/mongo/crypto/fle_tags.cpp
+++ b/src/mongo/crypto/fle_tags.cpp
@@ -56,8 +56,11 @@ void verifyTagsWillFit(size_t tagCount, size_t memoryLimit) {
constexpr size_t largestElementSize = arrayElementSize(std::numeric_limits<size_t>::digits10);
constexpr size_t ridiculousNumberOfTags =
std::numeric_limits<size_t>::max() / largestElementSize;
- uassert(6653300, "Encrypted rewrite too many tags", tagCount < ridiculousNumberOfTags);
- uassert(6401800,
+
+ uassert(ErrorCodes::FLEMaxTagLimitExceeded,
+ "Encrypted rewrite too many tags",
+ tagCount < ridiculousNumberOfTags);
+ uassert(ErrorCodes::FLEMaxTagLimitExceeded,
"Encrypted rewrite memory limit exceeded",
sizeArrayElementsMemory(tagCount) <= memoryLimit);
}
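Because both limits above now surface under a single named error code instead of two numeric codes, callers can handle them uniformly; a small sketch of the catching side (readTags here is just a hypothetical caller that ends up in verifyTagsWillFit()):

    try {
        auto tags = readTags(/* ... */);  // hypothetical caller of verifyTagsWillFit()
    } catch (const DBException& ex) {
        if (ex.code() != ErrorCodes::FLEMaxTagLimitExceeded) {
            throw;
        }
        // Too many tags, or too much memory, for the encrypted rewrite: handle as one condition.
    }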
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 379103e36fb..f06e89c7c3d 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -108,16 +108,6 @@ env.Library(
)
env.Library(
- target='initialize_snmp',
- source=[
- 'initialize_snmp.cpp',
- ],
- LIBDEPS=[
- '$BUILD_DIR/mongo/base',
- ],
-)
-
-env.Library(
target="dbmessage",
source=[
"dbmessage.cpp",
@@ -520,6 +510,14 @@ env.Library(
)
env.Library(
+ target='change_streams_cluster_parameter',
+ source=['change_streams_cluster_parameter.idl', 'change_streams_cluster_parameter.cpp'],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/idl/cluster_server_parameter',
+ ],
+)
+
+env.Library(
target='change_stream_change_collection_manager',
source=[
'change_stream_change_collection_manager.cpp',
@@ -1078,35 +1076,33 @@ env.Library(
source=[
"op_observer_impl.cpp",
],
- LIBDEPS=[
+ LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/base',
'$BUILD_DIR/mongo/db/catalog/collection_catalog',
+ '$BUILD_DIR/mongo/db/catalog/collection_options',
+ '$BUILD_DIR/mongo/db/catalog/commit_quorum_options',
+ '$BUILD_DIR/mongo/db/catalog/database_holder',
+ '$BUILD_DIR/mongo/db/catalog/import_collection_oplog_entry',
+ '$BUILD_DIR/mongo/db/concurrency/exception_util',
+ '$BUILD_DIR/mongo/db/pipeline/change_stream_pre_image_helpers',
'$BUILD_DIR/mongo/db/pipeline/change_stream_preimage',
+ '$BUILD_DIR/mongo/db/repl/image_collection_entry',
+ '$BUILD_DIR/mongo/db/repl/oplog',
+ '$BUILD_DIR/mongo/db/repl/repl_server_parameters',
'$BUILD_DIR/mongo/db/repl/tenant_migration_access_blocker',
+ '$BUILD_DIR/mongo/db/s/sharding_api_d',
'$BUILD_DIR/mongo/db/timeseries/bucket_catalog',
+ '$BUILD_DIR/mongo/db/views/views_mongod',
'$BUILD_DIR/mongo/s/coreshard',
'$BUILD_DIR/mongo/s/grid',
'batched_write_context',
- 'catalog/collection_options',
- 'catalog/database_holder',
- 'op_observer',
- 'op_observer_util',
- 'read_write_concern_defaults',
- 'repl/oplog',
- 's/sharding_api_d',
- 'views/views_mongod',
- ],
- LIBDEPS_PRIVATE=[
- '$BUILD_DIR/mongo/db/catalog/commit_quorum_options',
- '$BUILD_DIR/mongo/db/catalog/import_collection_oplog_entry',
- '$BUILD_DIR/mongo/db/concurrency/exception_util',
- '$BUILD_DIR/mongo/db/pipeline/change_stream_pre_image_helpers',
- '$BUILD_DIR/mongo/db/server_feature_flags',
'dbhelpers',
'internal_transactions_feature_flag',
'multitenancy',
- 'repl/image_collection_entry',
- 'repl/repl_server_parameters',
+ 'op_observer',
+ 'op_observer_util',
+ 'read_write_concern_defaults',
+ 'server_feature_flags',
'transaction',
],
)
@@ -2197,6 +2193,7 @@ env.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/catalog/catalog_impl',
+ '$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/commands/mongod',
'$BUILD_DIR/mongo/db/index/index_access_method_factory',
'$BUILD_DIR/mongo/db/index/index_access_methods',
@@ -2393,6 +2390,7 @@ env.Library(
# NOTE: If you need to add a static or mongo initializer to mongod startup,
# please add that library here, as a private library dependency.
'$BUILD_DIR/mongo/executor/network_interface_factory',
+ '$BUILD_DIR/mongo/logv2/logv2_options',
'$BUILD_DIR/mongo/rpc/rpc',
'$BUILD_DIR/mongo/s/commands/sharded_cluster_sharding_commands',
'$BUILD_DIR/mongo/scripting/scripting_server',
@@ -2441,7 +2439,6 @@ env.Library(
'index/index_access_methods',
'index/index_descriptor',
'index_builds_coordinator_mongod',
- 'initialize_snmp',
'introspect',
'keys_collection_client_direct',
'kill_sessions_local',
@@ -2519,6 +2516,7 @@ env.Library(
'$BUILD_DIR/mongo/client/clientdriver_minimal',
'$BUILD_DIR/mongo/db/change_stream_change_collection_manager',
'$BUILD_DIR/mongo/db/change_stream_options_manager',
+ '$BUILD_DIR/mongo/db/change_streams_cluster_parameter',
'$BUILD_DIR/mongo/db/pipeline/change_stream_expired_pre_image_remover',
'$BUILD_DIR/mongo/idl/cluster_server_parameter',
'$BUILD_DIR/mongo/idl/cluster_server_parameter_op_observer',
@@ -2551,7 +2549,6 @@ env.Library(
'index/index_access_method_factory',
'index/index_access_methods',
'index_builds_coordinator_mongod',
- 'initialize_snmp',
'keys_collection_client_direct',
'kill_sessions',
'kill_sessions_local',
@@ -2692,16 +2689,17 @@ if wiredtiger:
source=[
'cancelable_operation_context_test.cpp',
'catalog_raii_test.cpp',
+ 'change_streams_cluster_parameter_test.cpp',
'client_strand_test.cpp',
'client_context_test.cpp',
'collection_index_usage_tracker_test.cpp',
'commands_test.cpp',
'curop_test.cpp',
+ 'database_name_test.cpp',
'dbdirectclient_test.cpp',
'dbmessage_test.cpp',
'db_raii_test.cpp',
'db_raii_multi_collection_test.cpp',
- 'dollar_tenant_decoration_test.cpp',
"explain_test.cpp",
'field_parser_test.cpp',
'field_ref_set_test.cpp',
@@ -2737,7 +2735,6 @@ if wiredtiger:
'session_catalog_mongod_test.cpp',
'session_catalog_test.cpp',
'startup_warnings_mongod_test.cpp',
- 'tenant_database_name_test.cpp',
'thread_client_test.cpp',
'time_proof_service_test.cpp',
'transaction_api_test.cpp',
@@ -2761,15 +2758,31 @@ if wiredtiger:
'$BUILD_DIR/mongo/crypto/encrypted_field_config',
'$BUILD_DIR/mongo/crypto/fle_crypto',
'$BUILD_DIR/mongo/db/auth/auth',
+ '$BUILD_DIR/mongo/db/auth/authmocks',
'$BUILD_DIR/mongo/db/auth/security_token',
'$BUILD_DIR/mongo/db/catalog/catalog_test_fixture',
+ '$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/catalog/import_collection_oplog_entry',
'$BUILD_DIR/mongo/db/catalog/index_build_entry_idl',
'$BUILD_DIR/mongo/db/catalog/local_oplog_info',
+ '$BUILD_DIR/mongo/db/change_streams_cluster_parameter',
'$BUILD_DIR/mongo/db/mongohasher',
+ '$BUILD_DIR/mongo/db/pipeline/change_stream_expired_pre_image_remover',
'$BUILD_DIR/mongo/db/query/common_query_enums_and_helpers',
'$BUILD_DIR/mongo/db/query/query_test_service_context',
+ '$BUILD_DIR/mongo/db/repl/image_collection_entry',
+ '$BUILD_DIR/mongo/db/repl/mock_repl_coord_server_fixture',
+ '$BUILD_DIR/mongo/db/repl/oplog_interface_local',
+ '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface',
+ '$BUILD_DIR/mongo/db/repl/repl_server_parameters',
+ '$BUILD_DIR/mongo/db/repl/replica_set_aware_service',
+ '$BUILD_DIR/mongo/db/repl/replmocks',
+ '$BUILD_DIR/mongo/db/repl/storage_interface_impl',
+ '$BUILD_DIR/mongo/db/repl/tenant_migration_access_blocker',
+ '$BUILD_DIR/mongo/db/s/shard_server_test_fixture',
'$BUILD_DIR/mongo/db/s/sharding_api_d',
+ '$BUILD_DIR/mongo/db/stats/fill_locker_info',
+ '$BUILD_DIR/mongo/db/stats/transaction_stats',
'$BUILD_DIR/mongo/db/storage/wiredtiger/storage_wiredtiger',
'$BUILD_DIR/mongo/executor/async_timer_mock',
'$BUILD_DIR/mongo/idl/idl_parser',
@@ -2780,8 +2793,7 @@ if wiredtiger:
'$BUILD_DIR/mongo/util/clock_source_mock',
'$BUILD_DIR/mongo/util/net/network',
'$BUILD_DIR/mongo/util/net/ssl_options_server',
- 'auth/authmocks',
- 'catalog/database_holder',
+ 'batched_write_context',
'catalog_raii',
'collection_index_usage_tracker',
'commands',
@@ -2812,16 +2824,7 @@ if wiredtiger:
'range_arithmetic',
'read_write_concern_defaults_mock',
'record_id_helpers',
- 'repl/image_collection_entry',
- 'repl/mock_repl_coord_server_fixture',
- 'repl/oplog_interface_local',
- 'repl/repl_coordinator_interface',
- 'repl/repl_server_parameters',
- 'repl/replica_set_aware_service',
- 'repl/replmocks',
- 'repl/storage_interface_impl',
'rw_concern_d',
- 's/shard_server_test_fixture',
'server_options_core',
'server_options_servers',
'service_context',
@@ -2836,8 +2839,6 @@ if wiredtiger:
'signed_logical_time',
'snapshot_window_options',
'startup_warnings_mongod',
- 'stats/fill_locker_info',
- 'stats/transaction_stats',
'time_proof_service',
'transaction',
'transaction_api',
diff --git a/src/mongo/db/auth/README.md b/src/mongo/db/auth/README.md
index 87354dded20..2e49ab5c20a 100644
--- a/src/mongo/db/auth/README.md
+++ b/src/mongo/db/auth/README.md
@@ -11,6 +11,7 @@
- [Cluster Authentication](#cluster-authentication)
- [Localhost Auth Bypass](#localhost-auth-bypass)
- [Authorization](#authorization)
+ - [AuthName](#authname) (`UserName` and `RoleName`)
- [Users](#users)
- [User Roles](#user-roles)
- [User Credentials](#user-credentials)
@@ -21,6 +22,9 @@
- [Role Authentication Restrictions](#role-authentication-restrictions)
- [User and Role Management](#user-and-role-management)
- [UMC Transactions](#umc-transactions)
+ - [Privilege](#privilege)
+ - [ResourcePattern](#resourcepattern)
+ - [ActionType](#actiontype)
- [Command Execution](#command-execution)
- [Authorization Caching](#authorization-caching)
- [Authorization Manager External State](#authorization-manager-external-state)
@@ -294,23 +298,39 @@ execute commands.
[Here](https://github.com/mongodb/mongo/blob/r4.4.0/src/mongo/db/auth/authorization_session_impl.cpp#L126)
is the authorization session calling into the authorization manager to acquire a user.
-Clients are expected to authenticate at most one time on a connection, and a
-client which opts into API Version 1 will receive an error if it attempts to
-authenticate more than once. However, legacy clients which have not opted into
-an API Version may authenticate multiple times. If a legacy client
-authenticates as UserA on a database and then authenticates as UserB on the
-same database, its AuthorizationSession will implicitly logout UserA and
-replace its cached User object with that of UserB. Alternatively, if a legacy
-client authenticates as UserA on one database and then authenticates as UserB
-on a second database, its AuthorizationSession will store User objects for both
-UserA and UserB, and will consider itself authorized for the union of the two
-users' privileges. Because modern drivers no longer allow applications to
-authenticate with multiple user identities, this behavior in
-AuthorizationSession is deprecated, and support for it will eventually be
-removed.
+Clients are expected to authenticate at most one time on a connection.
+Attempting to reauthenticate as the currently authenticated user results
+in a warning being emitted to the global log, but the operation succeeds.
+Attempting to authenticate as a new user on an already authenticated connection is an error.
+### AuthName
-### User
+The [AuthName](auth_name.h) template
+provides the generic implementation shared by the `UserName` and `RoleName` types.
+Each of these objects is made up of three pieces of information.
+
+| Field | Accessor | Use |
+| -- | -- | -- |
+| `_name` | `getName()` | The symbolic name associated with the user or role (e.g. 'Alice') |
+| `_db` | `getDB()` | The authentication database associated with the named auth identifier (e.g. 'admin' or 'test') |
+| `_tenant` | `getTenant()` | When used in multitenancy mode, this value retains a `TenantId` for authorization checking. |
+
+The [`UserName`](user_name.h) and [`RoleName`](role_name.h) specializations are defined via CRTP
+to include additional `getUser()` and `getRole()` accessors, which proxy to `getName()`,
+and to provide a set of `constexpr StringData` identifiers relating to their type.
+
+#### Serializations
+
+* `getDisplayName()` and `toString()` create a new string of the form `name@db` for use in log messages.
+* `getUnambiguousName()` creates a new string of the form `db.name` for use in generating `_id` fields for authorization documents and unique hashes for logical session identifiers.
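+
+As a minimal illustrative sketch (assuming the accessors return `std::string`, and using the
+two-argument `UserName` constructor seen elsewhere in this patch), the two serializations differ
+only in ordering and separator:
+
+```cpp
+// Hypothetical example; the literal output strings follow the formats described above.
+UserName alice("Alice", "test");
+std::string display = alice.getDisplayName();      // "Alice@test"  (name@db, for log messages)
+std::string unambig = alice.getUnambiguousName();  // "test.Alice"  (db.name, for _id fields and hashes)
+```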
+
+#### Multitenancy
+
+`AuthName` objects may be associated with a `TenantId` either separately via `AuthName(StringData name, StringData db, boost::optional<TenantId> tenant = boost::none)` or using the compound `DatabaseName` type `AuthName(StringData name, DatabaseName db)`.
+
+When a `TenantId` is associated with an `AuthName`, it will NOT be included in `BSON` or `String` serializations unless explicitly requested with a boolean argument to these functions.
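+
+A minimal sketch of tenant-aware construction and serialization follows (the `TenantId` value and
+the explicit boolean arguments are illustrative assumptions):
+
+```cpp
+// Sketch only: associate a TenantId via the compound DatabaseName constructor.
+TenantId tenant(OID::gen());
+DatabaseName dbName(tenant, "test");
+UserName alice("Alice", dbName);             // Equivalent to UserName("Alice", "test", tenant).
+alice.toBSON(false /* serialize tenant */);  // TenantId omitted (the default behavior).
+alice.toBSON(true /* serialize tenant */);   // TenantId included on explicit request.
+```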
+
+### Users
`User` objects contain authorization information with regards to a specific user in a database. The
`AuthorizationManager` has control over creation, management, and deletion of a `UserHandle` object,
@@ -498,6 +518,48 @@ Authentication restrictions defined on a role have the same meaning as
those defined directly on users. The effective set of `authenticationRestrictions`
imposed on a user is the union of all direct and indirect authentication restrictions.
+### Privilege
+
+A [Privilege](privilege.h) represents a tuple of [ResourcePattern](resource_pattern.h) and
+[set](action_set.h) of [ActionType](action_type.idl)s which describe, respectively, the resources a
+user may act upon and the actions that user may perform.
+
+A [PrivilegeVector](privilege.h) is an alias for `std::vector<Privilege>` and represents
+the full set of privileges across all resource and `ActionType` combinations for the user or role.
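+
+For instance (a sketch based on usages elsewhere in this patch; `ActionType::find` and the
+`ResourcePattern::forExactNamespace()` factory are assumed here purely for illustration):
+
+```cpp
+// Sketch: a privilege granting 'find' on an exact namespace, collected into a PrivilegeVector.
+Privilege canFindWidgets(ResourcePattern::forExactNamespace(NamespaceString("test", "widgets")),
+                         ActionType::find);
+PrivilegeVector privileges;
+privileges.push_back(canFindWidgets);  // PrivilegeVector is simply std::vector<Privilege>.
+```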
+
+#### ResourcePattern
+
+A resource pattern is a combination of a [MatchType](action_type.idl) with a `NamespaceString` which may narrow the scope of that `MatchType`. Most MatchTypes refer to some storage resource, such as a specific collection or database; `kMatchClusterResource`, however, refers to an entire host, replica set, or cluster. A brief sketch of constructing common patterns in code follows the table below.
+
+| MatchType | As encoded in a privilege doc | Usage |
+| -- | -- | -- |
+| `kMatchNever` | _Inexpressible_ | A base type only used internally to indicate that the privilege specified by the ResourcePattern cannot match any real resource |
+| `kMatchClusterResource` | `{ cluster : true }` | Commonly used with host and cluster management actions such as `ActionType::addShard`, `ActionType::setParameter`, or `ActionType::shutdown`. |
+| `kMatchAnyResource` | `{ anyResource: true }` | Matches all storage resources, even [non-normal namespaces](#normal-namespace) such as `db.system.views`. |
+| `kMatchAnyNormalResource` | `{ db: '', collection: '' }` | Matches all [normal](#normal-namespace) storage resources. Used with [builtin role](builtin_roles.cpp) `readWriteAnyDatabase`. |
+| `kMatchDatabaseName` | `{ db: 'dbname', collection: '' }` | Matches all [normal](#normal-namespace) storage resources for a specific named database. Used with [builtin role](builtin_roles.cpp) `readWrite`. |
+| `kMatchCollectionName` | `{ db: '', collection: 'collname' }` | Matches all storage resources, normal or not, which have the exact collection suffix '`collname`'. For example, to provide read-only access to `*.system.js`. |
+| `kMatchExactNamespace` | `{ db: 'dbname', collection: 'collname' }` | Matches the exact namespace '`dbname`.`collname`'. |
+| `kMatchAnySystemBucketResource` | `{ db: '', system_buckets: '' }` | Matches the namespace pattern `*.system.buckets.*`. |
+| `kMatchAnySystemBucketInDBResource` | `{ db: 'dbname', system_buckets: '' }` | Matches the namespace pattern `dbname.system.buckets.*`. |
+| `kMatchAnySystemBucketInAnyDBResource` | `{ db: '', system_buckets: 'suffix' }` | Matches the namespace pattern `*.system.buckets.suffix`. |
+| `kMatchExactSystemBucketResource` | `{ db: 'dbname', system_buckets: 'suffix' }` | Matches the exact namespace `dbname.system.buckets.suffix`. |
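+
+As referenced above, a brief sketch of constructing some of these patterns in code (the factory
+names below mirror the table rows but should be treated as assumptions for illustration, not an
+exhaustive API listing):
+
+```cpp
+ResourcePattern::forClusterResource();                                   // { cluster: true }
+ResourcePattern::forAnyNormalResource();                                 // { db: '', collection: '' }
+ResourcePattern::forDatabaseName("test");                                // { db: 'test', collection: '' }
+ResourcePattern::forExactNamespace(NamespaceString("test", "widgets"));  // { db: 'test', collection: 'widgets' }
+```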
+
+##### Normal Namespace
+
+A "normal" resource is a `namespace` which does not match either of the following patterns:
+
+| Namespace pattern | Examples | Usage |
+| -- | -- | -- |
+| `local.replset.*` | `local.replset.initialSyncId` | Namespaces used by Replication to manage per-host state. |
+| `*.system.*` | `admin.system.version` `myDB.system.views` | Collections used by the database to support user collections. |
+
+See also: [NamespaceString::isNormalCollection()](../namespace_string.h)
+
+#### ActionType
+
+An [ActionType](action_type.idl) is a task which a client may be expected to perform. These are combined with [ResourcePattern](#resourcepattern)s to produce a [Privilege](#privilege). Note that not all `ActionType`s make sense with all `ResourcePattern`s (e.g. `ActionType::shutdown` applied to `ResourcePattern` `{ db: 'test', collection: 'my.awesome.collection' }`); however, the system generally does not prohibit declaring these combinations.
+
### User and Role Management
`User Management Commands`, sometimes referred to as `UMCs` provide an
@@ -530,6 +592,13 @@ allowing a rollback.
The [UMCTransaction](https://github.com/mongodb/mongo/blob/92cc84b0171942375ccbd2312a052bc7e9f159dd/src/mongo/db/commands/user_management_commands.cpp#L756)
class provides an abstraction around this mechanism.
+#### Multitenancy
+
+When acting in multitenancy mode, each tenant uses distinct storage for their users and roles.
+For example, given a `TenantId` of `"012345678ABCDEF01234567"`, all users for that tenant will
+be found in the `012345678ABCDEF01234567_admin.system.users` collection, and all roles will be
+found in the `012345678ABCDEF01234567_admin.system.roles` collection.
+
### Command Execution
When a client attempts to execute a command, the service entry point calls
diff --git a/src/mongo/db/auth/SConscript b/src/mongo/db/auth/SConscript
index 3df9d6922f2..0f4dcbc61bb 100644
--- a/src/mongo/db/auth/SConscript
+++ b/src/mongo/db/auth/SConscript
@@ -7,8 +7,9 @@ env = env.Clone()
env.Library(
target='security_token',
source=[
- 'security_token.cpp',
+ 'security_token_authentication_guard.cpp',
'security_token.idl',
+ 'validated_tenancy_scope.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
@@ -540,6 +541,7 @@ env.CppUnitTest(
'sasl_scram_test.cpp',
'security_key_test.cpp',
'user_document_parser_test.cpp',
+ 'validated_tenancy_scope_test.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
diff --git a/src/mongo/db/auth/auth_name.h b/src/mongo/db/auth/auth_name.h
index 62fd1d6fa8e..6c5b052e5c7 100644
--- a/src/mongo/db/auth/auth_name.h
+++ b/src/mongo/db/auth/auth_name.h
@@ -39,6 +39,7 @@
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonelement.h"
#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/database_name.h"
#include "mongo/db/tenant_id.h"
#include "mongo/stdx/variant.h"
@@ -71,6 +72,10 @@ public:
_tenant = std::move(tenant);
}
+ template <typename Name>
+ AuthName(Name name, const DatabaseName& dbname)
+ : AuthName(name, dbname.db(), dbname.tenantId()) {}
+
/**
* Parses a string of the form "db.name" into an AuthName object with an optional tenant.
*/
@@ -105,6 +110,10 @@ public:
return _db;
}
+ DatabaseName getDatabaseName() const {
+ return DatabaseName(_tenant, _db);
+ }
+
/**
* Gets the TenantId, if any, associated with this AuthName.
*/
diff --git a/src/mongo/db/auth/authorization_manager.h b/src/mongo/db/auth/authorization_manager.h
index c8072025264..fca637f77d3 100644
--- a/src/mongo/db/auth/authorization_manager.h
+++ b/src/mongo/db/auth/authorization_manager.h
@@ -298,7 +298,7 @@ public:
* Delegates method call to the underlying AuthzManagerExternalState.
*/
virtual Status getRoleDescriptionsForDB(OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat privilegeFormat,
AuthenticationRestrictionsFormat,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/authorization_manager_impl.cpp b/src/mongo/db/auth/authorization_manager_impl.cpp
index 5a010ea5f9b..4ecfb5af2df 100644
--- a/src/mongo/db/auth/authorization_manager_impl.cpp
+++ b/src/mongo/db/auth/authorization_manager_impl.cpp
@@ -476,7 +476,7 @@ Status AuthorizationManagerImpl::getRolesAsUserFragment(
Status AuthorizationManagerImpl::getRoleDescriptionsForDB(
OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat privileges,
AuthenticationRestrictionsFormat restrictions,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/authorization_manager_impl.h b/src/mongo/db/auth/authorization_manager_impl.h
index 58a8e3e5c51..81f247b2b1d 100644
--- a/src/mongo/db/auth/authorization_manager_impl.h
+++ b/src/mongo/db/auth/authorization_manager_impl.h
@@ -91,7 +91,7 @@ public:
BSONObj* result) override;
Status getRoleDescriptionsForDB(OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat privilegeFormat,
AuthenticationRestrictionsFormat,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/authorization_session_impl.cpp b/src/mongo/db/auth/authorization_session_impl.cpp
index 173a19cfd58..5ebc878a07c 100644
--- a/src/mongo/db/auth/authorization_session_impl.cpp
+++ b/src/mongo/db/auth/authorization_session_impl.cpp
@@ -43,7 +43,7 @@
#include "mongo/db/auth/action_type.h"
#include "mongo/db/auth/authz_session_external_state.h"
#include "mongo/db/auth/privilege.h"
-#include "mongo/db/auth/security_token.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/client.h"
#include "mongo/db/namespace_string.h"
@@ -245,14 +245,15 @@ Status AuthorizationSessionImpl::addAndAuthorizeUser(OperationContext* opCtx,
stdx::lock_guard<Client> lk(*opCtx->getClient());
- if (auto token = auth::getSecurityToken(opCtx)) {
+ auto validatedTenancyScope = auth::ValidatedTenancyScope::get(opCtx);
+ if (validatedTenancyScope && validatedTenancyScope->hasAuthenticatedUser()) {
uassert(
6161501,
"Attempt to authorize via security token on connection with established authentication",
_authenticationMode != AuthenticationMode::kConnection);
uassert(6161502,
"Attempt to authorize a user other than that present in the security token",
- token->getAuthenticatedUser() == userName);
+ validatedTenancyScope->authenticatedUser() == userName);
validateSecurityTokenUserPrivileges(user->getPrivileges());
_authenticationMode = AuthenticationMode::kSecurityToken;
} else {
diff --git a/src/mongo/db/auth/authz_manager_external_state.h b/src/mongo/db/auth/authz_manager_external_state.h
index b693d3e0622..96cf71257e0 100644
--- a/src/mongo/db/auth/authz_manager_external_state.h
+++ b/src/mongo/db/auth/authz_manager_external_state.h
@@ -42,6 +42,7 @@
#include "mongo/db/auth/role_name.h"
#include "mongo/db/auth/user.h"
#include "mongo/db/auth/user_name.h"
+#include "mongo/db/database_name.h"
#include "mongo/db/jsobj.h"
namespace mongo {
@@ -161,7 +162,7 @@ public:
* contain a "warnings" array, with std::string messages describing inconsistencies.
*/
virtual Status getRoleDescriptionsForDB(OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat showPrivileges,
AuthenticationRestrictionsFormat,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/authz_manager_external_state_d.cpp b/src/mongo/db/auth/authz_manager_external_state_d.cpp
index b602cc1b963..788526abc82 100644
--- a/src/mongo/db/auth/authz_manager_external_state_d.cpp
+++ b/src/mongo/db/auth/authz_manager_external_state_d.cpp
@@ -68,7 +68,7 @@ Status AuthzManagerExternalStateMongod::query(
FindCommandRequest findRequest{collectionName};
findRequest.setFilter(filter);
findRequest.setProjection(projection);
- client.find(std::move(findRequest), ReadPreferenceSetting{}, resultProcessor);
+ client.find(std::move(findRequest), resultProcessor);
return Status::OK();
} catch (const DBException& e) {
return e.toStatus();
diff --git a/src/mongo/db/auth/authz_manager_external_state_local.cpp b/src/mongo/db/auth/authz_manager_external_state_local.cpp
index 2eb9ba9c47d..0228897aad8 100644
--- a/src/mongo/db/auth/authz_manager_external_state_local.cpp
+++ b/src/mongo/db/auth/authz_manager_external_state_local.cpp
@@ -631,7 +631,7 @@ Status AuthzManagerExternalStateLocal::getRolesDescription(
Status AuthzManagerExternalStateLocal::getRoleDescriptionsForDB(
OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat showPrivileges,
AuthenticationRestrictionsFormat showRestrictions,
bool showBuiltinRoles,
@@ -682,7 +682,7 @@ Status AuthzManagerExternalStateLocal::getRoleDescriptionsForDB(
return query(opCtx,
getRolesCollection(getActiveTenant(opCtx)),
- BSON(AuthorizationManager::ROLE_DB_FIELD_NAME << dbname),
+ BSON(AuthorizationManager::ROLE_DB_FIELD_NAME << dbname.db()),
BSONObj(),
[&](const BSONObj& roleDoc) {
try {
diff --git a/src/mongo/db/auth/authz_manager_external_state_local.h b/src/mongo/db/auth/authz_manager_external_state_local.h
index 2d6ae65b235..7c5e690b9d6 100644
--- a/src/mongo/db/auth/authz_manager_external_state_local.h
+++ b/src/mongo/db/auth/authz_manager_external_state_local.h
@@ -74,7 +74,7 @@ public:
AuthenticationRestrictionsFormat,
BSONObj* result) override;
Status getRoleDescriptionsForDB(OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat showPrivileges,
AuthenticationRestrictionsFormat,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/authz_manager_external_state_s.h b/src/mongo/db/auth/authz_manager_external_state_s.h
index 58547be92b2..a1ac0feee41 100644
--- a/src/mongo/db/auth/authz_manager_external_state_s.h
+++ b/src/mongo/db/auth/authz_manager_external_state_s.h
@@ -79,7 +79,7 @@ public:
return {ErrorCodes::NotImplemented, "AuthzMongos::getRolesAsUserFragment"};
}
Status getRoleDescriptionsForDB(OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat showPrivileges,
AuthenticationRestrictionsFormat,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/builtin_roles.cpp b/src/mongo/db/auth/builtin_roles.cpp
index 2b0c63cb798..a263d1d318c 100644
--- a/src/mongo/db/auth/builtin_roles.cpp
+++ b/src/mongo/db/auth/builtin_roles.cpp
@@ -781,19 +781,19 @@ const std::map<StringData, BuiltinRoleDefinition> kBuiltinRoles({
// $external is a virtual database used for X509, LDAP,
// and other authentication mechanisms and not used for storage.
// Therefore, granting privileges on this database does not make sense.
-bool isValidDB(StringData dbname) {
+bool isValidDB(const DatabaseName& dbname) {
return NamespaceString::validDBName(dbname, NamespaceString::DollarInDbNameBehavior::Allow) &&
- (dbname != NamespaceString::kExternalDb);
+ (dbname.db() != NamespaceString::kExternalDb);
}
} // namespace
-stdx::unordered_set<RoleName> auth::getBuiltinRoleNamesForDB(StringData dbname) {
+stdx::unordered_set<RoleName> auth::getBuiltinRoleNamesForDB(const DatabaseName& dbname) {
if (!isValidDB(dbname)) {
return {};
}
- const bool isAdmin = dbname == ADMIN_DBNAME;
+ const bool isAdmin = dbname.db() == ADMIN_DBNAME;
stdx::unordered_set<RoleName> roleNames;
for (const auto& [role, def] : kBuiltinRoles) {
@@ -808,7 +808,7 @@ bool auth::addPrivilegesForBuiltinRole(const RoleName& roleName, PrivilegeVector
auto role = roleName.getRole();
auto dbname = roleName.getDB();
- if (!isValidDB(dbname)) {
+ if (!isValidDB(roleName.getDatabaseName())) {
return false;
}
@@ -834,8 +834,7 @@ void auth::generateUniversalPrivileges(PrivilegeVector* privileges) {
}
bool auth::isBuiltinRole(const RoleName& role) {
- auto dbname = role.getDB();
- if (!isValidDB(dbname)) {
+ if (!isValidDB(role.getDatabaseName())) {
return false;
}
@@ -844,7 +843,7 @@ bool auth::isBuiltinRole(const RoleName& role) {
return false;
}
- return !it->second.adminOnly() || (dbname == ADMIN_DBNAME);
+ return !it->second.adminOnly() || (role.getDB() == ADMIN_DBNAME);
}
} // namespace mongo
diff --git a/src/mongo/db/auth/builtin_roles.h b/src/mongo/db/auth/builtin_roles.h
index 3665e79b9ba..e20dbdaa86d 100644
--- a/src/mongo/db/auth/builtin_roles.h
+++ b/src/mongo/db/auth/builtin_roles.h
@@ -31,6 +31,7 @@
#include "mongo/db/auth/privilege.h"
#include "mongo/db/auth/role_name.h"
+#include "mongo/db/database_name.h"
#include "mongo/stdx/unordered_set.h"
namespace mongo {
@@ -47,7 +48,7 @@ bool addPrivilegesForBuiltinRole(const RoleName& role, PrivilegeVector* privileg
/**
* Ennumerate all builtin RoleNames for the given database.
*/
-stdx::unordered_set<RoleName> getBuiltinRoleNamesForDB(StringData dbname);
+stdx::unordered_set<RoleName> getBuiltinRoleNamesForDB(const DatabaseName& dbname);
/**
* Adds to "privileges" the necessary privileges to do absolutely anything on the system.
diff --git a/src/mongo/db/auth/builtin_roles_test.cpp b/src/mongo/db/auth/builtin_roles_test.cpp
index 15b5aa932fd..a7662650d21 100644
--- a/src/mongo/db/auth/builtin_roles_test.cpp
+++ b/src/mongo/db/auth/builtin_roles_test.cpp
@@ -75,7 +75,7 @@ TEST(BuiltinRoles, BuiltinRolesOnlyOnAppropriateDatabases) {
}
TEST(BuiltinRoles, getBuiltinRolesForDB) {
- auto adminRoles = auth::getBuiltinRoleNamesForDB("admin");
+ auto adminRoles = auth::getBuiltinRoleNamesForDB({boost::none, "admin"});
ASSERT(adminRoles.contains(RoleName("read", "admin")));
ASSERT(adminRoles.contains(RoleName("readAnyDatabase", "admin")));
for (const auto& role : adminRoles) {
@@ -83,7 +83,7 @@ TEST(BuiltinRoles, getBuiltinRolesForDB) {
ASSERT(auth::isBuiltinRole(role));
}
- auto testRoles = auth::getBuiltinRoleNamesForDB("test");
+ auto testRoles = auth::getBuiltinRoleNamesForDB({boost::none, "test"});
ASSERT(testRoles.contains(RoleName("read", "test")));
ASSERT(!testRoles.contains(RoleName("readAnyDatabase", "test")));
for (const auto& role : testRoles) {
diff --git a/src/mongo/db/auth/security_token_authentication_guard.cpp b/src/mongo/db/auth/security_token_authentication_guard.cpp
new file mode 100644
index 00000000000..5be6de3dc75
--- /dev/null
+++ b/src/mongo/db/auth/security_token_authentication_guard.cpp
@@ -0,0 +1,66 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+
+#include "mongo/db/auth/security_token_authentication_guard.h"
+
+#include "mongo/db/auth/authorization_session.h"
+#include "mongo/logv2/log.h"
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kAccessControl
+
+namespace mongo {
+namespace auth {
+
+SecurityTokenAuthenticationGuard::SecurityTokenAuthenticationGuard(
+ OperationContext* opCtx, const ValidatedTenancyScope& token) {
+ if (token.hasAuthenticatedUser()) {
+ const auto& userName = token.authenticatedUser();
+ auto* client = opCtx->getClient();
+ uassertStatusOK(AuthorizationSession::get(client)->addAndAuthorizeUser(opCtx, userName));
+ _client = client;
+
+ LOGV2_DEBUG(5838100,
+ 4,
+ "Authenticated with security token",
+ "token"_attr = token.getOriginalToken());
+ } else {
+ _client = nullptr;
+ }
+}
+
+SecurityTokenAuthenticationGuard::~SecurityTokenAuthenticationGuard() {
+ if (_client) {
+ // SecurityToken based users are "logged out" at the end of their request.
+ AuthorizationSession::get(_client)->logoutSecurityTokenUser(_client);
+ }
+}
+
+} // namespace auth
+} // namespace mongo
diff --git a/src/mongo/db/auth/security_token.h b/src/mongo/db/auth/security_token_authentication_guard.h
index 2e45e63952f..c73e0324e5f 100644
--- a/src/mongo/db/auth/security_token.h
+++ b/src/mongo/db/auth/security_token_authentication_guard.h
@@ -1,5 +1,5 @@
/**
- * Copyright (C) 2021-present MongoDB, Inc.
+ * Copyright (C) 2022-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
@@ -29,49 +29,27 @@
#pragma once
-#include <boost/optional.hpp>
-
-#include "mongo/bson/bsonobj.h"
-#include "mongo/db/auth/security_token_gen.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/client.h"
#include "mongo/db/operation_context.h"
namespace mongo {
namespace auth {
+/**
+ * If ValidatedTenancyScope represents an AuthenticatedUser,
+ * that user will be authenticated against the client until this guard dies.
+ * This is used in ServiceEntryPoint to scope authentication to a single operation.
+ */
class SecurityTokenAuthenticationGuard {
public:
SecurityTokenAuthenticationGuard() = delete;
- SecurityTokenAuthenticationGuard(OperationContext* opCtx);
+ SecurityTokenAuthenticationGuard(OperationContext*, const ValidatedTenancyScope&);
~SecurityTokenAuthenticationGuard();
private:
Client* _client;
};
-/**
- * Takes an unsigned security token as input and applies
- * the temporary signature algorithm to extend it into a full SecurityToken.
- */
-BSONObj signSecurityToken(BSONObj obj);
-
-/**
- * Verify the contents of the provided security token
- * using the temporary signing algorithm,
- */
-SecurityToken verifySecurityToken(BSONObj obj);
-
-/**
- * Parse any SecurityToken from the OpMsg and place it as a decoration
- * on OperationContext
- */
-void readSecurityTokenMetadata(OperationContext* opCtx, BSONObj securityToken);
-
-/**
- * Retrieve the Security Token associated with this operation context
- */
-using MaybeSecurityToken = boost::optional<SecurityToken>;
-MaybeSecurityToken getSecurityToken(OperationContext* opCtx);
-
} // namespace auth
} // namespace mongo
diff --git a/src/mongo/db/auth/security_token.cpp b/src/mongo/db/auth/validated_tenancy_scope.cpp
index 586abb92aee..2ab66b3abd3 100644
--- a/src/mongo/db/auth/security_token.cpp
+++ b/src/mongo/db/auth/validated_tenancy_scope.cpp
@@ -1,5 +1,5 @@
/**
- * Copyright (C) 2021-present MongoDB, Inc.
+ * Copyright (C) 2022-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
@@ -27,26 +27,23 @@
* it in the license file.
*/
-
-#include "mongo/db/auth/security_token.h"
-
-#include <boost/optional.hpp>
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/base/init.h"
#include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/auth/security_token_gen.h"
+#include "mongo/db/multitenancy.h"
#include "mongo/db/multitenancy_gen.h"
#include "mongo/db/server_feature_flags_gen.h"
-#include "mongo/db/tenant_id.h"
#include "mongo/logv2/log.h"
#include "mongo/logv2/log_detail.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kAccessControl
-
-namespace mongo {
-namespace auth {
+namespace mongo::auth {
namespace {
-const auto securityTokenDecoration = OperationContext::declareDecoration<MaybeSecurityToken>();
+const auto validatedTenancyScopeDecoration =
+ OperationContext::declareDecoration<boost::optional<ValidatedTenancyScope>>();
MONGO_INITIALIZER(SecurityTokenOptionValidate)(InitializerContext*) {
uassert(ErrorCodes::BadValue,
"multitenancySupport may not be specified if featureFlagMongoStore is not enabled",
@@ -54,15 +51,13 @@ MONGO_INITIALIZER(SecurityTokenOptionValidate)(InitializerContext*) {
if (gMultitenancySupport) {
logv2::detail::setGetTenantIDCallback([]() -> boost::optional<TenantId> {
auto* client = Client::getCurrent();
- if (!client)
+ if (!client) {
return boost::none;
+ }
if (auto* opCtx = client->getOperationContext()) {
- auto token = getSecurityToken(opCtx);
- if (token) {
- return token->getAuthenticatedUser().getTenant();
- } else {
- return boost::none;
+ if (auto token = ValidatedTenancyScope::get(opCtx)) {
+ return token->tenantId();
}
}
@@ -72,45 +67,10 @@ MONGO_INITIALIZER(SecurityTokenOptionValidate)(InitializerContext*) {
}
} // namespace
-SecurityTokenAuthenticationGuard::SecurityTokenAuthenticationGuard(OperationContext* opCtx) {
- auto token = getSecurityToken(opCtx);
- if (token == boost::none) {
- _client = nullptr;
- return;
- }
-
- auto client = opCtx->getClient();
- uassertStatusOK(AuthorizationSession::get(client)->addAndAuthorizeUser(
- opCtx, token->getAuthenticatedUser()));
- _client = client;
-}
-
-SecurityTokenAuthenticationGuard::~SecurityTokenAuthenticationGuard() {
- if (_client) {
- // SecurityToken based users are "logged out" at the end of their request.
- AuthorizationSession::get(_client)->logoutSecurityTokenUser(_client);
- }
-}
-
-BSONObj signSecurityToken(BSONObj obj) {
- auto authUserElem = obj[SecurityToken::kAuthenticatedUserFieldName];
- uassert(ErrorCodes::BadValue,
- "Invalid field(s) in token being signed",
- (authUserElem.type() == Object) && (obj.nFields() == 1));
-
- auto authUserObj = authUserElem.Obj();
- ConstDataRange authUserCDR(authUserObj.objdata(), authUserObj.objsize());
-
- // Placeholder algorithm.
- auto sig = SHA256Block::computeHash({authUserCDR});
-
- BSONObjBuilder signedToken(obj);
- signedToken.appendBinData(SecurityToken::kSigFieldName, sig.size(), BinDataGeneral, sig.data());
- return signedToken.obj();
-}
-
-SecurityToken verifySecurityToken(BSONObj obj) {
- uassert(ErrorCodes::BadValue, "Multitenancy not enabled", gMultitenancySupport);
+ValidatedTenancyScope::ValidatedTenancyScope(BSONObj obj, InitTag tag) : _originalToken(obj) {
+ uassert(ErrorCodes::InvalidOptions,
+ "Multitenancy not enabled, refusing to accept securityToken",
+ gMultitenancySupport || (tag == InitTag::kInitForShell));
auto token = SecurityToken::parse({"Security Token"}, obj);
auto authenticatedUser = token.getAuthenticatedUser();
@@ -126,23 +86,97 @@ SecurityToken verifySecurityToken(BSONObj obj) {
auto computed = SHA256Block::computeHash({authUserCDR});
uassert(ErrorCodes::Unauthorized, "Token signature invalid", computed == token.getSig());
- return token;
+
+ _tenantOrUser = std::move(authenticatedUser);
}
-void readSecurityTokenMetadata(OperationContext* opCtx, BSONObj securityToken) try {
- if (securityToken.nFields() == 0) {
- return;
+ValidatedTenancyScope::ValidatedTenancyScope(Client* client, TenantId tenant)
+ : _tenantOrUser(std::move(tenant)) {
+ uassert(ErrorCodes::InvalidOptions,
+ "Multitenancy not enabled, refusing to accept $tenant parameter",
+ gMultitenancySupport);
+
+ uassert(ErrorCodes::Unauthorized,
+ "'$tenant' may only be specified with the useTenant action type",
+ client &&
+ AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+ ResourcePattern::forClusterResource(), ActionType::useTenant));
+}
+
+boost::optional<ValidatedTenancyScope> ValidatedTenancyScope::create(Client* client,
+ BSONObj body,
+ BSONObj securityToken) {
+ if (!gMultitenancySupport) {
+ return boost::none;
+ }
+
+ auto dollarTenantElem = body["$tenant"_sd];
+ const bool hasToken = securityToken.nFields() > 0;
+
+ uassert(6545800,
+ "Cannot pass $tenant id if also passing securityToken",
+ dollarTenantElem.eoo() || !hasToken);
+ uassert(ErrorCodes::OperationFailed,
+ "Cannot process $tenant id when no client is available",
+ dollarTenantElem.eoo() || client);
+
+ // TODO SERVER-66822: Re-enable this uassert.
+ // uassert(ErrorCodes::Unauthorized,
+ // "Multitenancy is enabled, $tenant id or securityToken is required.",
+ // dollarTenantElem || opMsg.securityToken.nFields() > 0);
+
+ if (dollarTenantElem) {
+ return ValidatedTenancyScope(client, TenantId::parseFromBSON(dollarTenantElem));
+ } else if (hasToken) {
+ return ValidatedTenancyScope(securityToken);
+ } else {
+ return boost::none;
+ }
+}
+
+bool ValidatedTenancyScope::hasAuthenticatedUser() const {
+ return stdx::holds_alternative<UserName>(_tenantOrUser);
+}
+
+const UserName& ValidatedTenancyScope::authenticatedUser() const {
+ invariant(hasAuthenticatedUser());
+ return stdx::get<UserName>(_tenantOrUser);
+}
+
+const TenantId& ValidatedTenancyScope::tenantId() const {
+ if (hasAuthenticatedUser()) {
+ return stdx::get<UserName>(_tenantOrUser).getTenant().get();
+ } else {
+ invariant(stdx::holds_alternative<TenantId>(_tenantOrUser));
+ return stdx::get<TenantId>(_tenantOrUser);
}
+}
+
+const boost::optional<ValidatedTenancyScope>& ValidatedTenancyScope::get(OperationContext* opCtx) {
+ return validatedTenancyScopeDecoration(opCtx);
+}
- securityTokenDecoration(opCtx) = verifySecurityToken(securityToken);
- LOGV2_DEBUG(5838100, 4, "Accepted security token", "token"_attr = securityToken);
-} catch (const DBException& ex) {
- uassertStatusOK(ex.toStatus().withContext("Unable to parse Security Token from Metadata"));
+void ValidatedTenancyScope::set(OperationContext* opCtx,
+ boost::optional<ValidatedTenancyScope> token) {
+ validatedTenancyScopeDecoration(opCtx) = std::move(token);
}
-MaybeSecurityToken getSecurityToken(OperationContext* opCtx) {
- return securityTokenDecoration(opCtx);
+ValidatedTenancyScope::ValidatedTenancyScope(BSONObj obj, TokenForTestingTag) {
+ auto authUserElem = obj[SecurityToken::kAuthenticatedUserFieldName];
+ uassert(ErrorCodes::BadValue,
+ "Invalid field(s) in token being signed",
+ (authUserElem.type() == Object) && (obj.nFields() == 1));
+
+ auto authUserObj = authUserElem.Obj();
+ ConstDataRange authUserCDR(authUserObj.objdata(), authUserObj.objsize());
+
+ // Placeholder algorithm.
+ auto sig = SHA256Block::computeHash({authUserCDR});
+
+ BSONObjBuilder signedToken(obj);
+ signedToken.appendBinData(SecurityToken::kSigFieldName, sig.size(), BinDataGeneral, sig.data());
+ _originalToken = signedToken.obj();
+ _tenantOrUser = UserName::parseFromBSONObj(authUserObj);
}
-} // namespace auth
-} // namespace mongo
+} // namespace mongo::auth
diff --git a/src/mongo/db/auth/validated_tenancy_scope.h b/src/mongo/db/auth/validated_tenancy_scope.h
new file mode 100644
index 00000000000..302b3fdac5a
--- /dev/null
+++ b/src/mongo/db/auth/validated_tenancy_scope.h
@@ -0,0 +1,116 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <boost/optional.hpp>
+
+#include "mongo/bson/bsonobj.h"
+#include "mongo/db/auth/user_name.h"
+#include "mongo/db/tenant_id.h"
+#include "mongo/stdx/variant.h"
+
+namespace mongo {
+
+class Client;
+class OperationContext;
+
+namespace auth {
+
+class ValidatedTenancyScope {
+public:
+ ValidatedTenancyScope() = delete;
+ ValidatedTenancyScope(const ValidatedTenancyScope&) = default;
+
+ // kInitForShell allows parsing a securityToken without multitenancy enabled.
+ // This is required in the shell since we do not enable this setting in non-servers.
+ enum class InitTag {
+ kNormal,
+ kInitForShell,
+ };
+
+ /**
+ * Constructs a ValidatedTenancyScope by parsing a SecurityToken from a BSON object
+ * and verifying its cryptographic signature.
+ */
+ explicit ValidatedTenancyScope(BSONObj securityToken, InitTag tag = InitTag::kNormal);
+
+ /**
+ * Constructs a ValidatedTenancyScope for tenant only by validating that the
+ * current client is permitted to specify a tenant via the $tenant field.
+ */
+ ValidatedTenancyScope(Client* client, TenantId tenant);
+
+ /**
+ * Parses the client provided command body and securityToken for tenantId,
+ * and for securityToken respectively, the authenticatedUser as well.
+ *
+ * Returns boost::none when multitenancy support is not enabled.
+ */
+ static boost::optional<ValidatedTenancyScope> create(Client* client,
+ BSONObj body,
+ BSONObj securityToken);
+
+ bool hasAuthenticatedUser() const;
+ const UserName& authenticatedUser() const;
+ const TenantId& tenantId() const;
+
+ BSONObj getOriginalToken() const {
+ return _originalToken;
+ }
+
+ /**
+ * Get/Set a ValidatedTenancyScope as a decoration on the OperationContext
+ */
+ static const boost::optional<ValidatedTenancyScope>& get(OperationContext* opCtx);
+ static void set(OperationContext* opCtx, boost::optional<ValidatedTenancyScope> token);
+
+ /**
+ * Transitional token generator, do not use outside of test code.
+ */
+ struct TokenForTestingTag {};
+ explicit ValidatedTenancyScope(BSONObj token, TokenForTestingTag);
+
+ /**
+ * Backdoor API for use by FLE Query Analysis to setup a validated tenant without a security
+ * context.
+ */
+ struct TrustedFLEQueryAnalysisTag {};
+ explicit ValidatedTenancyScope(TenantId tenant, TrustedFLEQueryAnalysisTag)
+ : _tenantOrUser(std::move(tenant)) {}
+
+private:
+ // Preserve original token for serializing from MongoQ.
+ BSONObj _originalToken;
+
+ stdx::variant<UserName, TenantId> _tenantOrUser;
+};
+
+} // namespace auth
+} // namespace mongo
diff --git a/src/mongo/db/auth/validated_tenancy_scope_test.cpp b/src/mongo/db/auth/validated_tenancy_scope_test.cpp
new file mode 100644
index 00000000000..f1942f757a6
--- /dev/null
+++ b/src/mongo/db/auth/validated_tenancy_scope_test.cpp
@@ -0,0 +1,177 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/auth/authorization_manager_impl.h"
+#include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/auth/authorization_session_impl.h"
+#include "mongo/db/auth/authz_manager_external_state_mock.h"
+#include "mongo/db/auth/security_token_gen.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
+#include "mongo/db/multitenancy_gen.h"
+#include "mongo/db/service_context_test_fixture.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+
+class AuthorizationSessionImplTestHelper {
+public:
+ /**
+ * Synthesize a user with the useTenant privilege and add them to the authorization session.
+ */
+ static void grantUseTenant(Client& client) {
+ User user(UserName("useTenant", "admin"));
+ user.setPrivileges(
+ {Privilege(ResourcePattern::forClusterResource(), ActionType::useTenant)});
+ auto* as = dynamic_cast<AuthorizationSessionImpl*>(AuthorizationSession::get(client));
+ if (as->_authenticatedUser != boost::none) {
+ as->logoutAllDatabases(&client, "AuthorizationSessionImplTestHelper"_sd);
+ }
+ as->_authenticatedUser = std::move(user);
+ as->_authenticationMode = AuthorizationSession::AuthenticationMode::kConnection;
+ as->_updateInternalAuthorizationState();
+ }
+};
+
+namespace auth {
+namespace {
+
+class ValidatedTenancyScopeTestFixture : public mongo::ScopedGlobalServiceContextForTest,
+ public unittest::Test {
+protected:
+ void setUp() final {
+ auto authzManagerState = std::make_unique<AuthzManagerExternalStateMock>();
+ auto authzManager = std::make_unique<AuthorizationManagerImpl>(
+ getServiceContext(), std::move(authzManagerState));
+ authzManager->setAuthEnabled(true);
+ AuthorizationManager::set(getServiceContext(), std::move(authzManager));
+
+ client = getServiceContext()->makeClient("test");
+ }
+
+ BSONObj makeSecurityToken(const UserName& userName) {
+ constexpr auto authUserFieldName = auth::SecurityToken::kAuthenticatedUserFieldName;
+ auto authUser = userName.toBSON(true /* serialize token */);
+ ASSERT_EQ(authUser["tenant"_sd].type(), jstOID);
+ using VTS = auth::ValidatedTenancyScope;
+ return VTS(BSON(authUserFieldName << authUser), VTS::TokenForTestingTag{})
+ .getOriginalToken();
+ }
+
+ ServiceContext::UniqueClient client;
+};
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportOffWithoutTenantOK) {
+ gMultitenancySupport = false;
+ auto body = BSON("$db"
+ << "foo");
+
+ auto validated = ValidatedTenancyScope::create(client.get(), body, {});
+ ASSERT_TRUE(validated == boost::none);
+}
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportWithTenantOK) {
+ gMultitenancySupport = true;
+
+ auto kOid = OID::gen();
+ auto body = BSON("ping" << 1 << "$tenant" << kOid);
+
+ AuthorizationSessionImplTestHelper::grantUseTenant(*(client.get()));
+ auto validated = ValidatedTenancyScope::create(client.get(), body, {});
+ ASSERT_TRUE(validated != boost::none);
+ ASSERT_TRUE(validated->tenantId() == TenantId(kOid));
+}
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportWithSecurityTokenOK) {
+ gMultitenancySupport = true;
+
+ const TenantId kTenantId(OID::gen());
+ auto body = BSON("ping" << 1);
+ UserName user("user", "admin", kTenantId);
+ auto token = makeSecurityToken(user);
+
+ auto validated = ValidatedTenancyScope::create(client.get(), body, token);
+ ASSERT_TRUE(validated != boost::none);
+ ASSERT_TRUE(validated->tenantId() == kTenantId);
+ ASSERT_TRUE(validated->hasAuthenticatedUser());
+ ASSERT_TRUE(validated->authenticatedUser() == user);
+}
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportOffWithTenantNOK) {
+ gMultitenancySupport = false;
+
+ auto kOid = OID::gen();
+ auto body = BSON("ping" << 1 << "$tenant" << kOid);
+
+ AuthorizationSessionImplTestHelper::grantUseTenant(*(client.get()));
+ ASSERT_THROWS_CODE(ValidatedTenancyScope(client.get(), TenantId(kOid)),
+ DBException,
+ ErrorCodes::InvalidOptions);
+ ASSERT_TRUE(ValidatedTenancyScope::create(client.get(), body, {}) == boost::none);
+}
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportWithTenantNOK) {
+ gMultitenancySupport = true;
+
+ auto kOid = OID::gen();
+ auto body = BSON("ping" << 1 << "$tenant" << kOid);
+
+ ASSERT_THROWS_CODE(
+ ValidatedTenancyScope(client.get(), TenantId(kOid)), DBException, ErrorCodes::Unauthorized);
+ ASSERT_THROWS_CODE(ValidatedTenancyScope::create(client.get(), body, {}),
+ DBException,
+ ErrorCodes::Unauthorized);
+}
+
+// TODO SERVER-66822: Re-enable this test case.
+// TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportWithoutTenantAndSecurityTokenNOK) {
+// gMultitenancySupport = true;
+// auto body = BSON("ping" << 1);
+// AuthorizationSessionImplTestHelper::grantUseTenant(*(client.get()));
+// ASSERT_THROWS_CODE(ValidatedTenancyScope::create(client.get(), body, {}), DBException,
+// ErrorCodes::Unauthorized);
+// }
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportWithTenantAndSecurityTokenNOK) {
+ gMultitenancySupport = true;
+
+ auto kOid = OID::gen();
+ auto body = BSON("ping" << 1 << "$tenant" << kOid);
+ UserName user("user", "admin", TenantId(kOid));
+ auto token = makeSecurityToken(user);
+
+ AuthorizationSessionImplTestHelper::grantUseTenant(*(client.get()));
+ ASSERT_THROWS_CODE(
+ ValidatedTenancyScope::create(client.get(), body, token), DBException, 6545800);
+}
+
+} // namespace
+} // namespace auth
+} // namespace mongo
diff --git a/src/mongo/db/catalog/README.md b/src/mongo/db/catalog/README.md
index 71844f54e2d..db4bda42bdd 100644
--- a/src/mongo/db/catalog/README.md
+++ b/src/mongo/db/catalog/README.md
@@ -25,7 +25,10 @@ directory.
For more information on the Storage Engine API, see the [storage/README][].
+For more information on time-series collections, see the [timeseries/README][].
+
[storage/README]: https://github.com/mongodb/mongo/blob/master/src/mongo/db/storage/README.md
+[timeseries/README]: https://github.com/mongodb/mongo/blob/master/src/mongo/db/timeseries/README.md
# The Catalog
@@ -756,11 +759,18 @@ Manual](https://docs.mongodb.com/master/core/index-creation/#index-builds-in-rep
### Commit Quorum
-A primary will not commit an index build until a minimum number of data-bearing nodes have completed
-the index build and are ready to commit. This threshold is called the _commit quorum_.
+The purpose of `commitQuorum` is to ensure secondaries are ready to commit an index build quickly.
+This minimizes replication lag on secondaries: on receipt of a `commitIndexBuild` oplog entry, a
+secondary will stall oplog application until its local index build can be committed. `commitQuorum`
+delays commit of an index build on the primary node until secondaries are also ready to commit. A
+primary will not commit an index build until a minimum number of data-bearing nodes are ready to
+commit the index build. Index builds can take anywhere from moments to days to complete, so the
+replication lag can be very significant. Note: `commitQuorum` makes no guarantee that indexes on
+secondaries are ready for use when the command completes; `writeConcern` must still be used for
+that.
A `commitQuorum` option can be provided to the `createIndexes` command and specifies the number of
-nodes, including itself, a primary must wait to be ready before committing. The `commitQuorum`
+nodes, including itself, for which a primary must wait to be ready before committing. The `commitQuorum`
option accepts the same range of values as the writeConcern `"w"` option. This can be an integer
specifying the number of nodes, `"majority"`, `"votingMembers"`, or a replica set tag. The default value
is `"votingMembers"`, or all voting data-bearing nodes.
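+
+For example (a sketch only; the collection name and index spec below are hypothetical), a caller
+could request that a majority of voting, data-bearing members be ready before commit:
+
+```cpp
+// Sketch of a createIndexes command body carrying an explicit commitQuorum.
+BSONObj cmd = BSON("createIndexes"
+                   << "widgets"
+                   << "indexes"
+                   << BSON_ARRAY(BSON("key" << BSON("a" << 1) << "name"
+                                             << "a_1"))
+                   << "commitQuorum"
+                   << "majority");
+```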
@@ -778,6 +788,10 @@ the index build is successful, it will replicate a `commitIndexBuild` oplog entr
Secondaries that were not included in the commit quorum and recieve a `commitIndexBuild` oplog entry
will block replication until their index build is complete.
+The `commitQuorum` for a running index build may be changed by the user via the
+[`setIndexCommitQuorum`](https://github.com/mongodb/mongo/blob/v6.0/src/mongo/db/commands/set_index_commit_quorum_command.cpp#L55)
+server command.
+
See
[IndexBuildsCoordinator::_waitForNextIndexBuildActionAndCommit](https://github.com/mongodb/mongo/blob/r4.4.0-rc9/src/mongo/db/index_builds_coordinator_mongod.cpp#L632).
@@ -954,8 +968,6 @@ _Code spelunking starting points:_
* [_The TTLCollectionCache Class_](https://github.com/mongodb/mongo/blob/d88a892d5b18035bd0f5393a42690e705c2007d7/src/mongo/db/ttl_collection_cache.h)
* [_ttl.idl_](https://github.com/mongodb/mongo/blob/d88a892d5b18035bd0f5393a42690e705c2007d7/src/mongo/db/ttl.idl)
-TODO SERVER-66898: Refresh links
-
# Repair
Data corruption has a variety of causes, but can usually be attributed to misconfigured or
@@ -1722,6 +1734,10 @@ The TTL monitor will only delete data from a time-series bucket collection when
time, _id, is past the expiration plus the bucket maximum time span (default 1 hour). This
procedure avoids deleting buckets with data that is not older than the expiration time.
+For more information on time-series collections, see the [timeseries/README][].
+
+[timeseries/README]: https://github.com/mongodb/mongo/blob/master/src/mongo/db/timeseries/README.md
+
## Capped clustered collections
Capped clustered collections are available internally. Unlike regular capped collections, clustered
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index f36fee396d6..2cae97495a4 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -537,6 +537,7 @@ env.Library(
'$BUILD_DIR/mongo/db/query/query_plan_cache',
'$BUILD_DIR/mongo/db/query/query_planner',
'$BUILD_DIR/mongo/db/update_index_data',
+ 'collection',
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/base',
@@ -670,6 +671,7 @@ if wiredtiger:
'collection',
'collection_catalog',
'collection_options',
+ 'collection_query_info',
'collection_validation',
'commit_quorum_options',
'database_holder',
diff --git a/src/mongo/db/catalog/capped_utils.cpp b/src/mongo/db/catalog/capped_utils.cpp
index ba307ee0124..0810970806f 100644
--- a/src/mongo/db/catalog/capped_utils.cpp
+++ b/src/mongo/db/catalog/capped_utils.cpp
@@ -97,7 +97,7 @@ Status emptyCapped(OperationContext* opCtx, const NamespaceString& collectionNam
WriteUnitOfWork wuow(opCtx);
- auto writableCollection = collection.getWritableCollection();
+ auto writableCollection = collection.getWritableCollection(opCtx);
Status status = writableCollection->truncate(opCtx);
if (!status.isOK()) {
return status;
diff --git a/src/mongo/db/catalog/coll_mod.cpp b/src/mongo/db/catalog/coll_mod.cpp
index cfad9d81079..d7c5ff1ee92 100644
--- a/src/mongo/db/catalog/coll_mod.cpp
+++ b/src/mongo/db/catalog/coll_mod.cpp
@@ -177,13 +177,11 @@ StatusWith<std::pair<ParsedCollModRequest, BSONObj>> parseCollModRequest(Operati
}
if (const auto& cappedSize = cmr.getCappedSize()) {
- static constexpr long long minCappedSize = 4096;
auto swCappedSize = CollectionOptions::checkAndAdjustCappedSize(*cappedSize);
if (!swCappedSize.isOK()) {
return swCappedSize.getStatus();
}
- parsed.cappedSize =
- (swCappedSize.getValue() < minCappedSize) ? minCappedSize : swCappedSize.getValue();
+ parsed.cappedSize = swCappedSize.getValue();
oplogEntryBuilder.append(CollMod::kCappedSizeFieldName, *cappedSize);
}
if (const auto& cappedMax = cmr.getCappedMax()) {
diff --git a/src/mongo/db/catalog/collection_catalog.cpp b/src/mongo/db/catalog/collection_catalog.cpp
index cd6cb68f8a5..4c02fb5a1a5 100644
--- a/src/mongo/db/catalog/collection_catalog.cpp
+++ b/src/mongo/db/catalog/collection_catalog.cpp
@@ -475,8 +475,11 @@ Status CollectionCatalog::createView(OperationContext* opCtx,
const BSONArray& pipeline,
const BSONObj& collation,
const ViewsForDatabase::PipelineValidatorFn& pipelineValidator,
- const bool updateDurableViewCatalog) const {
- invariant(opCtx->lockState()->isCollectionLockedForMode(viewName, MODE_IX));
+ const ViewUpsertMode insertViewMode) const {
+ // A view document direct write can occur via the oplog application path, which may only hold a
+ // lock on the collection being updated (the database views collection).
+ invariant(insertViewMode == ViewUpsertMode::kAlreadyDurableView ||
+ opCtx->lockState()->isCollectionLockedForMode(viewName, MODE_IX));
invariant(opCtx->lockState()->isCollectionLockedForMode(
NamespaceString(viewName.db(), NamespaceString::kSystemDotViewsCollectionName), MODE_X));
@@ -514,7 +517,7 @@ Status CollectionCatalog::createView(OperationContext* opCtx,
pipelineValidator,
std::move(collator.getValue()),
ViewsForDatabase{viewsForDb},
- ViewUpsertMode::kCreateView);
+ insertViewMode);
}
return result;
@@ -1404,8 +1407,11 @@ Status CollectionCatalog::_createOrUpdateView(
const ViewsForDatabase::PipelineValidatorFn& pipelineValidator,
std::unique_ptr<CollatorInterface> collator,
ViewsForDatabase&& viewsForDb,
- ViewUpsertMode mode) const {
- invariant(opCtx->lockState()->isCollectionLockedForMode(viewName, MODE_IX));
+ ViewUpsertMode insertViewMode) const {
+ // A view document direct write can occur via the oplog application path, which may only hold a
+ // lock on the collection being updated (the database views collection).
+ invariant(insertViewMode == ViewUpsertMode::kAlreadyDurableView ||
+ opCtx->lockState()->isCollectionLockedForMode(viewName, MODE_IX));
invariant(opCtx->lockState()->isCollectionLockedForMode(
NamespaceString(viewName.db(), NamespaceString::kSystemDotViewsCollectionName), MODE_X));
@@ -1429,20 +1435,20 @@ Status CollectionCatalog::_createOrUpdateView(
// If the view is already in the durable view catalog, we don't need to validate the graph. If
// we need to update the durable view catalog, we need to check that the resulting dependency
// graph is acyclic and within the maximum depth.
- const bool viewGraphNeedsValidation = mode != ViewUpsertMode::kAlreadyDurableView;
+ const bool viewGraphNeedsValidation = insertViewMode != ViewUpsertMode::kAlreadyDurableView;
Status graphStatus =
viewsForDb.upsertIntoGraph(opCtx, view, pipelineValidator, viewGraphNeedsValidation);
if (!graphStatus.isOK()) {
return graphStatus;
}
- if (mode != ViewUpsertMode::kAlreadyDurableView) {
+ if (insertViewMode != ViewUpsertMode::kAlreadyDurableView) {
viewsForDb.durable->upsert(opCtx, viewName, viewDef);
}
viewsForDb.valid = false;
auto res = [&] {
- switch (mode) {
+ switch (insertViewMode) {
case ViewUpsertMode::kCreateView:
case ViewUpsertMode::kAlreadyDurableView:
return viewsForDb.insert(opCtx, viewDef);
diff --git a/src/mongo/db/catalog/collection_catalog.h b/src/mongo/db/catalog/collection_catalog.h
index 3bdbd87540d..dbf3db5956a 100644
--- a/src/mongo/db/catalog/collection_catalog.h
+++ b/src/mongo/db/catalog/collection_catalog.h
@@ -115,6 +115,19 @@ public:
}
};
+ enum class ViewUpsertMode {
+ // Insert all data for that view into the view map, view graph, and durable view catalog.
+ kCreateView,
+
+ // Insert into the view map and view graph without reinserting the view into the durable
+ // view catalog. Skip view graph validation.
+ kAlreadyDurableView,
+
+ // Reload the view map, insert into the view graph (flagging it as needing refresh), and
+ // update the durable view catalog.
+ kUpdateView,
+ };
+
static std::shared_ptr<const CollectionCatalog> get(ServiceContext* svcCtx);
static std::shared_ptr<const CollectionCatalog> get(OperationContext* opCtx);
@@ -147,7 +160,8 @@ public:
*
* Must be in WriteUnitOfWork. View creation rolls back if the unit of work aborts.
*
- * Caller must ensure corresponding database exists.
+ * Caller must ensure corresponding database exists. Expects db.system.views MODE_X lock and
+ * view namespace MODE_IX lock (unless 'insertViewMode' is set to kAlreadyDurableView).
*/
Status createView(OperationContext* opCtx,
const NamespaceString& viewName,
@@ -155,7 +169,7 @@ public:
const BSONArray& pipeline,
const BSONObj& collation,
const ViewsForDatabase::PipelineValidatorFn& pipelineValidator,
- bool updateDurableViewCatalog = true) const;
+ ViewUpsertMode insertViewMode = ViewUpsertMode::kCreateView) const;
/**
* Drop the view named 'viewName'.
@@ -542,19 +556,6 @@ private:
*/
void _replaceViewsForDatabase(const DatabaseName& dbName, ViewsForDatabase&& views);
- enum class ViewUpsertMode {
- // Insert all data for that view into the view map, view graph, and durable view catalog.
- kCreateView,
-
- // Insert into the view map and view graph without reinserting the view into the durable
- // view catalog. Skip view graph validation.
- kAlreadyDurableView,
-
- // Reload the view map, insert into the view graph (flagging it as needing refresh), and
- // update the durable view catalog.
- kUpdateView,
- };
-
/**
* Helper to take care of shared functionality for 'createView(...)' and 'modifyView(...)'.
*/
@@ -565,7 +566,7 @@ private:
const ViewsForDatabase::PipelineValidatorFn& pipelineValidator,
std::unique_ptr<CollatorInterface> collator,
ViewsForDatabase&& viewsForDb,
- ViewUpsertMode mode) const;
+ ViewUpsertMode insertViewMode) const;
/**
* Returns true if this CollectionCatalog instance is part of an ongoing batched catalog write.
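The hunks above replace the boolean 'updateDurableViewCatalog' parameter with the three-valued ViewUpsertMode enum, and _createOrUpdateView keys both the durable write and the graph validation off that mode. The following standalone sketch (hypothetical names and output, not the MongoDB API) shows only that dispatch shape:

#include <iostream>
#include <string>

enum class ViewUpsertMode {
    kCreateView,          // insert into the view map, view graph, and durable catalog
    kAlreadyDurableView,  // insert into the view map and graph only; skip the durable write
    kUpdateView,          // reload the map, mark the graph for refresh, update the durable catalog
};

void upsertView(const std::string& viewName, ViewUpsertMode mode) {
    // Only validate the dependency graph when the durable catalog is being changed.
    const bool needsGraphValidation = mode != ViewUpsertMode::kAlreadyDurableView;
    std::cout << viewName << ": graph validation "
              << (needsGraphValidation ? "required" : "skipped") << '\n';

    switch (mode) {
        case ViewUpsertMode::kCreateView:
        case ViewUpsertMode::kAlreadyDurableView:
            std::cout << "  insert view definition\n";
            break;
        case ViewUpsertMode::kUpdateView:
            std::cout << "  replace existing view definition\n";
            break;
    }
}

int main() {
    upsertView("db.myView", ViewUpsertMode::kCreateView);
    upsertView("db.myView", ViewUpsertMode::kAlreadyDurableView);
    return 0;
}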
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index aaf5e7607f2..16560953def 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -37,7 +37,7 @@
#include "mongo/bson/ordering.h"
#include "mongo/bson/simple_bsonelement_comparator.h"
#include "mongo/bson/simple_bsonobj_comparator.h"
-#include "mongo/db/auth/security_token.h"
+#include "mongo/crypto/fle_crypto.h"
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/catalog/document_validation.h"
@@ -818,7 +818,6 @@ Status CollectionImpl::insertDocumentsForOplog(OperationContext* opCtx,
return status;
}
-
Status CollectionImpl::insertDocuments(OperationContext* opCtx,
const std::vector<InsertStatement>::const_iterator begin,
const std::vector<InsertStatement>::const_iterator end,
@@ -842,8 +841,20 @@ Status CollectionImpl::insertDocuments(OperationContext* opCtx,
}
auto status = _checkValidationAndParseResult(opCtx, it->doc);
- if (!status.isOK())
+ if (!status.isOK()) {
return status;
+ }
+
+ auto& validationSettings = DocumentValidationSettings::get(opCtx);
+
+ if (getCollectionOptions().encryptedFieldConfig &&
+ !validationSettings.isSchemaValidationDisabled() &&
+ !validationSettings.isSafeContentValidationDisabled() &&
+ it->doc.hasField(kSafeContent)) {
+ return Status(ErrorCodes::BadValue,
+ str::stream()
+ << "Cannot insert a document with field name " << kSafeContent);
+ }
}
const SnapshotId sid = opCtx->recoveryUnit()->getSnapshotId();
@@ -1347,6 +1358,17 @@ void CollectionImpl::deleteDocument(OperationContext* opCtx,
}
}
+bool compareSafeContentElem(const BSONObj& oldDoc, const BSONObj& newDoc) {
+ if (newDoc.hasField(kSafeContent) != oldDoc.hasField(kSafeContent)) {
+ return false;
+ }
+ if (!newDoc.hasField(kSafeContent)) {
+ return true;
+ }
+
+ return newDoc.getField(kSafeContent).binaryEqual(oldDoc.getField(kSafeContent));
+}
+
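+// The compareSafeContentElem helper above lets updateDocument reject an update that adds,
+// removes, or mutates the safeContent field. A minimal standalone sketch of the same three-way
+// check, using a flat string map as a stand-in for BSONObj and an illustrative field name
+// (hypothetical types, not the server API):
+//
+//   #include <cassert>
+//   #include <map>
+//   #include <string>
+//
+//   using Doc = std::map<std::string, std::string>;
+//   constexpr auto kSafeContentField = "__safeContent__";  // illustrative name only
+//
+//   // False if exactly one document carries the field, or both carry it with different values.
+//   bool compareSafeContentField(const Doc& oldDoc, const Doc& newDoc) {
+//       const bool oldHas = oldDoc.count(kSafeContentField) > 0;
+//       const bool newHas = newDoc.count(kSafeContentField) > 0;
+//       if (oldHas != newHas)
+//           return false;
+//       if (!newHas)
+//           return true;
+//       return oldDoc.at(kSafeContentField) == newDoc.at(kSafeContentField);
+//   }
+//
+//   int main() {
+//       Doc oldDoc{{kSafeContentField, "tag-1"}};
+//       assert(compareSafeContentField(oldDoc, Doc{{kSafeContentField, "tag-1"}}));   // unchanged
+//       assert(!compareSafeContentField(oldDoc, Doc{}));                              // removed
+//       assert(!compareSafeContentField(oldDoc, Doc{{kSafeContentField, "tag-2"}}));  // mutated
+//   }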
RecordId CollectionImpl::updateDocument(OperationContext* opCtx,
RecordId oldLocation,
const Snapshotted<BSONObj>& oldDoc,
@@ -1371,6 +1393,17 @@ RecordId CollectionImpl::updateDocument(OperationContext* opCtx,
}
}
+ auto& validationSettings = DocumentValidationSettings::get(opCtx);
+ if (getCollectionOptions().encryptedFieldConfig &&
+ !validationSettings.isSchemaValidationDisabled() &&
+ !validationSettings.isSafeContentValidationDisabled()) {
+
+ uassert(ErrorCodes::BadValue,
+ str::stream() << "New document and old document both need to have " << kSafeContent
+ << " field.",
+ compareSafeContentElem(oldDoc.value(), newDoc));
+ }
+
dassert(opCtx->lockState()->isCollectionLockedForMode(ns(), MODE_IX));
invariant(oldDoc.snapshotId() == opCtx->recoveryUnit()->getSnapshotId());
invariant(newDoc.isOwned());
@@ -2166,8 +2199,9 @@ Status CollectionImpl::prepareForIndexBuild(OperationContext* opCtx,
str::stream() << "index " << imd.nameStringData()
<< " is already in current metadata: " << _metadata->toBSON());
- if (getTimeseriesOptions() && feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() &&
- serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest() &&
+ if (getTimeseriesOptions() &&
+ feature_flags::gTimeseriesMetricIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility) &&
timeseries::doesBucketsIndexIncludeMeasurement(
opCtx, ns(), *getTimeseriesOptions(), spec->infoObj())) {
invariant(_metadata->timeseriesBucketsMayHaveMixedSchemaData);
diff --git a/src/mongo/db/catalog/collection_impl.h b/src/mongo/db/catalog/collection_impl.h
index 7d5bca73064..e2cc5175989 100644
--- a/src/mongo/db/catalog/collection_impl.h
+++ b/src/mongo/db/catalog/collection_impl.h
@@ -381,7 +381,7 @@ public:
uint64_t n = numRecords(opCtx);
if (n == 0)
- return 5;
+ return 0;
return static_cast<int>(dataSize(opCtx) / n);
}
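The change above makes an empty collection report an average object size of 0 instead of a placeholder value, which also keeps the division well defined. A trivial standalone restatement of that arithmetic, with made-up values:

#include <cstdint>
#include <iostream>

int averageObjectSize(std::uint64_t numRecords, std::uint64_t dataSizeBytes) {
    if (numRecords == 0)
        return 0;  // empty collection: no meaningful average, and no division by zero below
    return static_cast<int>(dataSizeBytes / numRecords);
}

int main() {
    std::cout << averageObjectSize(0, 0) << '\n';     // 0
    std::cout << averageObjectSize(4, 4096) << '\n';  // 1024
    return 0;
}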
diff --git a/src/mongo/db/catalog/collection_writer_test.cpp b/src/mongo/db/catalog/collection_writer_test.cpp
index 1f828f38800..a5aad697d59 100644
--- a/src/mongo/db/catalog/collection_writer_test.cpp
+++ b/src/mongo/db/catalog/collection_writer_test.cpp
@@ -101,7 +101,7 @@ TEST_F(CollectionWriterTest, Commit) {
{
AutoGetCollection lock(operationContext(), kNss, MODE_X);
WriteUnitOfWork wuow(operationContext());
- auto writable = writer.getWritableCollection();
+ auto writable = writer.getWritableCollection(operationContext());
// get() and getWritableCollection() should return the same instance
ASSERT_EQ(writer.get().get(), writable);
@@ -128,7 +128,7 @@ TEST_F(CollectionWriterTest, Commit) {
{
AutoGetCollection lock(operationContext(), kNss, MODE_X);
WriteUnitOfWork wuow(operationContext());
- auto writable = writer.getWritableCollection();
+ auto writable = writer.getWritableCollection(operationContext());
ASSERT_EQ(writer.get().get(), writable);
ASSERT_EQ(writable, lookupCollectionFromCatalog().get());
@@ -153,7 +153,7 @@ TEST_F(CollectionWriterTest, Rollback) {
{
AutoGetCollection lock(operationContext(), kNss, MODE_X);
WriteUnitOfWork wuow(operationContext());
- auto writable = writer.getWritableCollection();
+ auto writable = writer.getWritableCollection(operationContext());
ASSERT_EQ(writer.get().get(), writable);
ASSERT_EQ(writable, lookupCollectionFromCatalog().get());
@@ -179,7 +179,7 @@ TEST_F(CollectionWriterTest, CommitAfterDestroy) {
CollectionWriter writer(operationContext(), kNss);
// Request a writable Collection and destroy CollectionWriter before WUOW commits
- writable = writer.getWritableCollection();
+ writable = writer.getWritableCollection(operationContext());
}
wuow.commit();
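These test updates track an API change: getWritableCollection() now takes the OperationContext as a parameter rather than using one captured at construction. A standalone sketch of the underlying pattern, lazy acquisition plus a rollback handler registered against the context actually passed in (hypothetical types, not the server classes):

#include <functional>
#include <iostream>
#include <vector>

struct Collection { int version = 0; };

struct OperationContext {
    std::vector<std::function<void()>> rollbackHandlers;
    void registerRollbackHandler(std::function<void()> fn) {
        rollbackHandlers.push_back(std::move(fn));
    }
    void rollback() {
        for (auto& fn : rollbackHandlers) fn();
        rollbackHandlers.clear();
    }
};

class CollectionWriter {
public:
    explicit CollectionWriter(Collection* stored) : _stored(stored) {}

    Collection* getWritableCollection(OperationContext* opCtx) {
        if (!_writable) {
            _writable = _stored;
            // On rollback, forget the cached writable pointer so the next caller re-acquires it.
            opCtx->registerRollbackHandler([this] { _writable = nullptr; });
        }
        return _writable;
    }

private:
    Collection* _stored;
    Collection* _writable = nullptr;
};

int main() {
    Collection coll;
    OperationContext opCtx;
    CollectionWriter writer(&coll);
    writer.getWritableCollection(&opCtx)->version = 1;
    opCtx.rollback();  // clears the cached pointer inside the writer
    std::cout << coll.version << '\n';
    return 0;
}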
diff --git a/src/mongo/db/catalog/commit_quorum.idl b/src/mongo/db/catalog/commit_quorum.idl
index 2a67a22fdef..26edd42e9fe 100644
--- a/src/mongo/db/catalog/commit_quorum.idl
+++ b/src/mongo/db/catalog/commit_quorum.idl
@@ -40,8 +40,8 @@ types:
- decimal
- double
- string
- description: "CommitQuorumOptions defines the required quorum for the index builds to
- commit."
+ description: "CommitQuorumOptions defines the replica set membership required to be ready
+ for commit in order for the primary to proceed to commit an index build."
cpp_type: "mongo::CommitQuorumOptions"
serializer: "mongo::CommitQuorumOptions::appendToBuilder"
deserializer: "mongo::CommitQuorumOptions::deserializerForIDL"
diff --git a/src/mongo/db/catalog/commit_quorum_options.h b/src/mongo/db/catalog/commit_quorum_options.h
index 351281131a0..a910e0a0831 100644
--- a/src/mongo/db/catalog/commit_quorum_options.h
+++ b/src/mongo/db/catalog/commit_quorum_options.h
@@ -38,12 +38,15 @@ namespace mongo {
class Status;
/**
+ * 'CommitQuorumOptions' is used to determine when a primary should commit an index build. When the
+ * specified 'quorum' of replica set members is reached, then the primary proceeds to commit the
+ * index. commitQuorum ensures secondaries are ready to commit the index as quickly as possible:
+ * secondary replication will stall on receipt of a commitIndexBuild oplog entry until the
+ * secondary's index build is complete and ready to be committed.
+ *
* The 'CommitQuorumOptions' has the same range of settings as the 'w' field from
* 'WriteConcernOptions'. It can be set to an integer starting from 0 and up, or to a string. The
* string option can be 'majority', 'votingMembers' or a replica set tag.
- *
- * The principal idea behind 'CommitQuorumOptions' is to figure out when an index build should be
- * committed on the replica set based on the number of commit ready members.
*/
class CommitQuorumOptions {
public:
@@ -86,10 +89,15 @@ public:
return (numNodes == rhs.numNodes && mode == rhs.mode) ? true : false;
}
- // Returns the BSON representation of this object.
+ /**
+ * Returns the BSON representation of this object.
+ * E.g. {commitQuorum: "majority"}
+ */
BSONObj toBSON() const;
- // Appends the BSON representation of this object.
+ /**
+ * Appends the commitQuorum value (mode or numNodes) with the given field name "fieldName".
+ */
void appendToBuilder(StringData fieldName, BSONObjBuilder* builder) const;
// The 'commitQuorum' parameter to define the required quorum for the index builds to commit.
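As documented above, a commit quorum is either a node count or a mode string such as "majority" or "votingMembers". A small standalone sketch of that number-or-string shape and a BSON-like rendering (illustrative only, not the server's CommitQuorumOptions):

#include <iostream>
#include <string>
#include <variant>

// Either a node count or a mode string ("majority", "votingMembers", or a replica set tag).
using CommitQuorum = std::variant<int, std::string>;

std::string toJsonLike(const CommitQuorum& quorum) {
    if (const int* numNodes = std::get_if<int>(&quorum))
        return "{commitQuorum: " + std::to_string(*numNodes) + "}";
    return "{commitQuorum: \"" + std::get<std::string>(quorum) + "\"}";
}

int main() {
    std::cout << toJsonLike(CommitQuorum{std::string{"majority"}}) << '\n';  // {commitQuorum: "majority"}
    std::cout << toJsonLike(CommitQuorum{3}) << '\n';                        // {commitQuorum: 3}
    return 0;
}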
diff --git a/src/mongo/db/catalog/database_holder.h b/src/mongo/db/catalog/database_holder.h
index 3a7918acda8..ffb305b2c4c 100644
--- a/src/mongo/db/catalog/database_holder.h
+++ b/src/mongo/db/catalog/database_holder.h
@@ -36,14 +36,12 @@
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/database_name.h"
+#include "mongo/s/database_version.h"
namespace mongo {
-class CollectionCatalogEntry;
class Database;
class OperationContext;
-class RecordStore;
-class ViewCatalog;
/**
* Registry of opened databases.
@@ -56,9 +54,8 @@ public:
static DatabaseHolder* get(OperationContext* opCtx);
static void set(ServiceContext* service, std::unique_ptr<DatabaseHolder> databaseHolder);
- virtual ~DatabaseHolder() = default;
-
DatabaseHolder() = default;
+ virtual ~DatabaseHolder() = default;
/**
* Retrieves an already opened database or returns nullptr. Must be called with the database
diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp
index cd743dcac46..e8c9bf6c571 100644
--- a/src/mongo/db/catalog/database_impl.cpp
+++ b/src/mongo/db/catalog/database_impl.cpp
@@ -59,6 +59,7 @@
#include "mongo/db/introspect.h"
#include "mongo/db/op_observer.h"
#include "mongo/db/query/collation/collator_factory_interface.h"
+#include "mongo/db/query/query_knobs_gen.h"
#include "mongo/db/repl/drop_pending_collection_reaper.h"
#include "mongo/db/repl/oplog.h"
#include "mongo/db/repl/replication_coordinator.h"
@@ -93,6 +94,17 @@ MONGO_FAIL_POINT_DEFINE(hangAndFailAfterCreateCollectionReservesOpTime);
MONGO_FAIL_POINT_DEFINE(openCreateCollectionWindowFp);
MONGO_FAIL_POINT_DEFINE(allowSystemViewsDrop);
+// When active, a column index will be created for all new collections. This is used for the column
+// index JS test passthrough suite. Other passthroughs work by overriding javascript methods on the
+// client side, but this approach often requires the drop() function to create the collection. This
+// behavior is confusing, and requires a large number of tests to be re-written to accommodate this
+// passthrough behavior. In case you're wondering, this failpoint approach would not work as well
+// for the sharded collections task, since mongos and the config servers are generally unaware of
+// when a collection is created. There isn't a great server-side hook we can use to auto-shard a
+// collection, and it is more complex technically to drive this process from one shard in the
+// cluster. For column store indexes, we just need to change local state on each mongod.
+MONGO_FAIL_POINT_DEFINE(createColumnIndexOnAllCollections);
+
Status validateDBNameForWindows(StringData dbname) {
const std::vector<std::string> windowsReservedNames = {
"con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5", "com6", "com7",
@@ -132,6 +144,12 @@ void assertMovePrimaryInProgress(OperationContext* opCtx, NamespaceString const&
}
}
+static const BSONObj kColumnStoreSpec = BSON("name"
+ << "$**_columnstore"
+ << "key"
+ << BSON("$**"
+ << "columnstore")
+ << "v" << 2);
} // namespace
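The failpoint and kColumnStoreSpec above let the passthrough suite build a $**_columnstore index on every newly created collection. A standalone sketch of the general mechanism, a process-wide test toggle consulted at creation time (hypothetical code, not the real failpoint framework):

#include <atomic>
#include <iostream>
#include <string>

// Hypothetical process-wide test toggle standing in for the failpoint.
std::atomic<bool> createColumnIndexOnAllCollections{false};

void createCollection(const std::string& ns) {
    std::cout << "created " << ns << '\n';
    if (createColumnIndexOnAllCollections.load()) {
        // The real server builds the index from kColumnStoreSpec:
        // {name: "$**_columnstore", key: {"$**": "columnstore"}, v: 2}.
        std::cout << "  built $**_columnstore index on " << ns << '\n';
    }
}

int main() {
    createCollection("test.plain");
    createColumnIndexOnAllCollections = true;  // what the passthrough suite effectively does
    createCollection("test.columns");
    return 0;
}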
Status DatabaseImpl::validateDBName(StringData dbname) {
@@ -176,7 +194,7 @@ Status DatabaseImpl::init(OperationContext* const opCtx) {
// If this is called from the repair path, the collection is already initialized.
if (!collection->isInitialized()) {
WriteUnitOfWork wuow(opCtx);
- collection.getWritableCollection()->init(opCtx);
+ collection.getWritableCollection(opCtx)->init(opCtx);
wuow.commit();
}
}
@@ -450,6 +468,16 @@ Status DatabaseImpl::dropCollection(OperationContext* opCtx,
invariant(nss.db() == _name.db());
+ // Returns true if the supplied namespace 'nss' is a system collection that can be dropped,
+ // false otherwise.
+ auto isDroppableSystemCollection = [](const auto& nss) {
+ return nss.isHealthlog() || nss == NamespaceString::kLogicalSessionsNamespace ||
+ nss == NamespaceString::kKeysCollectionNamespace ||
+ nss.isTemporaryReshardingCollection() || nss.isTimeseriesBucketsCollection() ||
+ nss.isChangeStreamPreImagesCollection() ||
+ nss == NamespaceString::kConfigsvrRestoreNamespace || nss.isChangeCollection();
+ };
+
if (nss.isSystem()) {
if (nss.isSystemDotProfile()) {
if (catalog->getDatabaseProfileLevel(_name) != 0)
@@ -463,11 +491,7 @@ Status DatabaseImpl::dropCollection(OperationContext* opCtx,
<< " when time-series collections are present.",
viewStats && viewStats->userTimeseries == 0);
}
- } else if (!(nss.isHealthlog() || nss == NamespaceString::kLogicalSessionsNamespace ||
- nss == NamespaceString::kKeysCollectionNamespace ||
- nss.isTemporaryReshardingCollection() || nss.isTimeseriesBucketsCollection() ||
- nss.isChangeStreamPreImagesCollection() ||
- nss == NamespaceString::kConfigsvrRestoreNamespace)) {
+ } else if (!isDroppableSystemCollection(nss)) {
return Status(ErrorCodes::IllegalOperation,
str::stream() << "can't drop system collection " << nss);
}
@@ -520,14 +544,14 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx,
auto opObserver = serviceContext->getOpObserver();
auto isOplogDisabledForNamespace = replCoord->isOplogDisabledFor(opCtx, nss);
if (dropOpTime.isNull() && isOplogDisabledForNamespace) {
- _dropCollectionIndexes(opCtx, nss, collection.getWritableCollection());
+ _dropCollectionIndexes(opCtx, nss, collection.getWritableCollection(opCtx));
opObserver->onDropCollection(opCtx,
nss,
uuid,
numRecords,
OpObserver::CollectionDropType::kOnePhase,
markFromMigrate);
- return _finishDropCollection(opCtx, nss, collection.getWritableCollection());
+ return _finishDropCollection(opCtx, nss, collection.getWritableCollection(opCtx));
}
// Replicated collections should be dropped in two phases.
@@ -536,7 +560,7 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx,
// storage engine and will no longer be visible at the catalog layer with 3.6-style
// <db>.system.drop.* namespaces.
if (serviceContext->getStorageEngine()->supportsPendingDrops()) {
- _dropCollectionIndexes(opCtx, nss, collection.getWritableCollection());
+ _dropCollectionIndexes(opCtx, nss, collection.getWritableCollection(opCtx));
auto commitTimestamp = opCtx->recoveryUnit()->getCommitTimestamp();
LOGV2(20314,
@@ -572,7 +596,7 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx,
str::stream() << "OpTime is not null. OpTime: " << opTime.toString());
}
- return _finishDropCollection(opCtx, nss, collection.getWritableCollection());
+ return _finishDropCollection(opCtx, nss, collection.getWritableCollection(opCtx));
}
// Old two-phase drop: Replicated collections will be renamed with a special drop-pending
@@ -706,7 +730,7 @@ Status DatabaseImpl::renameCollection(OperationContext* opCtx,
// Set the namespace of 'collToRename' from within the CollectionCatalog. This is necessary
// because the CollectionCatalog manages the necessary isolation for this Collection until the
// WUOW commits.
- auto writableCollection = collToRename.getWritableCollection();
+ auto writableCollection = collToRename.getWritableCollection(opCtx);
Status status = writableCollection->rename(opCtx, toNss, stayTemp);
if (!status.isOK())
return status;
@@ -884,25 +908,32 @@ Collection* DatabaseImpl::createCollection(OperationContext* opCtx,
BSONObj fullIdIndexSpec;
- if (createIdIndex) {
- if (collection->requiresIdIndex()) {
- if (optionsWithUUID.autoIndexId == CollectionOptions::YES ||
- optionsWithUUID.autoIndexId == CollectionOptions::DEFAULT) {
- IndexCatalog* ic = collection->getIndexCatalog();
- fullIdIndexSpec = uassertStatusOK(ic->createIndexOnEmptyCollection(
- opCtx,
- collection,
- !idIndex.isEmpty() ? idIndex : ic->getDefaultIdIndexSpec(collection)));
- } else {
- // autoIndexId: false is only allowed on unreplicated collections.
- uassert(50001,
- str::stream() << "autoIndexId:false is not allowed for collection " << nss
- << " because it can be replicated",
- !nss.isReplicated());
- }
+ bool createColumnIndex = false;
+ if (createIdIndex && collection->requiresIdIndex()) {
+ if (optionsWithUUID.autoIndexId == CollectionOptions::YES ||
+ optionsWithUUID.autoIndexId == CollectionOptions::DEFAULT) {
+ auto* ic = collection->getIndexCatalog();
+ fullIdIndexSpec = uassertStatusOK(ic->createIndexOnEmptyCollection(
+ opCtx,
+ collection,
+ !idIndex.isEmpty() ? idIndex : ic->getDefaultIdIndexSpec(collection)));
+ createColumnIndex = createColumnIndexOnAllCollections.shouldFail();
+ } else {
+ // autoIndexId: false is only allowed on unreplicated collections.
+ uassert(50001,
+ str::stream() << "autoIndexId:false is not allowed for collection " << nss
+ << " because it can be replicated",
+ !nss.isReplicated());
}
}
+ if (MONGO_unlikely(createColumnIndex)) {
+ invariant(!internalQueryForceClassicEngine.load(),
+ "Column Store Indexes failpoint in use without enabling SBE engine");
+ uassertStatusOK(collection->getIndexCatalog()->createIndexOnEmptyCollection(
+ opCtx, collection, kColumnStoreSpec));
+ }
+
hangBeforeLoggingCreateCollection.pauseWhileSet();
opCtx->getServiceContext()->getOpObserver()->onCreateCollection(
diff --git a/src/mongo/db/catalog/database_test.cpp b/src/mongo/db/catalog/database_test.cpp
index e9da3e319ee..8346d0f1a29 100644
--- a/src/mongo/db/catalog/database_test.cpp
+++ b/src/mongo/db/catalog/database_test.cpp
@@ -355,7 +355,7 @@ TEST_F(DatabaseTest, MakeUniqueCollectionNamespaceReplacesPercentSignsWithRandom
auto nss1 = unittest::assertGet(db->makeUniqueCollectionNamespace(_opCtx.get(), model));
if (!re.FullMatch(nss1.ns())) {
FAIL((StringBuilder() << "First generated namespace \"" << nss1.ns()
- << "\" does not match reqular expression \"" << re.pattern()
+ << "\" does not match regular expression \"" << re.pattern()
<< "\"")
.str());
}
@@ -372,7 +372,7 @@ TEST_F(DatabaseTest, MakeUniqueCollectionNamespaceReplacesPercentSignsWithRandom
auto nss2 = unittest::assertGet(db->makeUniqueCollectionNamespace(_opCtx.get(), model));
if (!re.FullMatch(nss2.ns())) {
FAIL((StringBuilder() << "Second generated namespace \"" << nss2.ns()
- << "\" does not match reqular expression \"" << re.pattern()
+ << "\" does not match regular expression \"" << re.pattern()
<< "\"")
.str());
}
diff --git a/src/mongo/db/catalog/document_validation.h b/src/mongo/db/catalog/document_validation.h
index 875f255c565..47db304d79d 100644
--- a/src/mongo/db/catalog/document_validation.h
+++ b/src/mongo/db/catalog/document_validation.h
@@ -52,7 +52,7 @@ class DocumentValidationSettings {
public:
enum flag : std::uint8_t {
/*
- * Enables document validation (both schema and internal).
+ * Enables document validation (schema, internal, and safeContent).
*/
kEnableValidation = 0x00,
/*
@@ -67,6 +67,12 @@ public:
* doesn't comply with internal validation rules.
*/
kDisableInternalValidation = 0x02,
+ /*
+ * If set, modifications to the safeContent array are allowed. This flag is only
+ * enabled when bypass document validation is enabled or if crudProcessed is true
+ * in the query.
+ */
+ kDisableSafeContentValidation = 0x04,
};
using Flags = std::uint8_t;
@@ -92,6 +98,10 @@ public:
return _flags & kDisableInternalValidation;
}
+ bool isSafeContentValidationDisabled() const {
+ return _flags & kDisableSafeContentValidation;
+ }
+
bool isDocumentValidationEnabled() const {
return _flags == kEnableValidation;
}
@@ -134,11 +144,29 @@ class DisableDocumentSchemaValidationIfTrue {
public:
DisableDocumentSchemaValidationIfTrue(OperationContext* opCtx,
bool shouldDisableSchemaValidation) {
- if (shouldDisableSchemaValidation)
- _documentSchemaValidationDisabler.emplace(opCtx);
+ if (shouldDisableSchemaValidation) {
+ _documentSchemaValidationDisabler.emplace(
+ opCtx, DocumentValidationSettings::kDisableSchemaValidation);
+ }
+ }
+
+private:
+ boost::optional<DisableDocumentValidation> _documentSchemaValidationDisabler;
+};
+
+class DisableSafeContentValidationIfTrue {
+public:
+ DisableSafeContentValidationIfTrue(OperationContext* opCtx,
+ bool shouldDisableSchemaValidation,
+ bool encryptionInformationCrudProcessed) {
+ if (shouldDisableSchemaValidation || encryptionInformationCrudProcessed) {
+ _documentSchemaValidationDisabler.emplace(
+ opCtx, DocumentValidationSettings::kDisableSafeContentValidation);
+ }
}
private:
boost::optional<DisableDocumentValidation> _documentSchemaValidationDisabler;
};
+
} // namespace mongo
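A condensed standalone restatement of the flag layout introduced above (the bit values are copied from this header; the class is otherwise simplified and the RAII disablers are omitted):

#include <cassert>
#include <cstdint>

class DocumentValidationSettings {
public:
    enum flag : std::uint8_t {
        kEnableValidation = 0x00,
        kDisableSchemaValidation = 0x01,
        kDisableInternalValidation = 0x02,
        kDisableSafeContentValidation = 0x04,
    };
    using Flags = std::uint8_t;

    explicit DocumentValidationSettings(Flags flags) : _flags(flags) {}

    bool isSchemaValidationDisabled() const { return _flags & kDisableSchemaValidation; }
    bool isSafeContentValidationDisabled() const { return _flags & kDisableSafeContentValidation; }
    bool isDocumentValidationEnabled() const { return _flags == kEnableValidation; }

private:
    Flags _flags;
};

int main() {
    DocumentValidationSettings s(DocumentValidationSettings::kDisableSchemaValidation |
                                 DocumentValidationSettings::kDisableSafeContentValidation);
    assert(s.isSchemaValidationDisabled());
    assert(s.isSafeContentValidationDisabled());
    assert(!s.isDocumentValidationEnabled());
    return 0;
}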
diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp
index c38af203930..553f4dcbf0e 100644
--- a/src/mongo/db/catalog/index_builds_manager.cpp
+++ b/src/mongo/db/catalog/index_builds_manager.cpp
@@ -322,7 +322,7 @@ Status IndexBuildsManager::commitIndexBuild(OperationContext* opCtx,
[this, builder, buildUUID, opCtx, &collection, nss, &onCreateEachFn, &onCommitFn] {
WriteUnitOfWork wunit(opCtx);
auto status = builder->commit(
- opCtx, collection.getWritableCollection(), onCreateEachFn, onCommitFn);
+ opCtx, collection.getWritableCollection(opCtx), onCreateEachFn, onCommitFn);
if (!status.isOK()) {
return status;
}
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp
index 74049b34427..0e3679f3f02 100644
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@@ -118,8 +118,8 @@ Status isSpecOKClusteredIndexCheck(const BSONObj& indexSpec,
auto key = indexSpec.getObjectField("key");
bool keysMatch = clustered_util::matchesClusterKey(key, collInfo);
- bool clusteredOptionPresent =
- indexSpec.hasField("clustered") && indexSpec["clustered"].trueValue();
+ bool clusteredOptionPresent = indexSpec.hasField(IndexDescriptor::kClusteredFieldName) &&
+ indexSpec[IndexDescriptor::kClusteredFieldName].trueValue();
if (clusteredOptionPresent && !keysMatch) {
// The 'clustered' option implies the indexSpec must match the clustered index.
@@ -907,8 +907,11 @@ Status IndexCatalogImpl::_isSpecOk(OperationContext* opCtx,
str::stream() << pluginName
<< " indexes are under development and cannot be used without "
"enabling the feature flag",
- feature_flags::gFeatureFlagColumnstoreIndexes.isEnabled(
- serverGlobalParams.featureCompatibility));
+ // With our testing failpoint we may try to run this code before we've initialized
+ // the FCV.
+ !serverGlobalParams.featureCompatibility.isVersionInitialized() ||
+ feature_flags::gFeatureFlagColumnstoreIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility));
if (auto columnSpecStatus = validateColumnStoreSpec(collection, spec, indexVersion);
!columnSpecStatus.isOK()) {
return columnSpecStatus;
diff --git a/src/mongo/db/catalog/index_key_validate.cpp b/src/mongo/db/catalog/index_key_validate.cpp
index 1abc7b14e03..199b1f5a13d 100644
--- a/src/mongo/db/catalog/index_key_validate.cpp
+++ b/src/mongo/db/catalog/index_key_validate.cpp
@@ -85,7 +85,7 @@ static const std::set<StringData> allowedClusteredIndexFieldNames = {
ClusteredIndexSpec::kVFieldName,
ClusteredIndexSpec::kKeyFieldName,
// This is for indexSpec creation only.
- "clustered",
+ IndexDescriptor::kClusteredFieldName,
};
/**
@@ -268,7 +268,8 @@ BSONObj repairIndexSpec(const NamespaceString& ns,
IndexDescriptor::kUniqueFieldName == fieldName ||
IndexDescriptor::kSparseFieldName == fieldName ||
IndexDescriptor::kDropDuplicatesFieldName == fieldName ||
- IndexDescriptor::kPrepareUniqueFieldName == fieldName || "clustered" == fieldName) &&
+ IndexDescriptor::kPrepareUniqueFieldName == fieldName ||
+ IndexDescriptor::kClusteredFieldName == fieldName) &&
!indexSpecElem.isNumber() && !indexSpecElem.isBoolean() && indexSpecElem.trueValue()) {
LOGV2_WARNING(6444400,
"Fixing boolean field from index spec",
@@ -293,7 +294,7 @@ StatusWith<BSONObj> validateIndexSpec(OperationContext* opCtx, const BSONObj& in
bool hasOriginalSpecField = false;
bool unique = false;
bool prepareUnique = false;
- auto clusteredField = indexSpec["clustered"];
+ auto clusteredField = indexSpec[IndexDescriptor::kClusteredFieldName];
bool apiStrict = opCtx && APIParameters::get(opCtx).getAPIStrict().value_or(false);
auto fieldNamesValidStatus = validateIndexSpecFieldNames(indexSpec);
@@ -500,11 +501,9 @@ StatusWith<BSONObj> validateIndexSpec(OperationContext* opCtx, const BSONObj& in
} else if ((IndexDescriptor::kBackgroundFieldName == indexSpecElemFieldName ||
IndexDescriptor::kUniqueFieldName == indexSpecElemFieldName ||
IndexDescriptor::kSparseFieldName == indexSpecElemFieldName ||
- IndexDescriptor::k2dsphereCoarsestIndexedLevel == indexSpecElemFieldName ||
- IndexDescriptor::k2dsphereFinestIndexedLevel == indexSpecElemFieldName ||
IndexDescriptor::kDropDuplicatesFieldName == indexSpecElemFieldName ||
IndexDescriptor::kPrepareUniqueFieldName == indexSpecElemFieldName ||
- "clustered" == indexSpecElemFieldName)) {
+ IndexDescriptor::kClusteredFieldName == indexSpecElemFieldName)) {
if (!indexSpecElem.isNumber() && !indexSpecElem.isBoolean()) {
return {ErrorCodes::TypeMismatch,
str::stream()
@@ -528,7 +527,9 @@ StatusWith<BSONObj> validateIndexSpec(OperationContext* opCtx, const BSONObj& in
IndexDescriptor::kTextVersionFieldName == indexSpecElemFieldName ||
IndexDescriptor::k2dIndexBitsFieldName == indexSpecElemFieldName ||
IndexDescriptor::k2dIndexMinFieldName == indexSpecElemFieldName ||
- IndexDescriptor::k2dIndexMaxFieldName == indexSpecElemFieldName) &&
+ IndexDescriptor::k2dIndexMaxFieldName == indexSpecElemFieldName ||
+ IndexDescriptor::k2dsphereCoarsestIndexedLevel == indexSpecElemFieldName ||
+ IndexDescriptor::k2dsphereFinestIndexedLevel == indexSpecElemFieldName) &&
!indexSpecElem.isNumber()) {
return {ErrorCodes::TypeMismatch,
str::stream() << "The field '" << indexSpecElemFieldName
@@ -629,7 +630,7 @@ StatusWith<BSONObj> validateIndexSpec(OperationContext* opCtx, const BSONObj& in
}
Status validateIdIndexSpec(const BSONObj& indexSpec) {
- bool isClusteredIndexSpec = indexSpec.hasField("clustered");
+ bool isClusteredIndexSpec = indexSpec.hasField(IndexDescriptor::kClusteredFieldName);
if (!isClusteredIndexSpec) {
// Field names for a 'clustered' index spec have already been validated through
@@ -691,7 +692,7 @@ Status validateIndexSpecFieldNames(const BSONObj& indexSpec) {
return Status::OK();
}
- if (indexSpec.hasField("clustered")) {
+ if (indexSpec.hasField(IndexDescriptor::kClusteredFieldName)) {
return validateClusteredSpecFieldNames(indexSpec);
}
diff --git a/src/mongo/db/catalog/index_key_validate_test.cpp b/src/mongo/db/catalog/index_key_validate_test.cpp
index adfd2e25a5a..e5b9e7b6316 100644
--- a/src/mongo/db/catalog/index_key_validate_test.cpp
+++ b/src/mongo/db/catalog/index_key_validate_test.cpp
@@ -353,5 +353,42 @@ TEST(IndexKeyValidateTest, RepairIndexSpecs) {
"true, force: true}"))));
}
+TEST(IndexKeyValidateTest, GeoIndexSpecs) {
+ ASSERT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':17,'"
+ "coarsestIndexedLevel':5}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':'string','"
+ "coarsestIndexedLevel':'string'}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':17,'"
+ "coarsestIndexedLevel':'string'}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':'string','"
+ "coarsestIndexedLevel':5}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':true,'"
+ "coarsestIndexedLevel':true}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':17,'"
+ "coarsestIndexedLevel':true}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':true,'"
+ "coarsestIndexedLevel':5}")));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 95e1c8d7f2a..8a66be4f602 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -137,7 +137,7 @@ void MultiIndexBlock::abortIndexBuild(OperationContext* opCtx,
// This cleans up all index builds. Because that may need to write, it is done inside of
// a WUOW. Nothing inside this block can fail, and it is made fatal if it does.
for (size_t i = 0; i < _indexes.size(); i++) {
- _indexes[i].block->fail(opCtx, collection.getWritableCollection());
+ _indexes[i].block->fail(opCtx, collection.getWritableCollection(opCtx));
}
onCleanUp();
@@ -301,16 +301,18 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(
stateInfoIt != resumeInfoIndexes.end());
stateInfo = *stateInfoIt;
- status = index.block->initForResume(
- opCtx, collection.getWritableCollection(), *stateInfo, resumeInfo->getPhase());
+ status = index.block->initForResume(opCtx,
+ collection.getWritableCollection(opCtx),
+ *stateInfo,
+ resumeInfo->getPhase());
} else {
- status = index.block->init(opCtx, collection.getWritableCollection());
+ status = index.block->init(opCtx, collection.getWritableCollection(opCtx));
}
if (!status.isOK())
return status;
auto indexCatalogEntry =
- index.block->getEntry(opCtx, collection.getWritableCollection());
+ index.block->getEntry(opCtx, collection.getWritableCollection(opCtx));
index.real = indexCatalogEntry->accessMethod();
status = index.real->initializeAsEmpty(opCtx);
if (!status.isOK())
diff --git a/src/mongo/db/catalog/multi_index_block_test.cpp b/src/mongo/db/catalog/multi_index_block_test.cpp
index 33874c6e92a..1315975f5c0 100644
--- a/src/mongo/db/catalog/multi_index_block_test.cpp
+++ b/src/mongo/db/catalog/multi_index_block_test.cpp
@@ -100,7 +100,7 @@ TEST_F(MultiIndexBlockTest, CommitWithoutInsertingDocuments) {
{
WriteUnitOfWork wunit(operationContext());
ASSERT_OK(indexer->commit(operationContext(),
- coll.getWritableCollection(),
+ coll.getWritableCollection(operationContext()),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
@@ -130,7 +130,7 @@ TEST_F(MultiIndexBlockTest, CommitAfterInsertingSingleDocument) {
{
WriteUnitOfWork wunit(operationContext());
ASSERT_OK(indexer->commit(operationContext(),
- coll.getWritableCollection(),
+ coll.getWritableCollection(operationContext()),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
diff --git a/src/mongo/db/catalog/throttle_cursor_test.cpp b/src/mongo/db/catalog/throttle_cursor_test.cpp
index 8de3f08fbeb..02999c2a739 100644
--- a/src/mongo/db/catalog/throttle_cursor_test.cpp
+++ b/src/mongo/db/catalog/throttle_cursor_test.cpp
@@ -67,7 +67,6 @@ public:
void setMaxMbPerSec(int maxMbPerSec);
Date_t getTime();
- int64_t getDifferenceInMillis(Date_t start, Date_t end);
SortedDataInterfaceThrottleCursor getIdIndex(const CollectionPtr& coll);
std::unique_ptr<DataThrottle> _dataThrottle;
@@ -114,10 +113,6 @@ Date_t ThrottleCursorTest::getTime() {
return operationContext()->getServiceContext()->getFastClockSource()->now();
}
-int64_t ThrottleCursorTest::getDifferenceInMillis(Date_t start, Date_t end) {
- return end.toMillisSinceEpoch() - start.toMillisSinceEpoch();
-}
-
SortedDataInterfaceThrottleCursor ThrottleCursorTest::getIdIndex(const CollectionPtr& coll) {
const IndexDescriptor* idDesc = coll->getIndexCatalog()->findIdIndex(operationContext());
const IndexCatalogEntry* idEntry = coll->getIndexCatalog()->getEntry(idDesc);
@@ -156,7 +151,7 @@ TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOff) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 20);
- ASSERT_EQ(getDifferenceInMillis(start, end), kTickDelay * numRecords + kTickDelay);
+ ASSERT_EQ(end - start, Milliseconds(kTickDelay * numRecords + kTickDelay));
}
TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOn) {
@@ -187,7 +182,7 @@ TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 10);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 5000);
+ ASSERT_GTE(end - start, Milliseconds(5000));
}
// Using a throttle with a limit of 5MB per second, all operations should take at least 1
@@ -207,7 +202,7 @@ TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 10);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 1000);
+ ASSERT_GTE(end - start, Milliseconds(1000));
}
}
@@ -239,7 +234,7 @@ TEST_F(ThrottleCursorTestFastClock, TestSeekableRecordThrottleCursorOnLargeDocs1
Date_t end = getTime();
ASSERT_EQ(scanRecords, 0);
- ASSERT_GTE(getDifferenceInMillis(start, end), 10 * 1000);
+ ASSERT_GTE(end - start, Milliseconds(10 * 1000));
}
TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOnLargeDocs5MBps) {
@@ -270,7 +265,7 @@ TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOnLargeDocs5MBps) {
Date_t end = getTime();
ASSERT_EQ(scanRecords, 0);
- ASSERT_GTE(getDifferenceInMillis(start, end), 2000);
+ ASSERT_GTE(end - start, Milliseconds(2000));
}
TEST_F(ThrottleCursorTest, TestSortedDataInterfaceThrottleCursorOff) {
@@ -297,7 +292,7 @@ TEST_F(ThrottleCursorTest, TestSortedDataInterfaceThrottleCursorOff) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 10);
- ASSERT_EQ(getDifferenceInMillis(start, end), kTickDelay * numRecords + kTickDelay);
+ ASSERT_EQ(end - start, Milliseconds(kTickDelay * numRecords + kTickDelay));
}
TEST_F(ThrottleCursorTest, TestSortedDataInterfaceThrottleCursorOn) {
@@ -327,7 +322,7 @@ TEST_F(ThrottleCursorTest, TestSortedDataInterfaceThrottleCursorOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 10);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 5000);
+ ASSERT_GTE(end - start, Milliseconds(5000));
}
// Using a throttle with a limit of 5MB per second, all operations should take at least 1
@@ -347,7 +342,7 @@ TEST_F(ThrottleCursorTest, TestSortedDataInterfaceThrottleCursorOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 10);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 1000);
+ ASSERT_GTE(end - start, Milliseconds(1000));
}
}
@@ -390,7 +385,7 @@ TEST_F(ThrottleCursorTest, TestMixedCursorsWithSharedThrottleOff) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 30);
- ASSERT_EQ(getDifferenceInMillis(start, end), kTickDelay * numRecords + kTickDelay);
+ ASSERT_EQ(end - start, Milliseconds(kTickDelay * numRecords + kTickDelay));
}
TEST_F(ThrottleCursorTest, TestMixedCursorsWithSharedThrottleOn) {
@@ -425,7 +420,7 @@ TEST_F(ThrottleCursorTest, TestMixedCursorsWithSharedThrottleOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 20);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 5000);
+ ASSERT_GTE(end - start, Milliseconds(5000));
}
// Using a throttle with a limit of 5MB per second, all operations should take at least 2
@@ -447,7 +442,7 @@ TEST_F(ThrottleCursorTest, TestMixedCursorsWithSharedThrottleOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 20);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 2000);
+ ASSERT_GTE(end - start, Milliseconds(2000));
}
}
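The assertions above now compare Date_t differences directly against Milliseconds instead of going through a hand-rolled millisecond helper. The same idea expressed with plain std::chrono (a standalone sketch, not the server's types):

#include <cassert>
#include <chrono>
#include <thread>

int main() {
    using namespace std::chrono;
    const auto start = steady_clock::now();
    std::this_thread::sleep_for(milliseconds(20));
    const auto end = steady_clock::now();

    // Equivalent in spirit to ASSERT_GTE(end - start, Milliseconds(20)).
    assert(end - start >= milliseconds(20));
    return 0;
}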
diff --git a/src/mongo/db/catalog_raii.cpp b/src/mongo/db/catalog_raii.cpp
index 4396bf245fb..e376905e45f 100644
--- a/src/mongo/db/catalog_raii.cpp
+++ b/src/mongo/db/catalog_raii.cpp
@@ -144,6 +144,10 @@ void acquireCollectionLocksInResourceIdOrder(
// ResourceId(RESOURCE_COLLECTION, nss.ns()).
temp.insert(catalog->resolveNamespaceStringOrUUID(opCtx, nsOrUUID));
for (const auto& secondaryNssOrUUID : secondaryNssOrUUIDs) {
+ invariant(secondaryNssOrUUID.db() == nsOrUUID.db(),
+ str::stream()
+ << "Unable to acquire locks for collections across different databases ("
+ << secondaryNssOrUUID << " vs " << nsOrUUID << ")");
temp.insert(catalog->resolveNamespaceStringOrUUID(opCtx, secondaryNssOrUUID));
}
@@ -165,29 +169,12 @@ void acquireCollectionLocksInResourceIdOrder(
} // namespace
// TODO SERVER-62918 Pass DatabaseName instead of string for dbName.
-AutoGetDb::AutoGetDb(OperationContext* opCtx,
- StringData dbName,
- LockMode mode,
- Date_t deadline,
- const std::set<StringData>& secondaryDbNames)
+AutoGetDb::AutoGetDb(OperationContext* opCtx, StringData dbName, LockMode mode, Date_t deadline)
: _dbName(dbName), _dbLock(opCtx, dbName, mode, deadline), _db([&] {
const DatabaseName tenantDbName(boost::none, dbName);
auto databaseHolder = DatabaseHolder::get(opCtx);
return databaseHolder->getDb(opCtx, tenantDbName);
}()) {
- // Take the secondary dbs' database locks only: no global or RSTL, as they are already acquired
- // above. Note: no consistent ordering is when acquiring database locks because there are no
- // occasions where multiple strong locks are acquired to make ordering matter (deadlock
- // avoidance).
- for (const auto& secondaryDbName : secondaryDbNames) {
- // The primary database may be repeated in the secondary databases and the primary database
- // should not be locked twice.
- if (secondaryDbName != _dbName) {
- _secondaryDbLocks.emplace_back(
- opCtx, secondaryDbName, MODE_IS, deadline, true /*skipGlobalAndRSTLLocks*/);
- }
- }
-
// The 'primary' database must be version checked for sharding.
auto dss = DatabaseShardingState::get(opCtx, dbName);
auto dssLock = DatabaseShardingState::DSSLock::lockShared(opCtx, dss);
@@ -219,19 +206,9 @@ AutoGetCollection::AutoGetCollection(
const std::vector<NamespaceStringOrUUID>& secondaryNssOrUUIDs) {
invariant(!opCtx->isLockFreeReadsOp());
- // Get a unique list of 'secondary' database names to pass into AutoGetDb below.
- std::set<StringData> secondaryDbNames;
- for (auto& secondaryNssOrUUID : secondaryNssOrUUIDs) {
- secondaryDbNames.emplace(secondaryNssOrUUID.db());
- }
-
// Acquire the global/RSTL and all the database locks (may or may not be multiple
// databases).
- _autoDb.emplace(opCtx,
- !nsOrUUID.dbname().empty() ? nsOrUUID.dbname() : nsOrUUID.nss()->db(),
- isSharedLockMode(modeColl) ? MODE_IS : MODE_IX,
- deadline,
- secondaryDbNames);
+ _autoDb.emplace(opCtx, nsOrUUID.db(), isSharedLockMode(modeColl) ? MODE_IS : MODE_IX, deadline);
// Out of an abundance of caution, force operations to acquire new snapshots after
// acquiring exclusive collection locks. Operations that hold MODE_X locks make an
@@ -246,7 +223,7 @@ AutoGetCollection::AutoGetCollection(
// Acquire the collection locks. If there's only one lock, then it can simply be taken. If
// there are many, however, the locks must be taken in _ascending_ ResourceId order to avoid
// deadlocks across threads.
- if (secondaryDbNames.empty()) {
+ if (secondaryNssOrUUIDs.empty()) {
uassertStatusOK(nsOrUUID.isNssValid());
_collLocks.emplace_back(opCtx, nsOrUUID, modeColl, deadline);
} else {
@@ -478,7 +455,6 @@ struct CollectionWriter::SharedImpl {
CollectionWriter::CollectionWriter(OperationContext* opCtx, const UUID& uuid)
: _collection(&_storedCollection),
- _opCtx(opCtx),
_managed(true),
_sharedImpl(std::make_shared<SharedImpl>(this)) {
@@ -490,7 +466,6 @@ CollectionWriter::CollectionWriter(OperationContext* opCtx, const UUID& uuid)
CollectionWriter::CollectionWriter(OperationContext* opCtx, const NamespaceString& nss)
: _collection(&_storedCollection),
- _opCtx(opCtx),
_managed(true),
_sharedImpl(std::make_shared<SharedImpl>(this)) {
_storedCollection = CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss);
@@ -502,7 +477,6 @@ CollectionWriter::CollectionWriter(OperationContext* opCtx, const NamespaceStrin
CollectionWriter::CollectionWriter(OperationContext* opCtx, AutoGetCollection& autoCollection)
: _collection(&autoCollection.getCollection()),
- _opCtx(opCtx),
_managed(true),
_sharedImpl(std::make_shared<SharedImpl>(this)) {
_sharedImpl->_writableCollectionInitializer = [&autoCollection, opCtx]() {
@@ -523,7 +497,7 @@ CollectionWriter::~CollectionWriter() {
}
}
-Collection* CollectionWriter::getWritableCollection() {
+Collection* CollectionWriter::getWritableCollection(OperationContext* opCtx) {
// Acquire writable instance lazily if not already available
if (!_writableCollection) {
_writableCollection = _sharedImpl->_writableCollectionInitializer();
@@ -539,7 +513,7 @@ Collection* CollectionWriter::getWritableCollection() {
// and re-clone the Collection if a new write unit of work is opened. Holds the back
// pointer to the CollectionWriter explicitly so we can detect if the instance is
// already destroyed.
- _opCtx->recoveryUnit()->registerChange(
+ opCtx->recoveryUnit()->registerChange(
[shared = _sharedImpl](boost::optional<Timestamp>) {
if (shared->_parent)
shared->_parent->_writableCollection = nullptr;
@@ -599,4 +573,35 @@ AutoGetOplog::AutoGetOplog(OperationContext* opCtx, OplogAccessMode mode, Date_t
_oplog = &_oplogInfo->getCollection();
}
+
+AutoGetChangeCollection::AutoGetChangeCollection(OperationContext* opCtx,
+ AutoGetChangeCollection::AccessMode mode,
+ boost::optional<TenantId> tenantId,
+ Date_t deadline) {
+ auto nss = NamespaceString::makeChangeCollectionNSS(tenantId);
+ if (mode == AccessMode::kWrite) {
+ // The global lock must already be held.
+ invariant(opCtx->lockState()->isWriteLocked());
+
+ // TODO SERVER-66715 avoid taking 'AutoGetCollection' and remove
+ // 'AllowLockAcquisitionOnTimestampedUnitOfWork'.
+ AllowLockAcquisitionOnTimestampedUnitOfWork allowLockAcquisition(opCtx->lockState());
+ _coll.emplace(
+ opCtx, nss, LockMode::MODE_IX, AutoGetCollectionViewMode::kViewsForbidden, deadline);
+ }
+}
+
+const Collection* AutoGetChangeCollection::operator->() const {
+ return _coll ? _coll->getCollection().get() : nullptr;
+}
+
+const CollectionPtr& AutoGetChangeCollection::operator*() const {
+ return _coll->getCollection();
+}
+
+AutoGetChangeCollection::operator bool() const {
+ return _coll && _coll->getCollection().get();
+}
+
+
} // namespace mongo
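AutoGetChangeCollection above wraps an optional AutoGetCollection that is only engaged for write access and converts to false when nothing was acquired. A rough standalone sketch of that accessor shape (hypothetical types and an illustrative namespace string, not the server API):

#include <iostream>
#include <optional>
#include <string>

// Hypothetical stand-ins; the real class wraps AutoGetCollection and lock modes.
struct CollectionGuard {
    explicit CollectionGuard(std::string nss) : ns(std::move(nss)) {}
    std::string ns;
};

class AutoGetChangeCollection {
public:
    enum class AccessMode { kWrite };

    AutoGetChangeCollection(AccessMode mode, const std::string& tenantId) {
        if (mode == AccessMode::kWrite)
            _coll.emplace("change_collection." + tenantId);  // illustrative namespace only
    }

    explicit operator bool() const { return _coll.has_value(); }
    const CollectionGuard* operator->() const { return _coll ? &*_coll : nullptr; }

private:
    std::optional<CollectionGuard> _coll;
};

int main() {
    AutoGetChangeCollection coll(AutoGetChangeCollection::AccessMode::kWrite, "tenantA");
    if (coll)
        std::cout << "acquired " << coll->ns << '\n';
    return 0;
}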
diff --git a/src/mongo/db/catalog_raii.h b/src/mongo/db/catalog_raii.h
index 3b66a3a7294..2c48422f8fb 100644
--- a/src/mongo/db/catalog_raii.h
+++ b/src/mongo/db/catalog_raii.h
@@ -56,16 +56,10 @@ class AutoGetDb {
AutoGetDb& operator=(const AutoGetDb&) = delete;
public:
- /**
- * Database locks are also acquired for any 'secondaryDbNames' database names provided. Only
- * MODE_IS is supported when 'secondaryDbNames' are provided. It is safe to repeat 'dbName' in
- * 'secondaryDbNames'.
- */
AutoGetDb(OperationContext* opCtx,
StringData dbName,
LockMode mode,
- Date_t deadline = Date_t::max(),
- const std::set<StringData>& secondaryDbNames = {});
+ Date_t deadline = Date_t::max());
AutoGetDb(AutoGetDb&&) = default;
@@ -386,7 +380,7 @@ public:
// Returns writable Collection, any previous Collection that has been returned may be
// invalidated.
- Collection* getWritableCollection();
+ Collection* getWritableCollection(OperationContext* opCtx);
private:
// If this class is instantiated with the constructors that take UUID or nss we need somewhere
@@ -397,7 +391,6 @@ private:
const CollectionPtr* _collection = nullptr;
CollectionPtr _storedCollection;
Collection* _writableCollection = nullptr;
- OperationContext* _opCtx = nullptr;
// Indicates if this instance is managing Collection pointers through commit and rollback.
bool _managed;
@@ -477,4 +470,31 @@ private:
const CollectionPtr* _oplog;
};
+/**
+ * A RAII-style class to acquire lock to a particular tenant's change collection.
+ *
+ * A change collection can be accessed in the following modes:
+ * kWrite - This mode assumes that the global IX lock is already held before writing to the change
+ * collection.
+ */
+class AutoGetChangeCollection {
+public:
+ enum class AccessMode { kWrite };
+
+ AutoGetChangeCollection(OperationContext* opCtx,
+ AccessMode mode,
+ boost::optional<TenantId> tenantId,
+ Date_t deadline = Date_t::max());
+
+ AutoGetChangeCollection(const AutoGetChangeCollection&) = delete;
+ AutoGetChangeCollection& operator=(const AutoGetChangeCollection&) = delete;
+
+ const Collection* operator->() const;
+ const CollectionPtr& operator*() const;
+ explicit operator bool() const;
+
+private:
+ boost::optional<AutoGetCollection> _coll;
+};
+
} // namespace mongo
diff --git a/src/mongo/db/catalog_raii_test.cpp b/src/mongo/db/catalog_raii_test.cpp
index 383a3dddc36..2b86133de66 100644
--- a/src/mongo/db/catalog_raii_test.cpp
+++ b/src/mongo/db/catalog_raii_test.cpp
@@ -27,12 +27,8 @@
* it in the license file.
*/
+#include <boost/optional/optional_io.hpp>
-#include "mongo/platform/basic.h"
-
-#include <string>
-
-#include "boost/optional/optional_io.hpp"
#include "mongo/db/catalog/database_holder_mock.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/client.h"
@@ -47,7 +43,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -66,6 +61,7 @@ public:
const NamespaceString nss = NamespaceString("test", "coll");
const NamespaceString kSecondaryNss1 = NamespaceString("test", "secondaryColl1");
const NamespaceString kSecondaryNss2 = NamespaceString("test", "secondaryColl2");
+ const NamespaceString kSecondaryNss3 = NamespaceString("test", "secondaryColl3");
const NamespaceString kSecondaryNssOtherDb1 = NamespaceString("test2", "secondaryColl1");
const NamespaceString kSecondaryNssOtherDb2 = NamespaceString("test2", "secondaryColl2");
const Milliseconds timeoutMs = Seconds(1);
@@ -125,44 +121,6 @@ TEST_F(CatalogRAIITestFixture, AutoGetDBDeadlineMin) {
Milliseconds(0));
}
-TEST_F(CatalogRAIITestFixture, AutoGetDBMultiDBDeadline) {
- // Take the kSecondaryNssOtherDb1 database MODE_X lock to create a conflict later.
- boost::optional<Lock::DBLock> dbLockXLock;
- dbLockXLock.emplace(client1.second.get(), kSecondaryNssOtherDb1.db(), MODE_X);
- ASSERT(client1.second->lockState()->isDbLockedForMode(kSecondaryNssOtherDb1.db(), MODE_X));
-
- // Trying to acquire the kSecondaryNssOtherDb1 database MODE_IS lock should time out.
- std::set<StringData> secondaryDbNamesConflicting{kSecondaryNss1.db(),
- kSecondaryNss2.db(),
- kSecondaryNssOtherDb1.db(),
- kSecondaryNssOtherDb2.db()};
- failsWithLockTimeout(
- [&] {
- AutoGetDb autoGetDb(client2.second.get(),
- nss.db(),
- MODE_IS,
- Date_t::now() + timeoutMs,
- secondaryDbNamesConflicting);
- },
- timeoutMs);
-
- {
- // Acquiring multiple database locks without the kSecondaryNssOtherDb1 database should work.
- std::set<StringData> secondaryDbNamesNoConflict{kSecondaryNss1.db()};
- AutoGetDb autoGetDbNoConflict(client2.second.get(),
- kSecondaryNss1.db(),
- MODE_IS,
- Date_t::max(),
- secondaryDbNamesNoConflict);
- }
-
- // Lastly, with the MODE_X lock on kSecondaryNssOtherDb1.db() released, the original multi
- // database lock request should work.
- dbLockXLock.reset();
- AutoGetDb autoGetDb(
- client2.second.get(), nss.db(), MODE_IS, Date_t::max(), secondaryDbNamesConflicting);
-}
-
TEST_F(CatalogRAIITestFixture, AutoGetCollectionCollLockDeadline) {
Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX);
ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX));
@@ -337,50 +295,11 @@ TEST_F(CatalogRAIITestFixture, AutoGetCollectionMultiNamespacesMODEIX) {
ASSERT(!opCtx1->lockState()->isLocked()); // Global lock check
}
-TEST_F(CatalogRAIITestFixture, AutoGetCollectionSecondaryNamespacesMultiDb) {
- auto opCtx1 = client1.second.get();
-
- std::vector<NamespaceStringOrUUID> secondaryNamespaces{
- NamespaceStringOrUUID(kSecondaryNss1),
- NamespaceStringOrUUID(kSecondaryNss2),
- NamespaceStringOrUUID(kSecondaryNssOtherDb1),
- NamespaceStringOrUUID(kSecondaryNssOtherDb2)};
- boost::optional<AutoGetCollection> autoGetColl;
- autoGetColl.emplace(opCtx1,
- nss,
- MODE_IS,
- AutoGetCollectionViewMode::kViewsForbidden,
- Date_t::max(),
- secondaryNamespaces);
-
- ASSERT(opCtx1->lockState()->isRSTLLocked());
- ASSERT(opCtx1->lockState()->isReadLocked()); // Global lock check
- ASSERT(opCtx1->lockState()->isDbLockedForMode(nss.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNss1.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNss2.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNssOtherDb1.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNssOtherDb2.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isCollectionLockedForMode(nss, MODE_IS));
- ASSERT(opCtx1->lockState()->isCollectionLockedForMode(kSecondaryNss1, MODE_IS));
- ASSERT(opCtx1->lockState()->isCollectionLockedForMode(kSecondaryNss2, MODE_IS));
- ASSERT(opCtx1->lockState()->isCollectionLockedForMode(kSecondaryNssOtherDb1, MODE_IS));
- ASSERT(opCtx1->lockState()->isCollectionLockedForMode(kSecondaryNssOtherDb2, MODE_IS));
-
- ASSERT(!opCtx1->lockState()->isRSTLExclusive());
- ASSERT(!opCtx1->lockState()->isGlobalLockedRecursively());
- ASSERT(!opCtx1->lockState()->isWriteLocked());
-
- // All the locks should release.
- autoGetColl.reset();
- ASSERT(!opCtx1->lockState()->isLocked()); // Global lock check.
-}
-
TEST_F(CatalogRAIITestFixture, AutoGetDbSecondaryNamespacesSingleDb) {
auto opCtx1 = client1.second.get();
- std::set<StringData> secondaryDbNames{kSecondaryNss1.db(), kSecondaryNss2.db()};
boost::optional<AutoGetDb> autoGetDb;
- autoGetDb.emplace(opCtx1, nss.db(), MODE_IS, Date_t::max(), secondaryDbNames);
+ autoGetDb.emplace(opCtx1, nss.db(), MODE_IS, Date_t::max());
ASSERT(opCtx1->lockState()->isRSTLLocked());
ASSERT(opCtx1->lockState()->isReadLocked()); // Global lock check
@@ -399,33 +318,6 @@ TEST_F(CatalogRAIITestFixture, AutoGetDbSecondaryNamespacesSingleDb) {
ASSERT(!opCtx1->lockState()->isLocked()); // Global lock check.
}
-TEST_F(CatalogRAIITestFixture, AutoGetDbSecondaryNamespacesMultiDb) {
- auto opCtx1 = client1.second.get();
-
- std::set<StringData> secondaryDbNames{kSecondaryNss1.db(),
- kSecondaryNss2.db(),
- kSecondaryNssOtherDb1.db(),
- kSecondaryNssOtherDb2.db()};
- boost::optional<AutoGetDb> autoGetDb;
- autoGetDb.emplace(opCtx1, nss.db(), MODE_IS, Date_t::max(), secondaryDbNames);
-
- ASSERT(opCtx1->lockState()->isReadLocked()); // Global lock check
- ASSERT(opCtx1->lockState()->isRSTLLocked());
- ASSERT(opCtx1->lockState()->isDbLockedForMode(nss.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNss1.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNss2.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNssOtherDb1.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNssOtherDb2.db(), MODE_IS));
-
- ASSERT(!opCtx1->lockState()->isRSTLExclusive());
- ASSERT(!opCtx1->lockState()->isGlobalLockedRecursively());
- ASSERT(!opCtx1->lockState()->isWriteLocked());
-
- // All the locks should release.
- autoGetDb.reset();
- ASSERT(!opCtx1->lockState()->isLocked()); // Global lock check.
-}
-
TEST_F(CatalogRAIITestFixture, AutoGetCollectionMultiNssCollLockDeadline) {
// Take a MODE_X collection lock on kSecondaryNss1.
boost::optional<AutoGetCollection> autoGetCollWithXLock;
@@ -437,7 +329,7 @@ TEST_F(CatalogRAIITestFixture, AutoGetCollectionMultiNssCollLockDeadline) {
const std::vector<NamespaceStringOrUUID> secondaryNamespacesConflict{
NamespaceStringOrUUID(kSecondaryNss1),
NamespaceStringOrUUID(kSecondaryNss2),
- NamespaceStringOrUUID(kSecondaryNssOtherDb1)};
+ NamespaceStringOrUUID(kSecondaryNss3)};
failsWithLockTimeout(
[&] {
AutoGetCollection coll(client2.second.get(),
@@ -452,7 +344,7 @@ TEST_F(CatalogRAIITestFixture, AutoGetCollectionMultiNssCollLockDeadline) {
{
// Sanity check that there's no conflict without kSecondaryNss1 that's MODE_X locked.
const std::vector<NamespaceStringOrUUID> secondaryNamespacesNoConflict{
- NamespaceStringOrUUID(kSecondaryNss2), NamespaceStringOrUUID(kSecondaryNssOtherDb1)};
+ NamespaceStringOrUUID(kSecondaryNss2), NamespaceStringOrUUID(kSecondaryNss2)};
AutoGetCollection collNoConflict(client2.second.get(),
nss,
MODE_IS,
diff --git a/src/mongo/db/change_stream_change_collection_manager.cpp b/src/mongo/db/change_stream_change_collection_manager.cpp
index 3311c7be092..d76d197c505 100644
--- a/src/mongo/db/change_stream_change_collection_manager.cpp
+++ b/src/mongo/db/change_stream_change_collection_manager.cpp
@@ -48,11 +48,101 @@ namespace {
const auto getChangeCollectionManager =
ServiceContext::declareDecoration<boost::optional<ChangeStreamChangeCollectionManager>>();
-// TODO: SERVER-65950 create or update the change collection for a particular tenant.
-NamespaceString getTenantChangeCollectionNamespace(boost::optional<TenantId> tenantId) {
- return NamespaceString{NamespaceString::kConfigDb, NamespaceString::kChangeCollectionName};
+/**
+ * Creates a Document object from the supplied oplog entry, performs necessary modifications to it
+ * and then returns it as a BSON object.
+ */
+BSONObj createChangeCollectionEntryFromOplog(const BSONObj& oplogEntry) {
+ Document oplogDoc(oplogEntry);
+ MutableDocument changeCollDoc(oplogDoc);
+ changeCollDoc["_id"] = Value(oplogDoc["ts"]);
+
+ auto readyChangeCollDoc = changeCollDoc.freeze();
+ return readyChangeCollDoc.toBson();
}
+/**
+ * Helper to write insert statements to respective change collections based on tenant ids.
+ */
+class ChangeCollectionsWriter {
+public:
+ /**
+ * Adds the insert statement for the provided tenant that will be written to the change
+ * collection when the 'write()' method is called.
+ */
+ void add(const TenantId& tenantId, InsertStatement insertStatement) {
+ if (_shouldAddEntry(insertStatement)) {
+ _tenantStatementsMap[tenantId].push_back(std::move(insertStatement));
+ }
+ }
+
+ /**
+ * Writes the batch of insert statements for each change collection. Bails out further writes if
+     * Writes the batch of insert statements for each change collection. Bails out of further
+     * writes if a failure is encountered while writing to any change collection.
+ Status write(OperationContext* opCtx, OpDebug* opDebug) {
+ for (auto&& [tenantId, insertStatements] : _tenantStatementsMap) {
+ AutoGetChangeCollection tenantChangeCollection(
+ opCtx, AutoGetChangeCollection::AccessMode::kWrite, boost::none /* tenantId */);
+
+            // The change collection may not exist for a particular tenant, either because change
+            // collections are not enabled or because enablement is still in progress. Ignore this
+            // insert for now.
+            // TODO: SERVER-65950 move this check before inserting into '_tenantStatementsMap'.
+ if (!tenantChangeCollection) {
+ continue;
+ }
+
+ // Writes to the change collection should not be replicated.
+ repl::UnreplicatedWritesBlock unReplBlock(opCtx);
+
+ Status status = tenantChangeCollection->insertDocuments(opCtx,
+ insertStatements.begin(),
+ insertStatements.end(),
+ opDebug,
+ false /* fromMigrate */);
+ if (!status.isOK()) {
+ return Status(status.code(),
+ str::stream()
+ << "Write to change collection: " << tenantChangeCollection->ns()
+                                  << " failed, reason: " << status.reason());
+ }
+ }
+
+ return Status::OK();
+ }
+
+private:
+ bool _shouldAddEntry(const InsertStatement& insertStatement) {
+ auto& oplogDoc = insertStatement.doc;
+
+        // TODO SERVER-65950 retrieve the tenant from the oplog.
+ // TODO SERVER-67170 avoid inspecting the oplog BSON object.
+
+ if (auto nssFieldElem = oplogDoc[repl::OplogEntry::kNssFieldName];
+ nssFieldElem && nssFieldElem.String() == "config.$cmd"_sd) {
+ if (auto objectFieldElem = oplogDoc[repl::OplogEntry::kObjectFieldName]) {
+                // The oplog entry might be a drop command on the change collection itself. If the
+                // drop targets the change collection, do not attempt to write to it, since it has
+                // already been deleted. This scenario is possible because 'WriteUnitOfWork' stages
+                // the changes, and by the time the staged 'CollectionImpl::insertDocuments' change
+                // is committed, the collection object might already have been deleted.
+ if (auto dropFieldElem = objectFieldElem["drop"_sd]) {
+ return dropFieldElem.String() != NamespaceString::kChangeCollectionName;
+ }
+ }
+ }
+
+ return true;
+ }
+
+    // Maps each tenant id to the insert statements destined for that tenant's change collection.
+ stdx::unordered_map<TenantId, std::vector<InsertStatement>, TenantId::Hasher>
+ _tenantStatementsMap;
+};
+
} // namespace
ChangeStreamChangeCollectionManager& ChangeStreamChangeCollectionManager::get(
@@ -69,12 +159,19 @@ void ChangeStreamChangeCollectionManager::create(ServiceContext* service) {
getChangeCollectionManager(service).emplace(service);
}
-bool ChangeStreamChangeCollectionManager::isChangeCollectionEnabled() {
+bool ChangeStreamChangeCollectionManager::isChangeCollectionsModeActive() {
return feature_flags::gFeatureFlagServerlessChangeStreams.isEnabled(
serverGlobalParams.featureCompatibility) &&
gMultitenancySupport;
}
+bool ChangeStreamChangeCollectionManager::hasChangeCollection(
+ OperationContext* opCtx, boost::optional<TenantId> tenantId) const {
+ auto catalog = CollectionCatalog::get(opCtx);
+ return static_cast<bool>(catalog->lookupCollectionByNamespace(
+ opCtx, NamespaceString::makeChangeCollectionNSS(tenantId)));
+}
+
Status ChangeStreamChangeCollectionManager::createChangeCollection(
OperationContext* opCtx, boost::optional<TenantId> tenantId) {
// Make the change collection clustered by '_id'. The '_id' field will have the same value as
@@ -83,8 +180,10 @@ Status ChangeStreamChangeCollectionManager::createChangeCollection(
changeCollectionOptions.clusteredIndex.emplace(clustered_util::makeDefaultClusteredIdIndex());
changeCollectionOptions.capped = true;
- auto status = createCollection(
- opCtx, getTenantChangeCollectionNamespace(tenantId), changeCollectionOptions, BSONObj());
+ auto status = createCollection(opCtx,
+ NamespaceString::makeChangeCollectionNSS(tenantId),
+ changeCollectionOptions,
+ BSONObj());
if (status.code() == ErrorCodes::NamespaceExists) {
return Status::OK();
}
@@ -96,7 +195,7 @@ Status ChangeStreamChangeCollectionManager::dropChangeCollection(
OperationContext* opCtx, boost::optional<TenantId> tenantId) {
DropReply dropReply;
return dropCollection(opCtx,
- getTenantChangeCollectionNamespace(tenantId),
+ NamespaceString::makeChangeCollectionNSS(tenantId),
&dropReply,
DropCollectionSystemCollectionMode::kAllowSystemCollectionDrops);
}
@@ -111,58 +210,57 @@ void ChangeStreamChangeCollectionManager::insertDocumentsToChangeCollection(
    // committing the unit of work.
invariant(opCtx->lockState()->inAWriteUnitOfWork());
- // Maps statements that should be inserted to the change collection for each tenant.
- stdx::unordered_map<TenantId, std::vector<InsertStatement>, TenantId::Hasher>
- tenantToInsertStatements;
+ ChangeCollectionsWriter changeCollectionsWriter;
for (size_t idx = 0; idx < oplogRecords.size(); idx++) {
auto& record = oplogRecords[idx];
auto& ts = oplogTimestamps[idx];
- // Create a mutable document and update the '_id' field with the oplog entry timestamp. The
- // '_id' field will be use to order the change collection documents.
- Document oplogDoc(record.data.toBson());
- MutableDocument changeCollDoc(oplogDoc);
- changeCollDoc["_id"] = Value(ts);
-
// Create an insert statement that should be written at the timestamp 'ts' for a particular
// tenant.
- auto readyChangeCollDoc = changeCollDoc.freeze();
- tenantToInsertStatements[TenantId::kSystemTenantId].push_back(
- InsertStatement{readyChangeCollDoc.toBson(), ts, repl::OpTime::kUninitializedTerm});
+ auto changeCollDoc = createChangeCollectionEntryFromOplog(record.data.toBson());
+
+ // TODO SERVER-65950 replace 'TenantId::kSystemTenantId' with the tenant id.
+ changeCollectionsWriter.add(
+ TenantId::kSystemTenantId,
+ InsertStatement{std::move(changeCollDoc), ts, repl::OpTime::kUninitializedTerm});
}
- for (auto&& [tenantId, insertStatements] : tenantToInsertStatements) {
- // TODO SERVER-66715 avoid taking 'AutoGetCollection' and remove
- // 'AllowLockAcquisitionOnTimestampedUnitOfWork'.
- AllowLockAcquisitionOnTimestampedUnitOfWork allowLockAcquisition(opCtx->lockState());
- AutoGetCollection tenantChangeCollection(
- opCtx, getTenantChangeCollectionNamespace(tenantId), LockMode::MODE_IX);
-
- // The change collection does not exist for a particular tenant because either the change
- // collection is not enabled or is in the process of enablement. Ignore this insert for now.
- // TODO: SERVER-65950 move this check before inserting to the map
- // 'tenantToInsertStatements'.
- if (!tenantChangeCollection) {
- continue;
- }
+    // Write documents to the change collections and fatally assert if any write fails.
+ Status status = changeCollectionsWriter.write(opCtx, nullptr /* opDebug */);
+ if (!status.isOK()) {
+ LOGV2_FATAL(
+ 6612300, "Failed to write to change collection", "reason"_attr = status.reason());
+ }
+}
- // Writes to the change collection should not be replicated.
- repl::UnreplicatedWritesBlock unReplBlock(opCtx);
-
- Status status = tenantChangeCollection->insertDocuments(opCtx,
- insertStatements.begin(),
- insertStatements.end(),
- nullptr /* opDebug */,
- false /* fromMigrate */);
- if (!status.isOK()) {
- LOGV2_FATAL(6612300,
- "Write to change collection: {ns} failed: {error}",
- "Write to change collection failed",
- "ns"_attr = tenantChangeCollection->ns().toString(),
- "error"_attr = status.toString());
- }
+Status ChangeStreamChangeCollectionManager::insertDocumentsToChangeCollection(
+ OperationContext* opCtx,
+ std::vector<InsertStatement>::const_iterator beginOplogEntries,
+ std::vector<InsertStatement>::const_iterator endOplogEntries,
+ OpDebug* opDebug) {
+ ChangeCollectionsWriter changeCollectionsWriter;
+
+    // Transform oplog entries into change collection entries and group them by tenant id.
+ for (auto oplogEntryIter = beginOplogEntries; oplogEntryIter != endOplogEntries;
+ oplogEntryIter++) {
+ auto& oplogDoc = oplogEntryIter->doc;
+
+        // The initial seed oplog insertion is not timestamped, so the 'oplogSlot' is not
+        // initialized. The corresponding change collection insertion will not be timestamped either.
+ auto oplogSlot = oplogEntryIter->oplogSlot;
+
+ auto changeCollDoc = createChangeCollectionEntryFromOplog(oplogDoc);
+
+ // TODO SERVER-65950 replace 'TenantId::kSystemTenantId' with the tenant id.
+ changeCollectionsWriter.add(TenantId::kSystemTenantId,
+ InsertStatement{std::move(changeCollDoc),
+ oplogSlot.getTimestamp(),
+ oplogSlot.getTerm()});
}
+
+ // Write documents to change collections.
+ return changeCollectionsWriter.write(opCtx, opDebug);
}
} // namespace mongo
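
For readers skimming the hunk above: ChangeCollectionsWriter batches the per-tenant insert statements in a map and flushes them in a single pass, bailing out on the first failed write. Below is a minimal, self-contained sketch of that group-then-flush pattern; TenantBatchWriter, InsertDoc and Result are illustrative stand-ins, not the server's InsertStatement/AutoGetChangeCollection machinery.

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Stand-ins for the server types; illustrative only.
struct InsertDoc { std::string json; };
struct Result { bool ok; std::string reason; };

class TenantBatchWriter {
public:
    // Stage a document under its tenant; nothing is written yet.
    void add(const std::string& tenantId, InsertDoc doc) {
        _tenantDocs[tenantId].push_back(std::move(doc));
    }

    // Flush one batch per tenant; stop at the first failure, mirroring
    // ChangeCollectionsWriter::write() above.
    Result write() {
        for (auto&& [tenantId, docs] : _tenantDocs) {
            if (!flushOne(tenantId, docs)) {
                return {false, "write to change collection for tenant '" + tenantId + "' failed"};
            }
        }
        return {true, ""};
    }

private:
    bool flushOne(const std::string& tenantId, const std::vector<InsertDoc>& docs) {
        // A real implementation would acquire the tenant's change collection and insert
        // 'docs' in a single batched call; here we only report the batch size.
        std::cout << "tenant " << tenantId << ": inserting " << docs.size() << " documents\n";
        return true;
    }

    std::unordered_map<std::string, std::vector<InsertDoc>> _tenantDocs;
};

int main() {
    TenantBatchWriter writer;
    writer.add("tenantA", {"{_id: Timestamp(1, 1)}"});
    writer.add("tenantA", {"{_id: Timestamp(1, 2)}"});
    writer.add("tenantB", {"{_id: Timestamp(1, 3)}"});
    Result res = writer.write();
    std::cout << (res.ok ? "ok" : res.reason) << "\n";
    return res.ok ? 0 : 1;
}

The real writer additionally skips tenants whose change collection does not currently exist and suppresses replication of the writes.
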
diff --git a/src/mongo/db/change_stream_change_collection_manager.h b/src/mongo/db/change_stream_change_collection_manager.h
index 8ecc48b9a5c..f9fe6d6f414 100644
--- a/src/mongo/db/change_stream_change_collection_manager.h
+++ b/src/mongo/db/change_stream_change_collection_manager.h
@@ -31,6 +31,7 @@
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/operation_context.h"
+#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/service_context.h"
namespace mongo {
@@ -63,7 +64,12 @@ public:
* Returns true if change collections are enabled for recording oplog entries, false
* otherwise.
*/
- static bool isChangeCollectionEnabled();
+ static bool isChangeCollectionsModeActive();
+
+ /**
+ * Returns true if the change collection is present for the specified tenant, false otherwise.
+ */
+ bool hasChangeCollection(OperationContext* opCtx, boost::optional<TenantId> tenantId) const;
/**
* Creates a change collection for the specified tenant, if it doesn't exist. Returns Status::OK
@@ -81,9 +87,9 @@ public:
Status dropChangeCollection(OperationContext* opCtx, boost::optional<TenantId> tenantId);
/**
- * Inserts documents to change collections. The parameter 'oplogRecords'
- * is a vector of oplog records and the parameter 'oplogTimestamps' is a vector for respective
- * timestamp for each oplog record.
+     * Inserts documents to change collections. The parameter 'oplogRecords' is a vector of oplog
+     * records and the parameter 'oplogTimestamps' is a vector of the respective timestamps for
+     * each oplog record.
*
* The method fetches the tenant-id from the oplog entry, performs necessary modification to the
     * document and then writes to the tenant's change collection at the specified oplog timestamp.
@@ -96,6 +102,20 @@ public:
void insertDocumentsToChangeCollection(OperationContext* opCtx,
const std::vector<Record>& oplogRecords,
const std::vector<Timestamp>& oplogTimestamps);
+
+
+ /**
+     * Performs range inserts on the respective change collections using the oplog entries as
+ * specified by 'beginOplogEntries' and 'endOplogEntries'.
+ *
+     * Bails out if a failure is encountered while inserting documents into a particular change
+ * collection.
+ */
+ Status insertDocumentsToChangeCollection(
+ OperationContext* opCtx,
+ std::vector<InsertStatement>::const_iterator beginOplogEntries,
+ std::vector<InsertStatement>::const_iterator endOplogEntries,
+ OpDebug* opDebug);
};
} // namespace mongo
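
The new overload declared above accepts a half-open iterator range of oplog insert statements and returns the first failing status. A rough sketch of that interface shape with standard-library stand-ins; Entry and Status below are illustrative, not the server types.

#include <iostream>
#include <string>
#include <vector>

struct Entry { std::string doc; };               // stand-in for InsertStatement
struct Status { bool ok; std::string reason; };  // stand-in for mongo::Status

// Processes entries in [begin, end); returns the first failure encountered.
Status insertRange(std::vector<Entry>::const_iterator begin,
                   std::vector<Entry>::const_iterator end) {
    for (auto it = begin; it != end; ++it) {
        if (it->doc.empty()) {                   // pretend an empty document is a failed write
            return {false, "failed to insert entry"};
        }
        std::cout << "inserted: " << it->doc << "\n";
    }
    return {true, ""};
}

int main() {
    std::vector<Entry> oplog{{"{op: 'i'}"}, {"{op: 'u'}"}};
    Status s = insertRange(oplog.cbegin(), oplog.cend());
    std::cout << (s.ok ? "ok" : s.reason) << "\n";
}
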
diff --git a/src/mongo/db/change_streams_cluster_parameter.cpp b/src/mongo/db/change_streams_cluster_parameter.cpp
new file mode 100644
index 00000000000..c0ac9577f2e
--- /dev/null
+++ b/src/mongo/db/change_streams_cluster_parameter.cpp
@@ -0,0 +1,62 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
+
+#include "mongo/db/change_streams_cluster_parameter.h"
+
+#include "mongo/base/status.h"
+#include "mongo/db/change_streams_cluster_parameter_gen.h"
+#include "mongo/logv2/log.h"
+namespace mongo {
+
+Status validateChangeStreamsClusterParameter(
+ const ChangeStreamsClusterParameterStorage& clusterParameter) {
+ LOGV2_DEBUG(6594801,
+ 1,
+ "Validating change streams cluster parameter",
+ "enabled"_attr = clusterParameter.getEnabled(),
+ "expireAfterSeconds"_attr = clusterParameter.getExpireAfterSeconds());
+ if (clusterParameter.getEnabled()) {
+ if (clusterParameter.getExpireAfterSeconds() <= 0) {
+ return Status(ErrorCodes::BadValue,
+ "Expected a positive integer for 'expireAfterSeconds' field if 'enabled' "
+ "field is true");
+ }
+ } else {
+ if (clusterParameter.getExpireAfterSeconds() != 0) {
+ return Status(
+ ErrorCodes::BadValue,
+ "Expected a zero value for 'expireAfterSeconds' if 'enabled' field is false");
+ }
+ }
+ return Status::OK();
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/initialize_snmp.h b/src/mongo/db/change_streams_cluster_parameter.h
index 5fb85193b43..ebeedaa0e8b 100644
--- a/src/mongo/db/initialize_snmp.h
+++ b/src/mongo/db/change_streams_cluster_parameter.h
@@ -1,5 +1,5 @@
/**
- * Copyright (C) 2018-present MongoDB, Inc.
+ * Copyright (C) 2022-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
@@ -27,26 +27,16 @@
* it in the license file.
*/
-
#pragma once
-#include <functional>
-
+#include "mongo/base/status.h"
namespace mongo {
-/**
- * Registers the specified initializer function `init` as the initialization handler for SNMP
- * enterprise modules.
- *
- * NOTE: This function may only be called once.
- * NOTE: This function is not multithread safe.
- */
-void registerSNMPInitializer(std::function<void()> init);
+
+class ChangeStreamsClusterParameterStorage;
/**
- * Performs initialization for SNMP enterprise modules, if present, otherwise does nothing.
- *
- * This will call the function registered by `registerSNMPInitializer`. It is safe to call when no
- * function has been registered.
+ * Validates 'changeStreams' cluster-wide parameter.
*/
-void initializeSNMP();
+Status validateChangeStreamsClusterParameter(
+ const ChangeStreamsClusterParameterStorage& clusterParameter);
} // namespace mongo
diff --git a/src/mongo/db/ops/new_write_error_exception_format_feature_flag.idl b/src/mongo/db/change_streams_cluster_parameter.idl
index f5fb71095b0..74563d47752 100644
--- a/src/mongo/db/ops/new_write_error_exception_format_feature_flag.idl
+++ b/src/mongo/db/change_streams_cluster_parameter.idl
@@ -27,15 +27,38 @@
#
global:
- cpp_namespace: "mongo::feature_flags"
+ cpp_namespace: "mongo"
+ cpp_includes:
+ - "mongo/db/change_streams_cluster_parameter.h"
imports:
- - "mongo/idl/basic_types.idl"
+ - "mongo/idl/basic_types.idl"
+ - "mongo/idl/cluster_server_parameter.idl"
-feature_flags:
- featureFlagNewWriteErrorExceptionFormat:
- description: Feature flag for enabling the new write error format which avoids serialising
- StaleShardVersion with the information of StaleConfig.
- cpp_varname: gFeatureFlagNewWriteErrorExceptionFormat
- default: true
- version: 6.0
+structs:
+ ChangeStreamsClusterParameterStorage:
+ description: "A specification for the 'changeStreams' cluster-wide configuration parameter
+ type."
+ inline_chained_structs: true
+ chained_structs:
+ ClusterServerParameter: clusterServerParameter
+ fields:
+ enabled:
+ description: "Enable or disable change streams."
+ type: bool
+ default: false
+ expireAfterSeconds:
+        description: "The number of seconds to retain change events. This value will be a positive
+          value if the change stream is enabled and zero if the change stream is disabled."
+ type: safeInt64
+ default: 0
+
+server_parameters:
+ changeStreams:
+    description: "The cluster-wide configuration parameter for change streams in serverless environments."
+ set_at: cluster
+ cpp_vartype: ChangeStreamsClusterParameterStorage
+ cpp_varname: gChangeStreamsClusterParameter
+ validator:
+ callback: validateChangeStreamsClusterParameter
diff --git a/src/mongo/db/change_streams_cluster_parameter_test.cpp b/src/mongo/db/change_streams_cluster_parameter_test.cpp
new file mode 100644
index 00000000000..80ef8d71da7
--- /dev/null
+++ b/src/mongo/db/change_streams_cluster_parameter_test.cpp
@@ -0,0 +1,78 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/change_streams_cluster_parameter.h"
+#include "mongo/db/change_streams_cluster_parameter_gen.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace {
+
+
+TEST(ValidateChangeStreamsClusterParameter, EnabledWithSuccess) {
+ ChangeStreamsClusterParameterStorage changeStreamClusterParam;
+ changeStreamClusterParam.setEnabled(true);
+ changeStreamClusterParam.setExpireAfterSeconds(3600);
+ auto result = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+ ASSERT_TRUE(result.isOK());
+}
+
+TEST(ValidateChangeStreamsClusterParameter, EnabledWithNonPositiveExpireAfterSeconds) {
+ ChangeStreamsClusterParameterStorage changeStreamClusterParam;
+ changeStreamClusterParam.setEnabled(true);
+ changeStreamClusterParam.setExpireAfterSeconds(0);
+ auto resultZero = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+ ASSERT_EQ(resultZero.code(), ErrorCodes::BadValue);
+
+ changeStreamClusterParam.setExpireAfterSeconds(-1);
+ auto resultNegative = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+ ASSERT_EQ(resultNegative.code(), ErrorCodes::BadValue);
+}
+
+TEST(ValidateChangeStreamsClusterParameter, DisabledWithSuccess) {
+ ChangeStreamsClusterParameterStorage changeStreamClusterParam;
+ changeStreamClusterParam.setEnabled(false);
+ auto resultDefault = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+ ASSERT_TRUE(resultDefault.isOK());
+
+ changeStreamClusterParam.setExpireAfterSeconds(0);
+ auto resultZero = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+    ASSERT_TRUE(resultZero.isOK());
+}
+
+TEST(ValidateChangeStreamsClusterParameter, DisabledWithNonZeroExpireAfterSeconds) {
+ ChangeStreamsClusterParameterStorage changeStreamClusterParam;
+ changeStreamClusterParam.setEnabled(false);
+ changeStreamClusterParam.setExpireAfterSeconds(1);
+ auto result = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+ ASSERT_EQ(result.code(), ErrorCodes::BadValue);
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/cloner.cpp b/src/mongo/db/cloner.cpp
index bc5fc066e03..a6f394b481d 100644
--- a/src/mongo/db/cloner.cpp
+++ b/src/mongo/db/cloner.cpp
@@ -90,11 +90,11 @@ BSONObj Cloner::_getIdIndexSpec(const std::list<BSONObj>& indexSpecs) {
Cloner::Cloner() {}
-struct Cloner::Fun {
- Fun(OperationContext* opCtx, const std::string& dbName)
+struct Cloner::BatchHandler {
+ BatchHandler(OperationContext* opCtx, const std::string& dbName)
: lastLog(0), opCtx(opCtx), _dbName(dbName) {}
- void operator()(DBClientCursorBatchIterator& i) {
+ void operator()(DBClientCursor& cursor) {
boost::optional<Lock::DBLock> dbLock;
dbLock.emplace(opCtx, _dbName, MODE_X);
uassert(ErrorCodes::NotWritablePrimary,
@@ -128,7 +128,7 @@ struct Cloner::Fun {
});
}
- while (i.moreInCurrentBatch()) {
+ while (cursor.moreInCurrentBatch()) {
if (numSeen % 128 == 127) {
time_t now = time(nullptr);
if (now - lastLog >= 60) {
@@ -164,7 +164,7 @@ struct Cloner::Fun {
collection);
}
- BSONObj tmp = i.nextSafe();
+ BSONObj tmp = cursor.nextSafe();
/* assure object is valid. note this will slow us down a little. */
// We allow cloning of collections containing decimal data even if decimal is disabled.
@@ -245,23 +245,24 @@ void Cloner::_copy(OperationContext* opCtx,
logAttrs(nss),
"conn_getServerAddress"_attr = conn->getServerAddress());
- Fun f(opCtx, toDBName);
- f.numSeen = 0;
- f.nss = nss;
- f.from_options = from_opts;
- f.from_id_index = from_id_index;
- f.saveLast = time(nullptr);
-
- int options = QueryOption_NoCursorTimeout | QueryOption_Exhaust;
-
- conn->query_DEPRECATED(std::function<void(DBClientCursorBatchIterator&)>(f),
- nss,
- BSONObj{} /* filter */,
- Query() /* querySettings */,
- nullptr,
- options,
- 0 /* batchSize */,
- repl::ReadConcernArgs::kLocal);
+ BatchHandler batchHandler{opCtx, toDBName};
+ batchHandler.numSeen = 0;
+ batchHandler.nss = nss;
+ batchHandler.from_options = from_opts;
+ batchHandler.from_id_index = from_id_index;
+ batchHandler.saveLast = time(nullptr);
+
+ FindCommandRequest findCmd{nss};
+ findCmd.setNoCursorTimeout(true);
+ findCmd.setReadConcern(repl::ReadConcernArgs::kLocal);
+ auto cursor = conn->find(std::move(findCmd),
+ ReadPreferenceSetting{ReadPreference::SecondaryPreferred},
+ ExhaustMode::kOn);
+
+ // Process the results of the cursor in batches.
+ while (cursor->more()) {
+ batchHandler(*cursor);
+ }
}
void Cloner::_copyIndexes(OperationContext* opCtx,
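
The rewritten _copy() above pulls batches from an exhaust cursor and hands each batch to BatchHandler, replacing the callback previously passed to the deprecated query API. A simplified, self-contained sketch of that outer-loop/inner-loop shape; BatchCursor below is a toy stand-in for DBClientCursor.

#include <iostream>
#include <string>
#include <vector>

// Toy cursor that yields documents in fixed-size batches; stands in for DBClientCursor.
class BatchCursor {
public:
    explicit BatchCursor(std::vector<std::string> docs) : _docs(std::move(docs)) {}
    bool more() const { return _next < _docs.size(); }
    bool moreInCurrentBatch() const { return _inBatch > 0 && more(); }
    std::string nextSafe() { --_inBatch; return _docs[_next++]; }
    void refreshBatch(size_t batchSize) { _inBatch = batchSize; }  // simulates a getMore

private:
    std::vector<std::string> _docs;
    size_t _next = 0;
    size_t _inBatch = 0;
};

// Mirrors Cloner::BatchHandler: consumes everything currently buffered in the cursor.
struct BatchHandler {
    size_t numSeen = 0;
    void operator()(BatchCursor& cursor) {
        while (cursor.moreInCurrentBatch()) {
            std::cout << "cloning doc: " << cursor.nextSafe() << "\n";
            ++numSeen;
        }
    }
};

int main() {
    BatchCursor cursor({"{_id: 1}", "{_id: 2}", "{_id: 3}"});
    BatchHandler handler;
    // Outer loop: fetch the next batch while the cursor has more results.
    while (cursor.more()) {
        cursor.refreshBatch(2);  // pretend the server returned a batch of up to 2 documents
        handler(cursor);
    }
    std::cout << "total cloned: " << handler.numSeen << "\n";
}
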
diff --git a/src/mongo/db/cloner.h b/src/mongo/db/cloner.h
index 8d1d512fe1f..5cbb4d76337 100644
--- a/src/mongo/db/cloner.h
+++ b/src/mongo/db/cloner.h
@@ -104,7 +104,7 @@ private:
const std::list<BSONObj>& from_indexes,
DBClientBase* conn);
- struct Fun;
+ struct BatchHandler;
};
} // namespace mongo
diff --git a/src/mongo/db/commands.cpp b/src/mongo/db/commands.cpp
index 7826963b3c2..6ebe905b732 100644
--- a/src/mongo/db/commands.cpp
+++ b/src/mongo/db/commands.cpp
@@ -108,38 +108,11 @@ bool checkAuthorizationImplPreParse(OperationContext* opCtx,
uassert(ErrorCodes::Unauthorized,
str::stream() << "command " << command->getName() << " requires authentication",
!command->requiresAuth() || authzSession->isAuthenticated() ||
- request.securityToken.nFields());
+ (request.validatedTenancyScope &&
+ request.validatedTenancyScope->hasAuthenticatedUser()));
return false;
}
-// TODO SERVER-65101: Replace this with a property on each command.
-// The command names that are allowed in a multi-document transaction.
-const StringMap<int> txnCmdAllowlist = {{"abortTransaction", 1},
- {"aggregate", 1},
- {"clusterAbortTransaction", 1},
- {"clusterAggregate", 1},
- {"clusterCommitTransaction", 1},
- {"clusterDelete", 1},
- {"clusterFind", 1},
- {"clusterGetMore", 1},
- {"clusterInsert", 1},
- {"clusterUpdate", 1},
- {"commitTransaction", 1},
- {"coordinateCommitTransaction", 1},
- {"create", 1},
- {"createIndexes", 1},
- {"delete", 1},
- {"distinct", 1},
- {"find", 1},
- {"findandmodify", 1},
- {"findAndModify", 1},
- {"getMore", 1},
- {"insert", 1},
- {"killCursors", 1},
- {"prepareTransaction", 1},
- {"testInternalTransactions", 1},
- {"update", 1}};
-
auto getCommandInvocationHooks =
ServiceContext::declareDecoration<std::unique_ptr<CommandInvocationHooks>>();
@@ -577,11 +550,15 @@ void CommandHelpers::canUseTransactions(const NamespaceString& nss,
"http://dochub.mongodb.org/core/transaction-count for a recommended alternative.",
cmdName != "count"_sd);
- auto inTxnAllowlist = txnCmdAllowlist.find(cmdName) != txnCmdAllowlist.cend();
+ auto command = findCommand(cmdName);
+ uassert(ErrorCodes::CommandNotFound,
+            str::stream() << "Encountered unknown command while checking transaction eligibility: "
+ << cmdName,
+ command);
uassert(ErrorCodes::OperationNotSupportedInTransaction,
str::stream() << "Cannot run '" << cmdName << "' in a multi-document transaction.",
- inTxnAllowlist);
+ command->allowedInTransactions());
const auto dbName = nss.db();
diff --git a/src/mongo/db/commands.h b/src/mongo/db/commands.h
index 48341c29335..d54d86e7c50 100644
--- a/src/mongo/db/commands.h
+++ b/src/mongo/db/commands.h
@@ -603,6 +603,35 @@ public:
return nullptr;
}
+ /**
+     * Returns true if this command supports apply-once semantics when retried.
+ */
+ virtual bool supportsRetryableWrite() const {
+ return false;
+ }
+
+ /**
+     * Returns true if sessions should be checked out when lsid and txnNumber are present in the
+ * request.
+ */
+ virtual bool shouldCheckoutSession() const {
+ return true;
+ }
+
+ /**
+ * Returns true if this is a command related to managing the lifecycle of a transaction.
+ */
+ virtual bool isTransactionCommand() const {
+ return false;
+ }
+
+ /**
+ * Returns true if this command can be run in a transaction.
+ */
+ virtual bool allowedInTransactions() const {
+ return false;
+ }
+
private:
// The full name of the command
const std::string _name;
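
With the static allowlist removed from commands.cpp, a command now opts in to multi-document transactions by overriding the allowedInTransactions() hook added above. A minimal sketch of that opt-in pattern under simplified, illustrative types; the registry and canUseTransactions() below only mirror the real helpers.

#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>

// Simplified stand-in for the Command hierarchy.
class Command {
public:
    explicit Command(std::string name) : _name(std::move(name)) {}
    virtual ~Command() = default;
    const std::string& name() const { return _name; }

    // Commands opt in to running inside multi-document transactions by overriding this.
    virtual bool allowedInTransactions() const { return false; }

private:
    std::string _name;
};

class InsertCmd : public Command {
public:
    InsertCmd() : Command("insert") {}
    bool allowedInTransactions() const override { return true; }
};

class MapReduceCmd : public Command {
public:
    MapReduceCmd() : Command("mapReduce") {}  // default: not allowed in transactions
};

using Registry = std::unordered_map<std::string, std::unique_ptr<Command>>;

// Mirrors the reworked CommandHelpers::canUseTransactions: unknown commands and commands
// that do not opt in are rejected.
void canUseTransactions(const Registry& registry, const std::string& cmdName) {
    auto it = registry.find(cmdName);
    if (it == registry.end())
        throw std::runtime_error("unknown command: " + cmdName);
    if (!it->second->allowedInTransactions())
        throw std::runtime_error("cannot run '" + cmdName + "' in a multi-document transaction");
}

int main() {
    Registry registry;
    registry["insert"] = std::make_unique<InsertCmd>();
    registry["mapReduce"] = std::make_unique<MapReduceCmd>();

    for (const char* cmd : {"insert", "mapReduce", "ping"}) {
        try {
            canUseTransactions(registry, cmd);
            std::cout << cmd << ": allowed\n";
        } catch (const std::exception& e) {
            std::cout << cmd << ": " << e.what() << "\n";
        }
    }
}
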
diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript
index cf6f79671bc..a9a5e95533e 100644
--- a/src/mongo/db/commands/SConscript
+++ b/src/mongo/db/commands/SConscript
@@ -326,6 +326,7 @@ env.Library(
source=[
"count_cmd.cpp",
"cqf/cqf_aggregate.cpp",
+ "cqf/cqf_command_utils.cpp",
"create_command.cpp",
"create_indexes.cpp",
"current_op.cpp",
@@ -369,6 +370,7 @@ env.Library(
'$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/catalog/index_key_validate',
'$BUILD_DIR/mongo/db/catalog/multi_index_block',
+ '$BUILD_DIR/mongo/db/change_stream_change_collection_manager',
'$BUILD_DIR/mongo/db/command_can_run_here',
'$BUILD_DIR/mongo/db/commands',
'$BUILD_DIR/mongo/db/concurrency/exception_util',
diff --git a/src/mongo/db/commands/async_command_execution_test.cpp b/src/mongo/db/commands/async_command_execution_test.cpp
index 7e798b2028b..01bec502118 100644
--- a/src/mongo/db/commands/async_command_execution_test.cpp
+++ b/src/mongo/db/commands/async_command_execution_test.cpp
@@ -72,7 +72,7 @@ struct AsyncCommandExecutionTest::TestState {
// Setup the execution context
rec = std::make_shared<RequestExecutionContext>(opCtx.get(), mockMessage());
rec->setReplyBuilder(makeReplyBuilder(rpc::protocolForMessage(rec->getMessage())));
- rec->setRequest(rpc::opMsgRequestFromAnyProtocol(rec->getMessage()));
+ rec->setRequest(rpc::opMsgRequestFromAnyProtocol(rec->getMessage(), opCtx->getClient()));
rec->setCommand(CommandHelpers::findCommand(rec->getRequest().getCommandName()));
// Setup the invocation
diff --git a/src/mongo/db/commands/cqf/cqf_aggregate.cpp b/src/mongo/db/commands/cqf/cqf_aggregate.cpp
index aabfc99c3a5..516a3f9ca2e 100644
--- a/src/mongo/db/commands/cqf/cqf_aggregate.cpp
+++ b/src/mongo/db/commands/cqf/cqf_aggregate.cpp
@@ -87,15 +87,10 @@ static opt::unordered_map<std::string, optimizer::IndexDefinition> buildIndexSpe
while (indexIterator->more()) {
const IndexCatalogEntry& catalogEntry = *indexIterator->next();
- const bool isMultiKey = catalogEntry.isMultikey(opCtx, collection);
- const MultikeyPaths& multiKeyPaths = catalogEntry.getMultikeyPaths(opCtx, collection);
- uassert(6624251, "Multikey paths cannot be empty.", !multiKeyPaths.empty());
-
const IndexDescriptor& descriptor = *catalogEntry.descriptor();
if (descriptor.hidden() || descriptor.isSparse() ||
descriptor.getIndexType() != IndexType::INDEX_BTREE) {
- // Not supported for now.
- continue;
+ uasserted(ErrorCodes::InternalErrorNotSupported, "Unsupported index type");
}
if (indexHint) {
@@ -111,6 +106,10 @@ static opt::unordered_map<std::string, optimizer::IndexDefinition> buildIndexSpe
}
}
+ const bool isMultiKey = catalogEntry.isMultikey(opCtx, collection);
+ const MultikeyPaths& multiKeyPaths = catalogEntry.getMultikeyPaths(opCtx, collection);
+ uassert(6624251, "Multikey paths cannot be empty.", !multiKeyPaths.empty());
+
// SBE version is base 0.
const int64_t version = static_cast<int>(descriptor.version()) - 1;
@@ -185,12 +184,16 @@ static opt::unordered_map<std::string, optimizer::IndexDefinition> buildIndexSpe
// TODO: simplify expression.
- PartialSchemaReqConversion conversion = convertExprToPartialSchemaReq(exprABT);
- if (!conversion._success || conversion._hasEmptyInterval) {
+ auto conversion = convertExprToPartialSchemaReq(exprABT, true /*isFilterContext*/);
+ if (!conversion || conversion->_hasEmptyInterval) {
// Unsatisfiable partial index filter?
continue;
}
- partialIndexReqMap = std::move(conversion._reqMap);
+ tassert(6624257,
+ "Should not be seeing a partial index filter where we need to over-approximate",
+ !conversion->_retainPredicate);
+
+ partialIndexReqMap = std::move(conversion->_reqMap);
}
// For now we assume distribution is Centralized.
@@ -380,6 +383,18 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> getSBEExecutorViaCascadesOp
uasserted(6624256,
"For now we can apply hints only for queries involving a single collection");
}
+ // Unsupported command/collection options.
+ uassert(ErrorCodes::InternalErrorNotSupported,
+ "Collection-default collation is not supported",
+ !collection || collection->getCollectionOptions().collation.isEmpty());
+
+ uassert(ErrorCodes::InternalErrorNotSupported,
+ "Clustered collections are not supported",
+ !collection || !collection->isClustered());
+
+ uassert(ErrorCodes::InternalErrorNotSupported,
+ "Timeseries collections are not supported",
+ !collection || !collection->getTimeseriesOptions());
QueryHints queryHints = getHintsFromQueryKnobs();
diff --git a/src/mongo/db/commands/cqf/cqf_command_utils.cpp b/src/mongo/db/commands/cqf/cqf_command_utils.cpp
new file mode 100644
index 00000000000..2edf7a56772
--- /dev/null
+++ b/src/mongo/db/commands/cqf/cqf_command_utils.cpp
@@ -0,0 +1,696 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/commands/cqf/cqf_command_utils.h"
+
+#include "mongo/db/exec/add_fields_projection_executor.h"
+#include "mongo/db/exec/exclusion_projection_executor.h"
+#include "mongo/db/exec/inclusion_projection_executor.h"
+#include "mongo/db/exec/sbe/abt/abt_lower.h"
+#include "mongo/db/matcher/expression_always_boolean.h"
+#include "mongo/db/matcher/expression_array.h"
+#include "mongo/db/matcher/expression_expr.h"
+#include "mongo/db/matcher/expression_geo.h"
+#include "mongo/db/matcher/expression_internal_bucket_geo_within.h"
+#include "mongo/db/matcher/expression_internal_expr_comparison.h"
+#include "mongo/db/matcher/expression_leaf.h"
+#include "mongo/db/matcher/expression_text.h"
+#include "mongo/db/matcher/expression_text_noop.h"
+#include "mongo/db/matcher/expression_tree.h"
+#include "mongo/db/matcher/expression_type.h"
+#include "mongo/db/matcher/expression_visitor.h"
+#include "mongo/db/matcher/expression_where.h"
+#include "mongo/db/matcher/expression_where_noop.h"
+#include "mongo/db/matcher/match_expression_walker.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_cond.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_eq.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_fmod.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_match_array_index.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_max_items.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_max_length.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_max_properties.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_min_items.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_min_length.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_min_properties.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_object_match.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_root_doc_eq.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_unique_items.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_xor.h"
+#include "mongo/db/pipeline/abt/abt_document_source_visitor.h"
+#include "mongo/db/pipeline/abt/agg_expression_visitor.h"
+#include "mongo/db/pipeline/abt/match_expression_visitor.h"
+#include "mongo/db/pipeline/abt/utils.h"
+#include "mongo/db/pipeline/document_source_bucket_auto.h"
+#include "mongo/db/pipeline/document_source_coll_stats.h"
+#include "mongo/db/pipeline/document_source_current_op.h"
+#include "mongo/db/pipeline/document_source_cursor.h"
+#include "mongo/db/pipeline/document_source_exchange.h"
+#include "mongo/db/pipeline/document_source_facet.h"
+#include "mongo/db/pipeline/document_source_geo_near.h"
+#include "mongo/db/pipeline/document_source_geo_near_cursor.h"
+#include "mongo/db/pipeline/document_source_graph_lookup.h"
+#include "mongo/db/pipeline/document_source_group.h"
+#include "mongo/db/pipeline/document_source_index_stats.h"
+#include "mongo/db/pipeline/document_source_internal_inhibit_optimization.h"
+#include "mongo/db/pipeline/document_source_internal_shard_filter.h"
+#include "mongo/db/pipeline/document_source_internal_split_pipeline.h"
+#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
+#include "mongo/db/pipeline/document_source_limit.h"
+#include "mongo/db/pipeline/document_source_list_cached_and_active_users.h"
+#include "mongo/db/pipeline/document_source_list_local_sessions.h"
+#include "mongo/db/pipeline/document_source_list_sessions.h"
+#include "mongo/db/pipeline/document_source_lookup.h"
+#include "mongo/db/pipeline/document_source_match.h"
+#include "mongo/db/pipeline/document_source_merge.h"
+#include "mongo/db/pipeline/document_source_operation_metrics.h"
+#include "mongo/db/pipeline/document_source_out.h"
+#include "mongo/db/pipeline/document_source_plan_cache_stats.h"
+#include "mongo/db/pipeline/document_source_queue.h"
+#include "mongo/db/pipeline/document_source_redact.h"
+#include "mongo/db/pipeline/document_source_replace_root.h"
+#include "mongo/db/pipeline/document_source_sample.h"
+#include "mongo/db/pipeline/document_source_sample_from_random_cursor.h"
+#include "mongo/db/pipeline/document_source_sequential_document_cache.h"
+#include "mongo/db/pipeline/document_source_single_document_transformation.h"
+#include "mongo/db/pipeline/document_source_skip.h"
+#include "mongo/db/pipeline/document_source_sort.h"
+#include "mongo/db/pipeline/document_source_tee_consumer.h"
+#include "mongo/db/pipeline/document_source_union_with.h"
+#include "mongo/db/pipeline/document_source_unwind.h"
+#include "mongo/db/pipeline/visitors/document_source_visitor.h"
+#include "mongo/db/pipeline/visitors/document_source_walker.h"
+#include "mongo/db/pipeline/visitors/transformer_interface_walker.h"
+#include "mongo/db/query/query_feature_flags_gen.h"
+#include "mongo/db/query/query_knobs_gen.h"
+#include "mongo/db/query/query_planner_params.h"
+#include "mongo/s/query/document_source_merge_cursors.h"
+
+namespace mongo {
+
+using namespace optimizer;
+
+namespace {
+
+/**
+ * Visitor that is responsible for indicating whether a MatchExpression is eligible for Bonsai by
+ * setting the '_eligible' member variable. Expressions which are "test-only" and not officially
+ * supported should set _eligible to false.
+ */
+class ABTMatchExpressionVisitor : public MatchExpressionConstVisitor {
+public:
+ ABTMatchExpressionVisitor(bool& eligible) : _eligible(eligible) {}
+
+ void visit(const LTEMatchExpression* expr) override {
+ assertSupportedPathExpression(expr);
+ }
+ void visit(const LTMatchExpression* expr) override {
+ assertSupportedPathExpression(expr);
+ }
+ void visit(const ElemMatchObjectMatchExpression* expr) override {
+ assertSupportedPathExpression(expr);
+ }
+ void visit(const ElemMatchValueMatchExpression* expr) override {
+ assertSupportedPathExpression(expr);
+ }
+ void visit(const EqualityMatchExpression* expr) override {
+ assertSupportedPathExpression(expr);
+ }
+ void visit(const GTEMatchExpression* expr) override {
+ assertSupportedPathExpression(expr);
+ }
+ void visit(const GTMatchExpression* expr) override {
+ assertSupportedPathExpression(expr);
+ }
+ void visit(const InMatchExpression* expr) override {
+ assertSupportedPathExpression(expr);
+
+ // $in over a regex predicate is not supported.
+ if (!expr->getRegexes().empty()) {
+ _eligible = false;
+ }
+ }
+ void visit(const ExistsMatchExpression* expr) override {
+ assertSupportedPathExpression(expr);
+ }
+ void visit(const AndMatchExpression* expr) override {}
+ void visit(const OrMatchExpression* expr) override {}
+
+ void visit(const GeoMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const GeoNearMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalBucketGeoWithinMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalExprEqMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalExprGTMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalExprGTEMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalExprLTMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalExprLTEMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaAllowedPropertiesMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaBinDataEncryptedTypeExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaBinDataFLE2EncryptedTypeExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaBinDataSubTypeExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaCondMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaEqMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaFmodMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaMatchArrayIndexMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaMaxItemsMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaMaxLengthMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaMaxPropertiesMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaMinItemsMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaMinLengthMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaMinPropertiesMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaObjectMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaRootDocEqMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaTypeExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaUniqueItemsMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const InternalSchemaXorMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const ModMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const NorMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const NotMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const RegexMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const SizeMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const TextMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const TextNoOpMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const TwoDPtInAnnulusExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const WhereMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const WhereNoOpMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const BitsAllClearMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const BitsAllSetMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const BitsAnyClearMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const BitsAnySetMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const TypeMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const AlwaysFalseMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const AlwaysTrueMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+ void visit(const ExprMatchExpression* expr) override {
+ unsupportedExpression(expr);
+ }
+
+private:
+ void unsupportedExpression(const MatchExpression* expr) {
+ _eligible = false;
+ }
+
+ void assertSupportedPathExpression(const PathMatchExpression* expr) {
+ if (FieldRef(expr->path()).hasNumericPathComponents())
+ _eligible = false;
+ }
+
+ bool& _eligible;
+};
+
+
+class ABTTransformerVisitor : public TransformerInterfaceConstVisitor {
+public:
+ ABTTransformerVisitor(bool& eligible) : _eligible(eligible) {}
+
+ void visit(const projection_executor::ExclusionProjectionExecutor* transformer) override {
+ std::set<std::string> preservedPaths;
+ transformer->getRoot()->reportProjectedPaths(&preservedPaths);
+
+ for (const std::string& path : preservedPaths) {
+ if (FieldRef(path).hasNumericPathComponents()) {
+ unsupportedTransformer(transformer);
+ return;
+ }
+ }
+ }
+
+ void visit(const projection_executor::InclusionProjectionExecutor* transformer) override {
+ std::set<std::string> computedPaths;
+ StringMap<std::string> renamedPaths;
+ transformer->getRoot()->reportComputedPaths(&computedPaths, &renamedPaths);
+
+ // Non-simple projections are supported under test only.
+ if (computedPaths.size() > 0 || renamedPaths.size() > 0) {
+ unsupportedTransformer(transformer);
+ return;
+ }
+
+ std::set<std::string> preservedPaths;
+ transformer->getRoot()->reportProjectedPaths(&preservedPaths);
+
+ for (const std::string& path : preservedPaths) {
+ if (FieldRef(path).hasNumericPathComponents()) {
+ unsupportedTransformer(transformer);
+ return;
+ }
+ }
+ }
+
+ void visit(const projection_executor::AddFieldsProjectionExecutor* transformer) override {
+ unsupportedTransformer(transformer);
+ }
+
+ void visit(const GroupFromFirstDocumentTransformation* transformer) override {
+ unsupportedTransformer(transformer);
+ }
+
+ void visit(const ReplaceRootTransformation* transformer) override {
+ unsupportedTransformer(transformer);
+ }
+
+private:
+ void unsupportedTransformer(const TransformerInterface* transformer) const {
+ _eligible = false;
+ }
+
+ bool& _eligible;
+};
+
+/**
+ * Visitor that is responsible for indicating whether a DocumentSource is eligible for Bonsai by
+ * setting the 'eligible' member variable. Stages which are "test-only" and not officially supported
+ * should set 'eligible' to false.
+ */
+class ABTUnsupportedDocumentSourceVisitor : public DocumentSourceConstVisitor {
+public:
+ void visit(const DocumentSourceInternalUnpackBucket* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceBucketAuto* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceCollStats* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceCurrentOp* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceCursor* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceExchange* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceFacet* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceGeoNear* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceGeoNearCursor* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceGraphLookUp* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceIndexStats* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceInternalShardFilter* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceInternalSplitPipeline* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceListCachedAndActiveUsers* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceListLocalSessions* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceListSessions* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceLookUp* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceMerge* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceMergeCursors* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceOperationMetrics* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceOut* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourcePlanCacheStats* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceQueue* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceRedact* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceSample* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceSampleFromRandomCursor* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceSequentialDocumentCache* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceTeeConsumer* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceGroup* source) override {
+ unsupportedStage(source);
+ }
+ void visit(const DocumentSourceLimit* source) override {
+ unsupportedStage(source);
+ }
+ void visit(const DocumentSourceSkip* source) override {
+ unsupportedStage(source);
+ }
+ void visit(const DocumentSourceSort* source) override {
+ unsupportedStage(source);
+ }
+ void visit(const DocumentSourceUnwind* source) override {
+ unsupportedStage(source);
+ }
+ void visit(const DocumentSourceUnionWith* source) override {
+ unsupportedStage(source);
+ }
+
+ void visit(const DocumentSourceInternalInhibitOptimization* source) override {
+ // Can be ignored.
+ }
+
+ void visit(const DocumentSourceMatch* source) override {
+ // Pass a reference to our local 'eligible' variable to allow the visitor to overwrite it.
+ ABTMatchExpressionVisitor visitor(eligible);
+ MatchExpressionWalker walker(nullptr /*preVisitor*/, nullptr /*inVisitor*/, &visitor);
+ tree_walker::walk<true, MatchExpression>(source->getMatchExpression(), &walker);
+ }
+
+ void visit(const DocumentSourceSingleDocumentTransformation* source) override {
+ ABTTransformerVisitor visitor(eligible);
+ TransformerInterfaceWalker walker(&visitor);
+ walker.walk(&source->getTransformer());
+ }
+
+ void unsupportedStage(const DocumentSource* source) {
+ eligible = false;
+ }
+
+ bool eligible = true;
+};
+
+template <class RequestType>
+bool isEligibleCommon(const RequestType& request,
+ OperationContext* opCtx,
+ const CollectionPtr& collection) {
+ // The FindCommandRequest defaults some parameters to BSONObj() instead of boost::none.
+ auto noneOrDefaultEmpty = [&](auto param) {
+ if constexpr (std::is_same_v<decltype(param), boost::optional<BSONObj>>) {
+ return param && !param->isEmpty();
+ } else {
+ return !param.isEmpty();
+ }
+ };
+ bool unsupportedCmdOption = noneOrDefaultEmpty(request.getHint()) ||
+ noneOrDefaultEmpty(request.getCollation()) || request.getLet() ||
+ request.getLegacyRuntimeConstants();
+
+ bool unsupportedIndexType = [&]() {
+ if (collection == nullptr)
+ return false;
+
+ const IndexCatalog& indexCatalog = *collection->getIndexCatalog();
+ auto indexIterator =
+ indexCatalog.getIndexIterator(opCtx, IndexCatalog::InclusionPolicy::kReady);
+
+ while (indexIterator->more()) {
+ const IndexDescriptor& descriptor = *indexIterator->next()->descriptor();
+ if (descriptor.isPartial() || descriptor.hidden() || descriptor.isSparse() ||
+ descriptor.getIndexType() != IndexType::INDEX_BTREE) {
+ return true;
+ }
+ }
+ return false;
+ }();
+
+ bool unsupportedCollectionType = [&]() {
+ if (collection == nullptr)
+ return false;
+
+ if (collection->isClustered() || !collection->getCollectionOptions().collation.isEmpty() ||
+ collection->getTimeseriesOptions()) {
+ return true;
+ }
+
+ return false;
+ }();
+
+ return !unsupportedCmdOption && !unsupportedIndexType && !unsupportedCollectionType;
+}
+
+boost::optional<bool> shouldForceBonsai() {
+ // Without the feature flag set, nothing else matters.
+ if (!feature_flags::gFeatureFlagCommonQueryFramework.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
+ return false;
+ }
+
+ // The "force classic" flag takes precedence over the others.
+ if (internalQueryForceClassicEngine.load()) {
+ return false;
+ }
+
+ if (internalQueryForceCommonQueryFramework.load()) {
+ return true;
+ }
+
+ if (!internalQueryEnableCascadesOptimizer.load()) {
+ return false;
+ }
+
+ return boost::none;
+}
+
+} // namespace
+
+bool isEligibleForBonsai(const AggregateCommandRequest& request,
+ const Pipeline& pipeline,
+ OperationContext* opCtx,
+ const CollectionPtr& collection) {
+ if (auto forceBonsai = shouldForceBonsai(); forceBonsai.has_value()) {
+ return *forceBonsai;
+ }
+
+ bool commandOptionsEligible = isEligibleCommon(request, opCtx, collection) &&
+ !request.getUnwrappedReadPref() && !request.getRequestReshardingResumeToken().has_value() &&
+ !request.getExchange();
+
+ ABTUnsupportedDocumentSourceVisitor visitor;
+ DocumentSourceWalker walker(nullptr /*preVisitor*/, &visitor);
+ walker.walk(pipeline);
+ bool eligiblePipeline = visitor.eligible;
+
+ return commandOptionsEligible && eligiblePipeline;
+}
+
+bool isEligibleForBonsai(const FindCommandRequest& request,
+ const MatchExpression& expression,
+ OperationContext* opCtx,
+ const CollectionPtr& collection) {
+ if (auto forceBonsai = shouldForceBonsai(); forceBonsai.has_value()) {
+ return *forceBonsai;
+ }
+
+ bool commandOptionsEligible = isEligibleCommon(request, opCtx, collection);
+
+ bool eligibleMatch = true;
+ ABTMatchExpressionVisitor visitor(eligibleMatch);
+ MatchExpressionWalker walker(nullptr /*preVisitor*/, nullptr /*inVisitor*/, &visitor);
+ tree_walker::walk<true, MatchExpression>(&expression, &walker);
+
+ return commandOptionsEligible && eligibleMatch;
+}
+
+} // namespace mongo
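
The eligibility checks above share one pattern: walk a tree with a const visitor that holds a reference to the caller's boolean and clears it on any unsupported node, as ABTMatchExpressionVisitor and ABTUnsupportedDocumentSourceVisitor do. A compact, self-contained illustration with toy node types; this is not the server's MatchExpression hierarchy.

#include <iostream>
#include <memory>
#include <vector>

// Toy expression tree; stands in for the MatchExpression hierarchy.
struct EqNode;
struct RegexNode;
struct AndNode;

struct Visitor {
    virtual ~Visitor() = default;
    virtual void visit(const EqNode&) = 0;
    virtual void visit(const RegexNode&) = 0;
    virtual void visit(const AndNode&) = 0;
};

struct Node {
    virtual ~Node() = default;
    virtual void accept(Visitor& v) const = 0;
};

struct EqNode : Node { void accept(Visitor& v) const override { v.visit(*this); } };
struct RegexNode : Node { void accept(Visitor& v) const override { v.visit(*this); } };
struct AndNode : Node {
    std::vector<std::unique_ptr<Node>> children;
    void accept(Visitor& v) const override {
        v.visit(*this);
        for (auto& c : children) c->accept(v);  // walk the whole subtree
    }
};

// Mirrors the eligibility visitors: keeps a reference to the caller's flag and clears it
// when it meets anything the new optimizer does not support.
class EligibilityVisitor : public Visitor {
public:
    explicit EligibilityVisitor(bool& eligible) : _eligible(eligible) {}
    void visit(const EqNode&) override {}                         // supported
    void visit(const AndNode&) override {}                        // supported
    void visit(const RegexNode&) override { _eligible = false; }  // unsupported

private:
    bool& _eligible;
};

int main() {
    auto root = std::make_unique<AndNode>();
    root->children.push_back(std::make_unique<EqNode>());
    root->children.push_back(std::make_unique<RegexNode>());

    bool eligible = true;
    EligibilityVisitor visitor(eligible);
    root->accept(visitor);
    std::cout << (eligible ? "eligible for new optimizer" : "fall back to classic engine") << "\n";
}
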
diff --git a/src/mongo/db/commands/cqf/cqf_command_utils.h b/src/mongo/db/commands/cqf/cqf_command_utils.h
new file mode 100644
index 00000000000..a88b0b712d6
--- /dev/null
+++ b/src/mongo/db/commands/cqf/cqf_command_utils.h
@@ -0,0 +1,53 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/catalog/collection.h"
+
+namespace mongo {
+
+/**
+ * Returns whether the given aggregate command and Pipeline are eligible to use the Bonsai
+ * optimizer.
+ */
+bool isEligibleForBonsai(const AggregateCommandRequest& request,
+ const Pipeline& pipeline,
+ OperationContext* opCtx,
+ const CollectionPtr& collection);
+
+/**
+ * Returns whether the given find command is eligible to use the Bonsai optimizer.
+ */
+bool isEligibleForBonsai(const FindCommandRequest& request,
+ const MatchExpression& expression,
+ OperationContext* opCtx,
+ const CollectionPtr& collection);
+
+} // namespace mongo
diff --git a/src/mongo/db/commands/create_command.cpp b/src/mongo/db/commands/create_command.cpp
index e6b3a70efc5..27a871ba489 100644
--- a/src/mongo/db/commands/create_command.cpp
+++ b/src/mongo/db/commands/create_command.cpp
@@ -89,6 +89,10 @@ public:
return kCreateCommandHelp.toString();
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
using InvocationBaseGen::InvocationBaseGen;
diff --git a/src/mongo/db/commands/create_indexes.cpp b/src/mongo/db/commands/create_indexes.cpp
index 9172bbdac17..9b848683cd3 100644
--- a/src/mongo/db/commands/create_indexes.cpp
+++ b/src/mongo/db/commands/create_indexes.cpp
@@ -375,7 +375,7 @@ CreateIndexesReply runCreateIndexesOnNewCollection(
for (const auto& spec : specs) {
uassert(6100900,
"Cannot implicitly create a new collection with createIndex 'clustered' option",
- !spec["clustered"]);
+ !spec[IndexDescriptor::kClusteredFieldName]);
}
// We need to create the collection.
@@ -716,6 +716,13 @@ CreateIndexesReply runCreateIndexesWithCoordinator(OperationContext* opCtx,
* { createIndexes : "bar",
* indexes : [ { ns : "test.bar", key : { x : 1 }, name: "x_1" } ],
* commitQuorum: "majority" }
+ *
+ * commitQuorum specifies which or how many replica set members must be ready to commit before the
+ * primary will commit the index. commitQuorum accepts the same values as writeConcern, plus
+ * 'votingMembers' (the default). It is used to ensure that secondaries can commit the index build
+ * quickly, minimizing replication lag (a secondary blocks replication on receipt of
+ * commitIndexBuild while it completes the associated index build). Note that commitQuorum is NOT
+ * like writeConcern: there is no guarantee that the indexes on secondaries are ready for use after
+ * the command returns.
*/
class CmdCreateIndexes : public CreateIndexesCmdVersion1Gen<CmdCreateIndexes> {
public:
@@ -807,6 +814,10 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
} cmdCreateIndex;
} // namespace
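
To make the commitQuorum semantics described above concrete, here is a hedged sketch of assembling the documented createIndexes invocation with the server's BSON builder macros; the collection name and index spec mirror the example in the comment and are illustrative only.

// Sketch only: require that all voting members (the default commitQuorum) be
// ready before the primary commits the index build.
BSONObj cmd = BSON("createIndexes"
                   << "bar"
                   << "indexes"
                   << BSON_ARRAY(BSON("key" << BSON("x" << 1) << "name"
                                            << "x_1"))
                   << "commitQuorum"
                   << "votingMembers");
// "majority" or a number are also accepted, mirroring writeConcern.w; unlike
// writeConcern, a successful reply does not guarantee that the index is
// already usable on secondaries.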
diff --git a/src/mongo/db/commands/distinct.cpp b/src/mongo/db/commands/distinct.cpp
index 09a4350fddd..7b298885cde 100644
--- a/src/mongo/db/commands/distinct.cpp
+++ b/src/mongo/db/commands/distinct.cpp
@@ -134,6 +134,10 @@ public:
hasTerm);
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
Status explain(OperationContext* opCtx,
const OpMsgRequest& request,
ExplainOptions::Verbosity verbosity,
diff --git a/src/mongo/db/commands/drop_indexes.cpp b/src/mongo/db/commands/drop_indexes.cpp
index 88326b20e48..e95da5c1378 100644
--- a/src/mongo/db/commands/drop_indexes.cpp
+++ b/src/mongo/db/commands/drop_indexes.cpp
@@ -235,8 +235,8 @@ public:
"Uninitialized");
writeConflictRetry(opCtx, "dropAllIndexes", toReIndexNss.ns(), [&] {
WriteUnitOfWork wunit(opCtx);
- collection.getWritableCollection()->getIndexCatalog()->dropAllIndexes(
- opCtx, collection.getWritableCollection(), true, {});
+ collection.getWritableCollection(opCtx)->getIndexCatalog()->dropAllIndexes(
+ opCtx, collection.getWritableCollection(opCtx), true, {});
swIndexesToRebuild =
indexer->init(opCtx, collection, all, MultiIndexBlock::kNoopOnInitFn);
@@ -263,7 +263,7 @@ public:
writeConflictRetry(opCtx, "commitReIndex", toReIndexNss.ns(), [&] {
WriteUnitOfWork wunit(opCtx);
uassertStatusOK(indexer->commit(opCtx,
- collection.getWritableCollection(),
+ collection.getWritableCollection(opCtx),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
diff --git a/src/mongo/db/commands/find_and_modify.cpp b/src/mongo/db/commands/find_and_modify.cpp
index 221d053036a..c5bffda7673 100644
--- a/src/mongo/db/commands/find_and_modify.cpp
+++ b/src/mongo/db/commands/find_and_modify.cpp
@@ -280,6 +280,14 @@ public:
CmdFindAndModify::_updateMetrics.collectMetrics(request);
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
using InvocationBaseGen::InvocationBaseGen;
@@ -638,10 +646,15 @@ write_ops::FindAndModifyCommandReply CmdFindAndModify::Invocation::typedRun(
// Collect metrics.
CmdFindAndModify::collectMetrics(req);
- boost::optional<DisableDocumentValidation> maybeDisableValidation;
- if (req.getBypassDocumentValidation().value_or(false)) {
- maybeDisableValidation.emplace(opCtx);
- }
+ auto disableDocumentValidation = req.getBypassDocumentValidation().value_or(false);
+ auto fleCrudProcessed =
+ write_ops_exec::getFleCrudProcessed(opCtx, req.getEncryptionInformation());
+
+ DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(opCtx,
+ disableDocumentValidation);
+
+ DisableSafeContentValidationIfTrue safeContentValidationDisabler(
+ opCtx, disableDocumentValidation, fleCrudProcessed);
const auto inTransaction = opCtx->inMultiDocumentTransaction();
uassert(50781,
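
The replacement of the manually emplaced boost::optional with dedicated RAII guards can be summarized by a short sketch (the class and variable names come from the lines above; the surrounding write body is elided):

{
    // Sketch only: validation is suppressed for exactly the lifetime of these
    // guards, so there is no separate re-enable step to forget.
    DisableDocumentSchemaValidationIfTrue schemaGuard(opCtx, disableDocumentValidation);
    DisableSafeContentValidationIfTrue safeContentGuard(
        opCtx, disableDocumentValidation, fleCrudProcessed);
    // ... perform the findAndModify write ...
}  // both validations are restored here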
diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp
index 5bae30e43c2..eda3c32b291 100644
--- a/src/mongo/db/commands/find_cmd.cpp
+++ b/src/mongo/db/commands/find_cmd.cpp
@@ -36,6 +36,7 @@
#include "mongo/db/client.h"
#include "mongo/db/clientcursor.h"
#include "mongo/db/commands.h"
+#include "mongo/db/commands/cqf/cqf_command_utils.h"
#include "mongo/db/commands/run_aggregate.h"
#include "mongo/db/commands/test_commands_enabled.h"
#include "mongo/db/cursor_manager.h"
@@ -216,6 +217,10 @@ public:
return true;
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public CommandInvocation {
public:
Invocation(const FindCmd* definition, const OpMsgRequest& request, StringData dbName)
@@ -297,9 +302,8 @@ public:
// If we are running a query against a view, or if we are trying to test the new
// optimizer, redirect this query through the aggregation system.
if (ctx->getView() ||
- (feature_flags::gfeatureFlagCommonQueryFramework.isEnabled(
- serverGlobalParams.featureCompatibility) &&
- internalQueryEnableCascadesOptimizer.load())) {
+ isEligibleForBonsai(
+ cq->getFindCommandRequest(), *cq->root(), opCtx, ctx->getCollection())) {
// Relinquish locks. The aggregation command will re-acquire them.
ctx.reset();
@@ -503,9 +507,8 @@ public:
// If we are running a query against a view, or if we are trying to test the new
// optimizer, redirect this query through the aggregation system.
if (ctx->getView() ||
- (feature_flags::gfeatureFlagCommonQueryFramework.isEnabled(
- serverGlobalParams.featureCompatibility) &&
- internalQueryEnableCascadesOptimizer.load())) {
+ isEligibleForBonsai(
+ cq->getFindCommandRequest(), *cq->root(), opCtx, ctx->getCollection())) {
// Relinquish locks. The aggregation command will re-acquire them.
ctx.reset();
diff --git a/src/mongo/db/commands/fle_compact_test.cpp b/src/mongo/db/commands/fle_compact_test.cpp
index 18c52f548ef..26153aadcc8 100644
--- a/src/mongo/db/commands/fle_compact_test.cpp
+++ b/src/mongo/db/commands/fle_compact_test.cpp
@@ -395,8 +395,13 @@ void FleCompactTest::doSingleInsert(int id, BSONObj encryptedFieldsObj) {
auto efc =
generateEncryptedFieldConfig(encryptedFieldsObj.getFieldNames<std::set<std::string>>());
- uassertStatusOK(processInsert(
- _queryImpl.get(), _namespaces.edcNss, serverPayload, efc, kUninitializedTxnNumber, result));
+ uassertStatusOK(processInsert(_queryImpl.get(),
+ _namespaces.edcNss,
+ serverPayload,
+ efc,
+ kUninitializedTxnNumber,
+ result,
+ false));
}
void FleCompactTest::doSingleDelete(int id, BSONObj encryptedFieldsObj) {
diff --git a/src/mongo/db/commands/get_cluster_parameter_invocation.cpp b/src/mongo/db/commands/get_cluster_parameter_invocation.cpp
index b95acf4896f..7eb4218040e 100644
--- a/src/mongo/db/commands/get_cluster_parameter_invocation.cpp
+++ b/src/mongo/db/commands/get_cluster_parameter_invocation.cpp
@@ -43,10 +43,6 @@ namespace mongo {
std::pair<std::vector<std::string>, std::vector<BSONObj>>
GetClusterParameterInvocation::retrieveRequestedParameters(OperationContext* opCtx,
const CmdBody& cmdBody) {
- uassert(ErrorCodes::IllegalOperation,
- "featureFlagClusterWideConfig not enabled",
- gFeatureFlagClusterWideConfig.isEnabled(serverGlobalParams.featureCompatibility));
-
ServerParameterSet* clusterParameters = ServerParameterSet::getClusterParameterSet();
std::vector<std::string> parameterNames;
std::vector<BSONObj> parameterValues;
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index eacb27e85ff..3b34751014f 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -317,6 +317,10 @@ public:
return std::make_unique<Invocation>(this, opMsgRequest);
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public CommandInvocation {
public:
Invocation(Command* cmd, const OpMsgRequest& request)
diff --git a/src/mongo/db/commands/index_filter_commands.cpp b/src/mongo/db/commands/index_filter_commands.cpp
index f040bd9eea3..5deb5ecd339 100644
--- a/src/mongo/db/commands/index_filter_commands.cpp
+++ b/src/mongo/db/commands/index_filter_commands.cpp
@@ -99,7 +99,7 @@ void removePlanCacheEntriesByIndexFilterKeys(const stdx::unordered_set<uint32_t>
sbe::PlanCache* planCache) {
planCache->removeIf([&](const sbe::PlanCacheKey& key, const sbe::PlanCacheEntry& entry) {
return indexFilterKeys.contains(entry.indexFilterKey) &&
- key.getCollectionUuid() == collectionUuid;
+ key.getMainCollectionState().uuid == collectionUuid;
});
}
} // namespace
diff --git a/src/mongo/db/commands/killcursors_common.h b/src/mongo/db/commands/killcursors_common.h
index 06ee9c9335d..f0ccc33d794 100644
--- a/src/mongo/db/commands/killcursors_common.h
+++ b/src/mongo/db/commands/killcursors_common.h
@@ -66,6 +66,10 @@ public:
return true;
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation : public KCV1Gen::InvocationBaseGen {
public:
using KCV1Gen::InvocationBaseGen::InvocationBaseGen;
diff --git a/src/mongo/db/commands/pipeline_command.cpp b/src/mongo/db/commands/pipeline_command.cpp
index 1d2e0f25059..9f0cb6bd909 100644
--- a/src/mongo/db/commands/pipeline_command.cpp
+++ b/src/mongo/db/commands/pipeline_command.cpp
@@ -207,6 +207,10 @@ public:
return &::mongo::AggregateCommandRequest::kAuthorizationContract;
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
} pipelineCmd;
} // namespace
diff --git a/src/mongo/db/commands/plan_cache_commands_test.cpp b/src/mongo/db/commands/plan_cache_commands_test.cpp
index 4ef229c36f4..3495ee127d6 100644
--- a/src/mongo/db/commands/plan_cache_commands_test.cpp
+++ b/src/mongo/db/commands/plan_cache_commands_test.cpp
@@ -40,7 +40,7 @@ namespace {
static const NamespaceString nss{"test.collection"_sd};
-PlanCacheKey makeKey(const CanonicalQuery& cq) {
+PlanCacheKey makeClassicKey(const CanonicalQuery& cq) {
CollectionMock coll(nss);
return plan_cache_key_factory::make<PlanCacheKey>(cq, &coll);
}
@@ -106,7 +106,7 @@ TEST(PlanCacheCommandsTest, CanCanonicalizeWithValidQuery) {
plan_cache_commands::canonicalize(opCtx.get(), nss.ns(), fromjson("{query: {b: 3, a: 4}}"));
ASSERT_OK(statusWithCQ.getStatus());
std::unique_ptr<CanonicalQuery> equivQuery = std::move(statusWithCQ.getValue());
- ASSERT_EQUALS(makeKey(*query), makeKey(*equivQuery));
+ ASSERT_EQUALS(makeClassicKey(*query), makeClassicKey(*equivQuery));
}
TEST(PlanCacheCommandsTest, SortQueryResultsInDifferentPlanCacheKeyFromUnsorted) {
@@ -124,7 +124,7 @@ TEST(PlanCacheCommandsTest, SortQueryResultsInDifferentPlanCacheKeyFromUnsorted)
opCtx.get(), nss.ns(), fromjson("{query: {a: 1, b: 1}, sort: {a: 1, b: 1}}"));
ASSERT_OK(statusWithCQ.getStatus());
std::unique_ptr<CanonicalQuery> sortQuery = std::move(statusWithCQ.getValue());
- ASSERT_NOT_EQUALS(makeKey(*query), makeKey(*sortQuery));
+ ASSERT_NOT_EQUALS(makeClassicKey(*query), makeClassicKey(*sortQuery));
}
// Regression test for SERVER-17158.
@@ -143,7 +143,7 @@ TEST(PlanCacheCommandsTest, SortsAreProperlyDelimitedInPlanCacheKey) {
opCtx.get(), nss.ns(), fromjson("{query: {a: 1, b: 1}, sort: {aab: 1}}"));
ASSERT_OK(statusWithCQ.getStatus());
std::unique_ptr<CanonicalQuery> sortQuery2 = std::move(statusWithCQ.getValue());
- ASSERT_NOT_EQUALS(makeKey(*sortQuery1), makeKey(*sortQuery2));
+ ASSERT_NOT_EQUALS(makeClassicKey(*sortQuery1), makeClassicKey(*sortQuery2));
}
TEST(PlanCacheCommandsTest, ProjectQueryResultsInDifferentPlanCacheKeyFromUnprojected) {
@@ -160,7 +160,7 @@ TEST(PlanCacheCommandsTest, ProjectQueryResultsInDifferentPlanCacheKeyFromUnproj
opCtx.get(), nss.ns(), fromjson("{query: {a: 1, b: 1}, projection: {_id: 0, a: 1}}"));
ASSERT_OK(statusWithCQ.getStatus());
std::unique_ptr<CanonicalQuery> projectionQuery = std::move(statusWithCQ.getValue());
- ASSERT_NOT_EQUALS(makeKey(*query), makeKey(*projectionQuery));
+ ASSERT_NOT_EQUALS(makeClassicKey(*query), makeClassicKey(*projectionQuery));
}
} // namespace
diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp
index 6cb0760d139..42053578913 100644
--- a/src/mongo/db/commands/run_aggregate.cpp
+++ b/src/mongo/db/commands/run_aggregate.cpp
@@ -41,7 +41,9 @@
#include "mongo/db/catalog/collection_uuid_mismatch.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/catalog/database_holder.h"
+#include "mongo/db/change_stream_change_collection_manager.h"
#include "mongo/db/commands/cqf/cqf_aggregate.h"
+#include "mongo/db/commands/cqf/cqf_command_utils.h"
#include "mongo/db/curop.h"
#include "mongo/db/cursor_manager.h"
#include "mongo/db/db_raii.h"
@@ -686,12 +688,8 @@ Status runAggregate(OperationContext* opCtx,
// Determine if this aggregation has foreign collections that the execution subsystem needs
// to be aware of.
- std::vector<NamespaceStringOrUUID> secondaryExecNssList;
-
- // Taking locks over multiple collections is not supported outside of $lookup pushdown.
- if (feature_flags::gFeatureFlagSBELookupPushdown.isEnabledAndIgnoreFCV()) {
- secondaryExecNssList = liteParsedPipeline.getForeignExecutionNamespaces();
- }
+ std::vector<NamespaceStringOrUUID> secondaryExecNssList =
+ liteParsedPipeline.getForeignExecutionNamespaces();
// The collation to use for this aggregation. boost::optional to distinguish between the case
// where the collation has not yet been resolved, and where it has been resolved to nullptr.
@@ -752,9 +750,21 @@ Status runAggregate(OperationContext* opCtx,
<< " is not supported for a change stream",
!request.getCollectionUUID());
- // Replace the execution namespace with that of the oplog.
+            // Replace the execution namespace with the oplog namespace.
nss = NamespaceString::kRsOplogNamespace;
+            // In serverless, the change stream is opened on the change collection. First check
+            // that the change collection for this tenant exists, then replace the namespace with
+            // the change collection namespace.
+ if (ChangeStreamChangeCollectionManager::isChangeCollectionsModeActive()) {
+ auto& changeCollectionManager = ChangeStreamChangeCollectionManager::get(opCtx);
+ uassert(ErrorCodes::ChangeStreamNotEnabled,
+ "Change streams must be enabled before being used.",
+ changeCollectionManager.hasChangeCollection(opCtx, origNss.tenantId()));
+
+ nss = NamespaceString::makeChangeCollectionNSS(origNss.tenantId());
+ }
+
// Upgrade and wait for read concern if necessary.
_adjustChangeStreamReadConcern(opCtx);
@@ -940,9 +950,7 @@ Status runAggregate(OperationContext* opCtx,
constexpr bool alreadyOptimized = true;
pipeline->validateCommon(alreadyOptimized);
- if (feature_flags::gfeatureFlagCommonQueryFramework.isEnabled(
- serverGlobalParams.featureCompatibility) &&
- internalQueryEnableCascadesOptimizer.load()) {
+ if (isEligibleForBonsai(request, *pipeline, opCtx, collections.getMainCollection())) {
uassert(6624344,
"Exchanging is not supported in the Cascades optimizer",
!request.getExchange().has_value());
@@ -1023,7 +1031,7 @@ Status runAggregate(OperationContext* opCtx,
// yet.
invariant(ctx);
Explain::explainStages(explainExecutor,
- ctx->getCollection(),
+ collections,
*(expCtx->explain),
BSON("optimizedPipeline" << true),
cmdObj,
diff --git a/src/mongo/db/commands/set_cluster_parameter_command.cpp b/src/mongo/db/commands/set_cluster_parameter_command.cpp
index 696c6eda751..08ae1b2835e 100644
--- a/src/mongo/db/commands/set_cluster_parameter_command.cpp
+++ b/src/mongo/db/commands/set_cluster_parameter_command.cpp
@@ -75,12 +75,6 @@ public:
"setClusterParameter can only run on mongos in sharded clusters",
(serverGlobalParams.clusterRole == ClusterRole::None));
- FixedFCVRegion fcvRegion(opCtx);
- uassert(
- ErrorCodes::IllegalOperation,
- "Cannot set cluster parameter, gFeatureFlagClusterWideConfig is not enabled",
- gFeatureFlagClusterWideConfig.isEnabled(serverGlobalParams.featureCompatibility));
-
// TODO SERVER-65249: This will eventually be made specific to the parameter being set
// so that some parameters will be able to use setClusterParameter even on standalones.
uassert(ErrorCodes::IllegalOperation,
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index ad2084e2ac7..1f146fa082f 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -74,7 +74,6 @@
#include "mongo/db/s/resharding/resharding_coordinator_service.h"
#include "mongo/db/s/resharding/resharding_donor_recipient_common.h"
#include "mongo/db/s/sharding_ddl_coordinator_service.h"
-#include "mongo/db/s/sharding_util.h"
#include "mongo/db/s/transaction_coordinator_service.h"
#include "mongo/db/server_feature_flags_gen.h"
#include "mongo/db/server_options.h"
@@ -87,7 +86,6 @@
#include "mongo/logv2/log.h"
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/s/pm2423_feature_flags_gen.h"
-#include "mongo/s/pm2583_feature_flags_gen.h"
#include "mongo/s/resharding/resharding_feature_flag_gen.h"
#include "mongo/s/sharding_feature_flags_gen.h"
#include "mongo/stdx/unordered_set.h"
@@ -345,14 +343,10 @@ public:
// Drain moveChunks if the actualVersion relies on the new migration protocol but
// the requestedVersion uses the old one (downgrading).
- if ((feature_flags::gFeatureFlagMigrationRecipientCriticalSection
- .isEnabledOnVersion(actualVersion) &&
- !feature_flags::gFeatureFlagMigrationRecipientCriticalSection
- .isEnabledOnVersion(requestedVersion)) ||
- (feature_flags::gFeatureFlagNewPersistedChunkVersionFormat.isEnabledOnVersion(
- actualVersion) &&
- !feature_flags::gFeatureFlagNewPersistedChunkVersionFormat.isEnabledOnVersion(
- requestedVersion))) {
+ if (feature_flags::gFeatureFlagMigrationRecipientCriticalSection.isEnabledOnVersion(
+ actualVersion) &&
+ !feature_flags::gFeatureFlagMigrationRecipientCriticalSection
+ .isEnabledOnVersion(requestedVersion)) {
drainNewMoveChunks.emplace(opCtx, "setFeatureCompatibilityVersionDowngrade");
// At this point, because we are holding the MigrationBlockingGuard, no new
@@ -388,30 +382,6 @@ public:
!isBlockingUserWrites);
}
- // TODO (SERVER-65572): Remove setClusterParameter serialization and collection
- // drop after this is backported to 6.0.
- if (!gFeatureFlagClusterWideConfig.isEnabledOnVersion(requestedVersion)) {
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- uassert(ErrorCodes::CannotDowngrade,
- "Cannot downgrade while cluster server parameter is being set",
- ConfigsvrCoordinatorService::getService(opCtx)
- ->areAllCoordinatorsOfTypeFinished(
- opCtx, ConfigsvrCoordinatorTypeEnum::kSetClusterParameter));
- }
-
- DropReply dropReply;
- const auto dropStatus = dropCollection(
- opCtx,
- NamespaceString::kClusterParametersNamespace,
- &dropReply,
- DropCollectionSystemCollectionMode::kAllowSystemCollectionDrops);
- uassert(
- dropStatus.code(),
- str::stream() << "Failed to drop the cluster server parameters collection"
- << causedBy(dropStatus.reason()),
- dropStatus.isOK() || dropStatus.code() == ErrorCodes::NamespaceNotFound);
- }
-
FeatureCompatibilityVersion::updateFeatureCompatibilityVersionDocument(
opCtx,
actualVersion,
@@ -430,17 +400,6 @@ public:
clearOrphanCountersFromRangeDeletionTasks(opCtx);
}
- // TODO (SERVER-62325): Remove collMod draining mechanism after 6.0 branching.
- if (actualVersion > requestedVersion &&
- requestedVersion < multiversion::FeatureCompatibilityVersion::kVersion_6_0) {
- // No more collMod coordinators will start because we have already switched
- // the FCV value to kDowngrading. Wait for the ongoing collMod coordinators to
- // finish.
- ShardingDDLCoordinatorService::getService(opCtx)
- ->waitForCoordinatorsOfGivenTypeToComplete(
- opCtx, DDLCoordinatorTypeEnum::kCollMod);
- }
-
// TODO SERVER-65077: Remove FCV check once 6.0 is released
if (actualVersion > requestedVersion &&
!gFeatureFlagFLE2.isEnabledOnVersion(requestedVersion)) {
@@ -481,10 +440,6 @@ public:
actualVersion) &&
feature_flags::gFeatureFlagMigrationRecipientCriticalSection.isEnabledOnVersion(
requestedVersion)) ||
- (!feature_flags::gFeatureFlagNewPersistedChunkVersionFormat.isEnabledOnVersion(
- actualVersion) &&
- feature_flags::gFeatureFlagNewPersistedChunkVersionFormat.isEnabledOnVersion(
- requestedVersion)) ||
orphanTrackingCondition) {
drainOldMoveChunks.emplace(opCtx, "setFeatureCompatibilityVersionUpgrade");
@@ -586,14 +541,6 @@ private:
!feature_flags::gFeatureFlagChangeStreamPreAndPostImages.isEnabledOnVersion(
requestedVersion);
- // TODO SERVER-62693: remove the following scope once 6.0 branches out
- if (requestedVersion == multiversion::GenericFCV::kLastLTS) {
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer ||
- serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
- sharding_util::downgradeCollectionBalancingFieldsToPre53(opCtx);
- }
- }
-
// TODO SERVER-65332 remove logic bound to this future object When kLastLTS is 6.0
boost::optional<SharedSemiFuture<void>> chunkResizeAsyncTask;
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
@@ -731,13 +678,6 @@ private:
}
}
- // TODO SERVER-64720 Remove when 6.0 becomes last LTS
- if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
- ShardingDDLCoordinatorService::getService(opCtx)
- ->waitForCoordinatorsOfGivenTypeToComplete(
- opCtx, DDLCoordinatorTypeEnum::kCreateCollection);
- }
-
// TODO SERVER-62338 Remove when 6.0 branches-out
if (serverGlobalParams.clusterRole == ClusterRole::ShardServer &&
!resharding::gFeatureFlagRecoverableShardsvrReshardCollectionCoordinator
diff --git a/src/mongo/db/commands/set_index_commit_quorum.idl b/src/mongo/db/commands/set_index_commit_quorum.idl
index 97cd14c2938..960a6847791 100644
--- a/src/mongo/db/commands/set_index_commit_quorum.idl
+++ b/src/mongo/db/commands/set_index_commit_quorum.idl
@@ -53,5 +53,6 @@ commands:
commitQuorum:
type: CommitQuorum
description: "commitQuorum can be set to the same values as writeConcern.w and
- indicates how many and/or which replica set members are needed for the
- primary to commit the index build."
+ indicates how many and/or which replica set members must be ready to
+ commit the index build before the primary will proceed to commit the
+ index build."
diff --git a/src/mongo/db/commands/set_index_commit_quorum_command.cpp b/src/mongo/db/commands/set_index_commit_quorum_command.cpp
index 35a0c45122b..6eaf7148272 100644
--- a/src/mongo/db/commands/set_index_commit_quorum_command.cpp
+++ b/src/mongo/db/commands/set_index_commit_quorum_command.cpp
@@ -66,7 +66,10 @@ public:
<< " commitQuorum: <string|number|object> option to define the required quorum for"
<< std::endl
<< " the index builds to commit" << std::endl
- << "}";
+ << "}" << std::endl
+           << "This command is useful if the commitQuorum of an active index build is no longer "
+              "achievable or desirable (e.g., replica set membership has changed), or if secondary "
+              "replication lag has become a greater concern.";
return ss.str();
}
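
As a hedged illustration of the help text above, a caller could adjust the quorum of an in-progress build roughly as follows; the collection and index names are invented, and the field names follow the command's documented shape rather than anything shown in this patch.

// Sketch only: relax the commitQuorum of an active index build to "majority",
// e.g. after a replica set membership change.
BSONObj cmd = BSON("setIndexCommitQuorum"
                   << "bar"
                   << "indexNames" << BSON_ARRAY("x_1")
                   << "commitQuorum"
                   << "majority");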
diff --git a/src/mongo/db/commands/txn_cmds.cpp b/src/mongo/db/commands/txn_cmds.cpp
index c06f80f2590..d180b1964a2 100644
--- a/src/mongo/db/commands/txn_cmds.cpp
+++ b/src/mongo/db/commands/txn_cmds.cpp
@@ -78,6 +78,15 @@ public:
std::string help() const final {
return "Commits a transaction";
}
+
+ bool isTransactionCommand() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
using InvocationBaseGen::InvocationBaseGen;
@@ -182,6 +191,14 @@ public:
return "Aborts a transaction";
}
+ bool isTransactionCommand() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
using InvocationBaseGen::InvocationBaseGen;
diff --git a/src/mongo/db/commands/user_management_commands.cpp b/src/mongo/db/commands/user_management_commands.cpp
index 3045a4e69b9..63bfeb73a03 100644
--- a/src/mongo/db/commands/user_management_commands.cpp
+++ b/src/mongo/db/commands/user_management_commands.cpp
@@ -236,7 +236,7 @@ Status queryAuthzDocument(OperationContext* opCtx,
FindCommandRequest findRequest{collectionName};
findRequest.setFilter(query);
findRequest.setProjection(projection);
- client.find(std::move(findRequest), ReadPreferenceSetting{}, resultProcessor);
+ client.find(std::move(findRequest), resultProcessor);
return Status::OK();
} catch (const DBException& e) {
return e.toStatus();
@@ -1461,8 +1461,11 @@ UsersInfoReply CmdUMCTyped<UsersInfoCommand, UMCInfoParams>::Invocation::typedRu
CommandHelpers::appendSimpleCommandStatus(bodyBuilder, true);
bodyBuilder.doneFast();
auto response = CursorResponse::parseFromBSONThrowing(replyBuilder.releaseBody());
- DBClientCursor cursor(
- &client, response.getNSS(), response.getCursorId(), 0, 0, response.releaseBatch());
+ DBClientCursor cursor(&client,
+ response.getNSS(),
+ response.getCursorId(),
+ false /*isExhaust*/,
+ response.releaseBatch());
while (cursor.more()) {
users.push_back(cursor.next().getOwned());
diff --git a/src/mongo/db/commands/write_commands.cpp b/src/mongo/db/commands/write_commands.cpp
index d76053e34a7..b360c7b1a2c 100644
--- a/src/mongo/db/commands/write_commands.cpp
+++ b/src/mongo/db/commands/write_commands.cpp
@@ -263,6 +263,11 @@ BSONObj makeTimeseriesInsertDocument(std::shared_ptr<BucketCatalog::WriteBatch>
kTimeseriesControlDefaultVersion);
bucketControlBuilder.append(kBucketControlMinFieldName, batch->min());
bucketControlBuilder.append(kBucketControlMaxFieldName, batch->max());
+
+ if (feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
+ bucketControlBuilder.append(kBucketControlClosedFieldName, false);
+ }
}
if (metadataElem) {
builder.appendAs(metadataElem, kBucketMetaFieldName);
@@ -511,6 +516,13 @@ public:
return false;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
class Invocation final : public InvocationBaseGen {
public:
Invocation(OperationContext* opCtx,
@@ -531,7 +543,8 @@ public:
write_ops::InsertCommandReply typedRun(OperationContext* opCtx) final try {
transactionChecks(opCtx, ns());
- if (request().getEncryptionInformation().has_value()) {
+ if (request().getEncryptionInformation().has_value() &&
+ !request().getEncryptionInformation()->getCrudProcessed()) {
write_ops::InsertCommandReply insertReply;
auto batch = processFLEInsert(opCtx, request(), &insertReply);
if (batch == FLEBatchResult::kProcessed) {
@@ -720,8 +733,11 @@ public:
beforeSize = bucketDoc.objsize();
// Reset every time we run to ensure we never use a stale value
compressionStats = {};
- auto compressed = timeseries::compressBucket(
- bucketDoc, closedBucket.timeField, ns(), validateCompression);
+ auto compressed = timeseries::compressBucket(bucketDoc,
+ closedBucket.timeField,
+ ns(),
+ closedBucket.eligibleForReopening,
+ validateCompression);
if (compressed.compressedBucket) {
// If compressed object size is larger than uncompressed, skip compression
// update.
@@ -1386,6 +1402,15 @@ public:
bool shouldAffectCommandCounter() const final {
return false;
}
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
Invocation(OperationContext* opCtx,
@@ -1458,7 +1483,8 @@ public:
write_ops::UpdateCommandReply updateReply;
OperationSource source = OperationSource::kStandard;
- if (request().getEncryptionInformation().has_value()) {
+ if (request().getEncryptionInformation().has_value() &&
+ !request().getEncryptionInformation().get().getCrudProcessed()) {
return processFLEUpdate(opCtx, request());
}
@@ -1623,6 +1649,14 @@ public:
return false;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
Invocation(OperationContext* opCtx,
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index 681043076dd..f1da9723284 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -374,8 +374,7 @@ bool LockerImpl::_acquireTicket(OperationContext* opCtx, LockMode mode, Date_t d
// Acquiring a ticket is a potentially blocking operation. This must not be called after a
// transaction timestamp has been set, indicating this transaction has created an oplog
// hole.
- if (opCtx)
- invariant(!opCtx->recoveryUnit()->isTimestamped());
+ invariant(!opCtx->recoveryUnit()->isTimestamped());
auto waitMode = _uninterruptibleLocksRequested ? TicketHolder::WaitMode::kUninterruptible
: TicketHolder::WaitMode::kInterruptible;
diff --git a/src/mongo/db/create_indexes.idl b/src/mongo/db/create_indexes.idl
index 247e0295edb..f00f287af5a 100644
--- a/src/mongo/db/create_indexes.idl
+++ b/src/mongo/db/create_indexes.idl
@@ -215,7 +215,10 @@ commands:
default: false
unstable: true
commitQuorum:
- description: 'Commit Quorum options'
+ description: "Dictates which or how many replica set members must be ready to commit
+ the index build before the primary will proceed to commit the index.
+ This minimizes replication lag by ensuring secondaries are ready for
+ commit and can quickly apply the commit on a finished index build"
type: CommitQuorum
optional: true
unstable: false
diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp
index 08158554ad9..9fce4d9d7f9 100644
--- a/src/mongo/db/curop.cpp
+++ b/src/mongo/db/curop.cpp
@@ -502,6 +502,7 @@ bool CurOp::completeAndLogOperation(OperationContext* opCtx,
_debug.report(
opCtx, (lockerInfo ? &lockerInfo->stats : nullptr), operationMetricsPtr, &attr);
+ // TODO SERVER-67020 Ensure the ns in attr has the tenantId as the db prefix
LOGV2_OPTIONS(51803, {component}, "Slow query", attr);
_checkForFailpointsAfterCommandLogged();
diff --git a/src/mongo/db/database_name.h b/src/mongo/db/database_name.h
index a4a549eb75a..a44979a2096 100644
--- a/src/mongo/db/database_name.h
+++ b/src/mongo/db/database_name.h
@@ -49,27 +49,22 @@ public:
/**
* Constructs an empty DatabaseName.
*/
- DatabaseName() : _tenantId(boost::none), _dbString(""), _tenantDbString(boost::none){};
+ DatabaseName() = default;
/**
* Constructs a DatabaseName from the given tenantId and database name.
* "dbName" is expected only consist of a db name. It is the caller's responsibility to ensure
* the dbName is a valid db name.
*/
- DatabaseName(boost::optional<TenantId> tenantId, StringData dbString) {
- _tenantId = tenantId;
- _dbString = dbString.toString();
-
- _tenantDbString =
- _tenantId ? boost::make_optional(_tenantId->toString() + "_" + _dbString) : boost::none;
- }
+ DatabaseName(boost::optional<TenantId> tenantId, StringData dbString)
+ : _tenantId(std::move(tenantId)), _dbString(dbString.toString()) {}
/**
* Prefer to use the constructor above.
* TODO SERVER-65456 Remove this constructor.
*/
DatabaseName(StringData dbName, boost::optional<TenantId> tenantId = boost::none)
- : DatabaseName(tenantId, dbName) {}
+ : DatabaseName(std::move(tenantId), dbName) {}
static DatabaseName createSystemTenantDbName(StringData dbString);
@@ -82,28 +77,26 @@ public:
}
const std::string& toString() const {
- if (_tenantDbString)
- return *_tenantDbString;
+ return db();
+ }
+
+ std::string toStringWithTenantId() const {
+ if (_tenantId)
+ return str::stream() << *_tenantId << '_' << _dbString;
- invariant(!_tenantId);
return _dbString;
}
bool equalCaseInsensitive(const DatabaseName& other) const {
- return boost::iequals(toString(), other.toString());
- }
-
- /**
- * Returns -1, 0, or 1 if 'this' is less, equal, or greater than 'other' in
- * lexicographical order.
- */
- int compare(const DatabaseName& other) const {
- return toString().compare(other.toString());
+ return boost::iequals(toStringWithTenantId(), other.toStringWithTenantId());
}
template <typename H>
friend H AbslHashValue(H h, const DatabaseName& obj) {
- return H::combine(std::move(h), obj.toString());
+ if (obj._tenantId) {
+ return H::combine(std::move(h), obj._tenantId.get(), obj._dbString);
+ }
+ return H::combine(std::move(h), obj._dbString);
}
friend auto logAttrs(const DatabaseName& obj) {
@@ -111,9 +104,8 @@ public:
}
private:
- boost::optional<TenantId> _tenantId;
+ boost::optional<TenantId> _tenantId = boost::none;
std::string _dbString;
- boost::optional<std::string> _tenantDbString;
};
inline std::ostream& operator<<(std::ostream& stream, const DatabaseName& tdb) {
@@ -125,7 +117,7 @@ inline StringBuilder& operator<<(StringBuilder& builder, const DatabaseName& tdb
}
inline bool operator==(const DatabaseName& lhs, const DatabaseName& rhs) {
- return lhs.compare(rhs) == 0;
+ return (lhs.tenantId() == rhs.tenantId()) && (lhs.db() == rhs.db());
}
inline bool operator!=(const DatabaseName& lhs, const DatabaseName& rhs) {
@@ -133,11 +125,17 @@ inline bool operator!=(const DatabaseName& lhs, const DatabaseName& rhs) {
}
inline bool operator<(const DatabaseName& lhs, const DatabaseName& rhs) {
- return lhs.compare(rhs) < 0;
+ if (lhs.tenantId() != rhs.tenantId()) {
+ return lhs.tenantId() < rhs.tenantId();
+ }
+ return lhs.db() < rhs.db();
}
inline bool operator>(const DatabaseName& lhs, const DatabaseName& rhs) {
- return rhs < lhs;
+ if (lhs.tenantId() != rhs.tenantId()) {
+ return lhs.tenantId() > rhs.tenantId();
+ }
+ return lhs.db() > rhs.db();
}
inline bool operator<=(const DatabaseName& lhs, const DatabaseName& rhs) {
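
A brief sketch of the revised DatabaseName semantics, consistent with the test expectations in the next file: tenantId now participates in equality, ordering, and hashing, while the tenant prefix is only rendered by toStringWithTenantId(). The values below are illustrative.

// Sketch only.
const TenantId tid(OID::gen());
const DatabaseName plain(boost::none, "test");
const DatabaseName tenanted(tid, "test");

invariant(plain != tenanted);              // same db string, different tenantId
invariant(tenanted.toString() == "test");  // toString() no longer adds a prefix
invariant(tenanted.toStringWithTenantId() == tid.toString() + "_test");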
diff --git a/src/mongo/db/tenant_database_name_test.cpp b/src/mongo/db/database_name_test.cpp
index 15ed7f9cd14..88436c5d3f1 100644
--- a/src/mongo/db/tenant_database_name_test.cpp
+++ b/src/mongo/db/database_name_test.cpp
@@ -48,7 +48,8 @@ TEST(DatabaseNameTest, MultitenancySupportDisabled) {
ASSERT(dbnWithTenant.tenantId());
ASSERT_EQUALS(tenantId, *dbnWithTenant.tenantId());
ASSERT_EQUALS(std::string("a"), dbnWithTenant.db());
- ASSERT_EQUALS(std::string(tenantId.toString() + "_a"), dbnWithTenant.toString());
+ ASSERT_EQUALS(std::string("a"), dbnWithTenant.toString());
+ ASSERT_EQUALS(std::string(tenantId.toString() + "_a"), dbnWithTenant.toStringWithTenantId());
}
TEST(DatabaseNameTest, MultitenancySupportEnabledTenantIDNotRequired) {
@@ -65,7 +66,8 @@ TEST(DatabaseNameTest, MultitenancySupportEnabledTenantIDNotRequired) {
ASSERT(dbnWithTenant.tenantId());
ASSERT_EQUALS(tenantId, *dbnWithTenant.tenantId());
ASSERT_EQUALS(std::string("a"), dbnWithTenant.db());
- ASSERT_EQUALS(std::string(tenantId.toString() + "_a"), dbnWithTenant.toString());
+ ASSERT_EQUALS(std::string("a"), dbnWithTenant.toString());
+ ASSERT_EQUALS(std::string(tenantId.toString() + "_a"), dbnWithTenant.toStringWithTenantId());
}
/*
diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp
index ddc53c40db1..688577f8e28 100644
--- a/src/mongo/db/db_raii.cpp
+++ b/src/mongo/db/db_raii.cpp
@@ -804,6 +804,14 @@ const CollectionPtr& AutoGetCollectionForReadMaybeLockFree::getCollection() cons
}
}
+bool AutoGetCollectionForReadMaybeLockFree::isAnySecondaryNamespaceAViewOrSharded() const {
+ if (_autoGet) {
+ return _autoGet->isAnySecondaryNamespaceAViewOrSharded();
+ } else {
+ return _autoGetLockFree->isAnySecondaryNamespaceAViewOrSharded();
+ }
+}
+
template <typename AutoGetCollectionForReadType>
AutoGetCollectionForReadCommandBase<AutoGetCollectionForReadType>::
AutoGetCollectionForReadCommandBase(
diff --git a/src/mongo/db/db_raii.h b/src/mongo/db/db_raii.h
index 55e96aea833..63bdf8c621d 100644
--- a/src/mongo/db/db_raii.h
+++ b/src/mongo/db/db_raii.h
@@ -184,10 +184,6 @@ public:
Date_t deadline = Date_t::max(),
const std::vector<NamespaceStringOrUUID>& secondaryNssOrUUIDs = {});
- Database* getDb() const {
- return _autoColl->getDb();
- }
-
/**
* Indicates whether any namespace in 'secondaryNssOrUUIDs' is a view or sharded.
*
@@ -315,6 +311,7 @@ public:
const CollectionPtr& getCollection() const;
const ViewDefinition* getView() const;
const NamespaceString& getNss() const;
+ bool isAnySecondaryNamespaceAViewOrSharded() const;
private:
boost::optional<AutoGetCollectionForRead> _autoGet;
@@ -389,10 +386,6 @@ public:
const std::vector<NamespaceStringOrUUID>& secondaryNssOrUUIDs = {})
: AutoGetCollectionForReadCommandBase(
opCtx, nsOrUUID, viewMode, deadline, logMode, secondaryNssOrUUIDs) {}
-
- Database* getDb() const {
- return _autoCollForRead.getDb();
- }
};
/**
diff --git a/src/mongo/db/db_raii_multi_collection_test.cpp b/src/mongo/db/db_raii_multi_collection_test.cpp
index 1bbf3df0f62..cc2ce8c100f 100644
--- a/src/mongo/db/db_raii_multi_collection_test.cpp
+++ b/src/mongo/db/db_raii_multi_collection_test.cpp
@@ -27,11 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
-#include <string>
-
#include "mongo/db/catalog/catalog_test_fixture.h"
#include "mongo/db/client.h"
#include "mongo/db/concurrency/lock_state.h"
@@ -41,7 +36,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -82,10 +76,11 @@ public:
const NamespaceString _primaryNss = NamespaceString("db1.primary1");
const NamespaceString _secondaryNss1 = NamespaceString("db1.secondary1");
const NamespaceString _secondaryNss2 = NamespaceString("db1.secondary2");
- const NamespaceString _secondaryNssOtherDbNss = NamespaceString("db2.secondary1");
const std::vector<NamespaceStringOrUUID> _secondaryNssOrUUIDVec = {
NamespaceStringOrUUID(_secondaryNss1), NamespaceStringOrUUID(_secondaryNss2)};
+
+ const NamespaceString _secondaryNssOtherDbNss = NamespaceString("db2.secondary1");
const std::vector<NamespaceStringOrUUID> _secondaryNssOtherDbNssVec = {
NamespaceStringOrUUID(_secondaryNssOtherDbNss)};
@@ -235,34 +230,6 @@ TEST_F(AutoGetCollectionMultiTest, LockFreeMultiDBs) {
_secondaryNssOtherDbNss));
}
-TEST_F(AutoGetCollectionMultiTest, LockedMultiDBs) {
- auto opCtx1 = _client1.second.get();
-
- createCollections(opCtx1);
-
- AutoGetCollectionForRead autoGet(opCtx1,
- NamespaceStringOrUUID(_primaryNss),
- AutoGetCollectionViewMode::kViewsForbidden,
- Date_t::max(),
- _secondaryNssOtherDbNssVec);
-
- auto locker = opCtx1->lockState();
- locker->dump();
- invariant(locker->isLockHeldForMode(resourceIdGlobal, MODE_IS));
- invariant(locker->isDbLockedForMode(_primaryNss.db(), MODE_IS));
- invariant(locker->isDbLockedForMode(_secondaryNssOtherDbNss.db(), MODE_IS));
- // Set 'shouldConflictWithSecondaryBatchApplication' to true so isCollectionLockedForMode()
- // doesn't return true regardless of what locks are held.
- opCtx1->lockState()->setShouldConflictWithSecondaryBatchApplication(true);
- invariant(locker->isCollectionLockedForMode(_primaryNss, MODE_IS));
- invariant(locker->isCollectionLockedForMode(_secondaryNssOtherDbNss, MODE_IS));
-
- const auto& coll = autoGet.getCollection();
- ASSERT(coll);
- ASSERT(CollectionCatalog::get(opCtx1)->lookupCollectionByNamespace(opCtx1,
- _secondaryNssOtherDbNss));
-}
-
TEST_F(AutoGetCollectionMultiTest, LockFreeSecondaryNamespaceNotFoundIsOK) {
auto opCtx1 = _client1.second.get();
@@ -287,7 +254,7 @@ TEST_F(AutoGetCollectionMultiTest, LockedSecondaryNamespaceNotFound) {
NamespaceStringOrUUID(_primaryNss),
AutoGetCollectionViewMode::kViewsForbidden,
Date_t::max(),
- _secondaryNssOrUUIDAllVec);
+ _secondaryNssOrUUIDVec);
auto locker = opCtx1->lockState();
@@ -301,9 +268,9 @@ TEST_F(AutoGetCollectionMultiTest, LockedSecondaryNamespaceNotFound) {
invariant(locker->isDbLockedForMode(_primaryNss.db(), MODE_IS));
invariant(locker->isCollectionLockedForMode(_primaryNss, MODE_IS));
- for (const auto& secondaryNss : _secondaryNamespacesAll) {
+ for (const auto& secondaryNss : _secondaryNssOrUUIDVec) {
invariant(locker->isDbLockedForMode(secondaryNss.db(), MODE_IS));
- invariant(locker->isCollectionLockedForMode(secondaryNss, MODE_IS));
+ invariant(locker->isCollectionLockedForMode(*secondaryNss.nss(), MODE_IS));
}
const auto& coll = autoGet.getCollection();
diff --git a/src/mongo/db/dbdirectclient.cpp b/src/mongo/db/dbdirectclient.cpp
index 1304b97ad27..de53dd33bed 100644
--- a/src/mongo/db/dbdirectclient.cpp
+++ b/src/mongo/db/dbdirectclient.cpp
@@ -148,10 +148,11 @@ void DBDirectClient::say(Message& toSend, bool isRetry, string* actualServer) {
}
std::unique_ptr<DBClientCursor> DBDirectClient::find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) {
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode) {
invariant(!findRequest.getReadConcern(),
"passing readConcern to DBDirectClient::find() is not supported");
- return DBClientBase::find(std::move(findRequest), readPref);
+ return DBClientBase::find(std::move(findRequest), readPref, exhaustMode);
}
write_ops::FindAndModifyCommandReply DBDirectClient::findAndModify(
diff --git a/src/mongo/db/dbdirectclient.h b/src/mongo/db/dbdirectclient.h
index 7c8e89d0bc2..e47b6b50ec8 100644
--- a/src/mongo/db/dbdirectclient.h
+++ b/src/mongo/db/dbdirectclient.h
@@ -58,7 +58,8 @@ public:
using DBClientBase::update;
std::unique_ptr<DBClientCursor> find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) override;
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode) override;
write_ops::FindAndModifyCommandReply findAndModify(
const write_ops::FindAndModifyCommandRequest& findAndModify);
diff --git a/src/mongo/db/dbdirectclient_test.cpp b/src/mongo/db/dbdirectclient_test.cpp
index 19ba4c35e86..ab15c3cd7fb 100644
--- a/src/mongo/db/dbdirectclient_test.cpp
+++ b/src/mongo/db/dbdirectclient_test.cpp
@@ -171,9 +171,9 @@ TEST_F(DBDirectClientTest, ExhaustQuery) {
ASSERT_FALSE(insertReply.getWriteErrors());
// The query should work even though exhaust mode is requested.
- int batchSize = 2;
- auto cursor = client.query_DEPRECATED(
- kNs, BSONObj{}, Query{}, 0 /*limit*/, 0 /*skip*/, nullptr, QueryOption_Exhaust, batchSize);
+ FindCommandRequest findCmd{kNs};
+ findCmd.setBatchSize(2);
+ auto cursor = client.find(std::move(findCmd), ReadPreferenceSetting{}, ExhaustMode::kOn);
ASSERT_EQ(cursor->itcount(), numDocs);
}
diff --git a/src/mongo/db/dbhelpers.cpp b/src/mongo/db/dbhelpers.cpp
index 40375b58ddb..4afc53b4840 100644
--- a/src/mongo/db/dbhelpers.cpp
+++ b/src/mongo/db/dbhelpers.cpp
@@ -137,14 +137,11 @@ RecordId Helpers::findOne(OperationContext* opCtx,
}
bool Helpers::findById(OperationContext* opCtx,
- Database* database,
StringData ns,
BSONObj query,
BSONObj& result,
bool* nsFound,
bool* indexFound) {
- invariant(database);
-
// TODO ForRead?
NamespaceString nss{ns};
CollectionPtr collection =
diff --git a/src/mongo/db/dbhelpers.h b/src/mongo/db/dbhelpers.h
index b975bceaf21..ecb7081f29e 100644
--- a/src/mongo/db/dbhelpers.h
+++ b/src/mongo/db/dbhelpers.h
@@ -88,7 +88,6 @@ struct Helpers {
* Returns true if a matching document was found.
*/
static bool findById(OperationContext* opCtx,
- Database* db,
StringData ns,
BSONObj query,
BSONObj& result,
diff --git a/src/mongo/db/dbmessage.h b/src/mongo/db/dbmessage.h
index 1f5472b2272..0b8e8ce84c7 100644
--- a/src/mongo/db/dbmessage.h
+++ b/src/mongo/db/dbmessage.h
@@ -227,7 +227,7 @@ public:
* Indicates whether this message is expected to have a ns.
*/
bool messageShouldHaveNs() const {
- return (_msg.operation() >= dbUpdate) & (_msg.operation() <= dbDelete);
+ return static_cast<int>(_msg.operation() >= dbUpdate) & (_msg.operation() <= dbDelete);
}
/**
diff --git a/src/mongo/db/dollar_tenant_decoration_test.cpp b/src/mongo/db/dollar_tenant_decoration_test.cpp
deleted file mode 100644
index 391250a1791..00000000000
--- a/src/mongo/db/dollar_tenant_decoration_test.cpp
+++ /dev/null
@@ -1,167 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/bson/oid.h"
-#include "mongo/db/auth/authorization_manager_impl.h"
-#include "mongo/db/auth/authorization_session.h"
-#include "mongo/db/auth/authorization_session_impl.h"
-#include "mongo/db/auth/authz_manager_external_state_mock.h"
-#include "mongo/db/auth/security_token.h"
-#include "mongo/db/multitenancy.h"
-#include "mongo/db/multitenancy_gen.h"
-#include "mongo/db/service_context_test_fixture.h"
-#include "mongo/unittest/unittest.h"
-
-namespace mongo {
-
-/**
- * Encapsulation thwarting helper for authorizing a user without
- * having to set up any externalstate mocks or transport layers.
- */
-class AuthorizationSessionImplTestHelper {
-public:
- /**
- * Synthesize a user with the useTenant privilege and add them to the authorization session.
- */
- static void grantUseTenant(OperationContext* opCtx) {
- User user(UserName("useTenant", "admin"));
- user.setPrivileges(
- {Privilege(ResourcePattern::forClusterResource(), ActionType::useTenant)});
- auto* as =
- dynamic_cast<AuthorizationSessionImpl*>(AuthorizationSession::get(opCtx->getClient()));
- if (as->_authenticatedUser != boost::none) {
- as->logoutAllDatabases(opCtx->getClient(), "AuthorizationSessionImplTestHelper"_sd);
- }
- as->_authenticatedUser = std::move(user);
- as->_authenticationMode = AuthorizationSession::AuthenticationMode::kConnection;
- as->_updateInternalAuthorizationState();
- }
-};
-
-namespace {
-
-class DollarTenantDecorationTest : public ScopedGlobalServiceContextForTest, public unittest::Test {
-protected:
- void setUp() final {
- auto authzManagerState = std::make_unique<AuthzManagerExternalStateMock>();
- auto authzManager = std::make_unique<AuthorizationManagerImpl>(
- getServiceContext(), std::move(authzManagerState));
- authzManager->setAuthEnabled(true);
- AuthorizationManager::set(getServiceContext(), std::move(authzManager));
-
- client = getServiceContext()->makeClient("test");
- opCtxPtr = getServiceContext()->makeOperationContext(client.get());
- opCtx = opCtxPtr.get();
- }
-
- BSONObj makeSecurityToken(const UserName& userName) {
- constexpr auto authUserFieldName = auth::SecurityToken::kAuthenticatedUserFieldName;
- auto authUser = userName.toBSON(true /* serialize token */);
- ASSERT_EQ(authUser["tenant"_sd].type(), jstOID);
- return auth::signSecurityToken(BSON(authUserFieldName << authUser));
- }
-
- ServiceContext::UniqueClient client;
- ServiceContext::UniqueOperationContext opCtxPtr;
- OperationContext* opCtx;
-};
-
-TEST_F(DollarTenantDecorationTest, ParseDollarTenantFromRequestSecurityTokenAlreadySet) {
- gMultitenancySupport = true;
-
- // Ensure the security token is set on the opCtx.
- const auto kTenantId = TenantId(OID::gen());
- auto token = makeSecurityToken(UserName("user", "admin", kTenantId));
- auth::readSecurityTokenMetadata(opCtx, token);
- ASSERT(getActiveTenant(opCtx));
- ASSERT_EQ(*getActiveTenant(opCtx), kTenantId);
-
- // Grant authorization to set $tenant.
- AuthorizationSessionImplTestHelper::grantUseTenant(opCtx);
-
- // The dollarTenantDecoration should not be set because the security token is already set.
- const auto kTenantParameter = OID::gen();
- auto opMsgRequest = OpMsgRequest::fromDBAndBody("test", BSON("$tenant" << kTenantParameter));
- ASSERT_THROWS_CODE(
- parseDollarTenantFromRequest(opCtx, opMsgRequest), AssertionException, 6223901);
-
- // getActiveTenant should still return the tenantId in the security token.
- ASSERT(getActiveTenant(opCtx));
- ASSERT_EQ(*getActiveTenant(opCtx), kTenantId);
-}
-
-TEST_F(DollarTenantDecorationTest, ParseDollarTenantFromRequestUnauthorized) {
- gMultitenancySupport = true;
- const auto kOid = OID::gen();
-
- // We are not authenticated at all.
- auto opMsgRequest = OpMsgRequest::fromDBAndBody("test", BSON("$tenant" << kOid));
- ASSERT_THROWS_CODE(parseDollarTenantFromRequest(opCtx, opMsgRequest),
- AssertionException,
- ErrorCodes::Unauthorized);
- ASSERT(!getActiveTenant(opCtx));
-}
-
-TEST_F(DollarTenantDecorationTest, ParseDollarTenantMultitenancySupportDisabled) {
- gMultitenancySupport = false;
- const auto kOid = OID::gen();
-
- // Grant authorization to set $tenant.
- AuthorizationSessionImplTestHelper::grantUseTenant(opCtx);
-
- // TenantId is passed as the '$tenant' parameter. "multitenancySupport" is disabled, so we
- // should throw when attempting to set this tenantId on the opCtx.
- auto opMsgRequestParameter = OpMsgRequest::fromDBAndBody("test", BSON("$tenant" << kOid));
- ASSERT_THROWS_CODE(parseDollarTenantFromRequest(opCtx, opMsgRequestParameter),
- AssertionException,
- ErrorCodes::InvalidOptions);
- ASSERT(!getActiveTenant(opCtx));
-}
-
-TEST_F(DollarTenantDecorationTest, ParseDollarTenantFromRequestSuccess) {
- gMultitenancySupport = true;
- const auto kOid = OID::gen();
-
- // Grant authorization to set $tenant.
- AuthorizationSessionImplTestHelper::grantUseTenant(opCtx);
-
- // The tenantId should be successfully set because "multitenancySupport" is enabled and we're
- // authorized.
- auto opMsgRequest = OpMsgRequest::fromDBAndBody("test", BSON("$tenant" << kOid));
- parseDollarTenantFromRequest(opCtx, opMsgRequest);
-
- auto tenantId = getActiveTenant(opCtx);
- ASSERT(tenantId);
- ASSERT_EQ(tenantId->toString(), kOid.toString());
-}
-
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/db/exec/add_fields_projection_executor.cpp b/src/mongo/db/exec/add_fields_projection_executor.cpp
index 592074b4834..a0fd7f08580 100644
--- a/src/mongo/db/exec/add_fields_projection_executor.cpp
+++ b/src/mongo/db/exec/add_fields_projection_executor.cpp
@@ -92,38 +92,6 @@ private:
// The original object. Used to generate more helpful error messages.
const BSONObj& _rawObj;
- // Custom comparator that orders fieldpath strings by path prefix first, then by field.
- struct PathPrefixComparator {
- static constexpr char dot = '.';
-
- // Returns true if the lhs value should sort before the rhs, false otherwise.
- bool operator()(const std::string& lhs, const std::string& rhs) const {
- for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) {
- auto &lchar = lhs[pos], &rchar = rhs[pos];
- if (lchar == rchar) {
- continue;
- }
-
- // Consider the path delimiter '.' as being less than all other characters, so that
- // paths sort directly before any paths they prefix and directly after any paths
- // which prefix them.
- if (lchar == dot) {
- return true;
- } else if (rchar == dot) {
- return false;
- }
-
- // Otherwise, default to normal character comparison.
- return lchar < rchar;
- }
-
- // If we get here, then we have reached the end of lhs and/or rhs and all of their path
- // segments up to this point match. If lhs is shorter than rhs, then lhs prefixes rhs
- // and should sort before it.
- return lhs.size() < rhs.size();
- }
- };
-
// Tracks which paths we've seen to ensure no two paths conflict with each other.
std::set<std::string, PathPrefixComparator> _seenPaths;
};
diff --git a/src/mongo/db/exec/batched_delete_stage.cpp b/src/mongo/db/exec/batched_delete_stage.cpp
index 436aedc5232..588fdfe2b23 100644
--- a/src/mongo/db/exec/batched_delete_stage.cpp
+++ b/src/mongo/db/exec/batched_delete_stage.cpp
@@ -257,6 +257,16 @@ PlanStage::StageState BatchedDeleteStage::_deleteBatch(WorkingSetID* out) {
timeInBatch = _commitBatch(out, &recordsToSkip, &docsDeleted, &bufferOffset);
} catch (const WriteConflictException&) {
return _prepareToRetryDrainAfterWCE(out, recordsToSkip);
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>& ex) {
+ if (ex->getVersionReceived() == ChunkVersion::IGNORED() && ex->getCriticalSectionSignal()) {
+ // If ChunkVersion is IGNORED and we encountered a critical section, then yield, wait
+ // for critical section to finish and then we'll resume the write from the point we had
+ // left. We do this to prevent large multi-writes from repeatedly failing due to
+ // StaleConfig and exhausting the mongos retry attempts.
+ planExecutorShardingCriticalSectionFuture(opCtx()) = ex->getCriticalSectionSignal();
+ return _prepareToRetryDrainAfterWCE(out, recordsToSkip);
+ }
+ throw;
}
incrementSSSMetricNoOverflow(batchedDeletesSSS.docs, docsDeleted);
diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp
index 0651aae78ee..43ccca4a13a 100644
--- a/src/mongo/db/exec/bucket_unpacker.cpp
+++ b/src/mongo/db/exec/bucket_unpacker.cpp
@@ -240,29 +240,14 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
policy, matchExpr, "can't handle string comparison with a non-default collation"_sd);
}
- // We must avoid mapping predicates on the meta field onto the control field. These should be
- // mapped to the meta field instead.
- //
- // You might think these were handled earlier, by splitting the match expression into a
- // metadata-only part, and measurement/time-only part. However, splitting a $match into two
- // sequential $matches only works when splitting a conjunction. A predicate like
- // {$or: [ {a: 5}, {meta.b: 5} ]} cannot be split, and can't be metadata-only, so we have to
- // handle it here.
+ // This function only handles time and measurement predicates--not metadata.
if (bucketSpec.metaField() &&
(matchExprPath == bucketSpec.metaField().get() ||
expression::isPathPrefixOf(bucketSpec.metaField().get(), matchExprPath))) {
-
- if (haveComputedMetaField)
- return handleIneligible(policy, matchExpr, "can't handle a computed meta field");
-
- if (!includeMetaField)
- return handleIneligible(policy, matchExpr, "cannot handle an excluded meta field");
-
- auto result = matchExpr->shallowClone();
- expression::applyRenamesToExpression(
- result.get(),
- {{bucketSpec.metaField().get(), timeseries::kBucketMetaFieldName.toString()}});
- return result;
+ tasserted(
+ 6707200,
+ str::stream() << "createComparisonPredicate() does not handle metadata predicates: "
+ << matchExpr);
}
// We must avoid mapping predicates on fields computed via $addFields or a computed $project.
@@ -456,6 +441,33 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
tassert(5916304, "BucketSpec::createPredicatesOnBucketLevelField nullptr", matchExpr);
+ // If we have a leaf predicate on a meta field, we can map it to the bucket's meta field.
+    // This includes comparisons such as $eq and $lte, as well as non-comparison predicates
+    // such as $exists, $mod, or $elemMatch.
+ //
+ // Metadata predicates are partially handled earlier, by splitting the match expression into a
+ // metadata-only part, and measurement/time-only part. However, splitting a $match into two
+ // sequential $matches only works when splitting a conjunction. A predicate like
+ // {$or: [ {a: 5}, {meta.b: 5} ]} can't be split, and can't be metadata-only, so we have to
+ // handle it here.
+ const auto matchExprPath = matchExpr->path();
+ if (!matchExprPath.empty() && bucketSpec.metaField() &&
+ (matchExprPath == bucketSpec.metaField().get() ||
+ expression::isPathPrefixOf(bucketSpec.metaField().get(), matchExprPath))) {
+
+ if (haveComputedMetaField)
+ return handleIneligible(policy, matchExpr, "can't handle a computed meta field");
+
+ if (!includeMetaField)
+ return handleIneligible(policy, matchExpr, "cannot handle an excluded meta field");
+
+ auto result = matchExpr->shallowClone();
+ expression::applyRenamesToExpression(
+ result.get(),
+ {{bucketSpec.metaField().get(), timeseries::kBucketMetaFieldName.toString()}});
+ return result;
+ }
+
if (matchExpr->matchType() == MatchExpression::AND) {
auto nextAnd = static_cast<const AndMatchExpression*>(matchExpr);
auto andMatchExpr = std::make_unique<AndMatchExpression>();
@@ -606,7 +618,7 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
return handleIneligible(policy, matchExpr, "can't handle this predicate");
}
-BSONObj BucketSpec::pushdownPredicate(
+std::pair<bool, BSONObj> BucketSpec::pushdownPredicate(
const boost::intrusive_ptr<ExpressionContext>& expCtx,
const TimeseriesOptions& tsOptions,
ExpressionContext::CollationMatchesDefault collationMatchesDefault,
@@ -666,7 +678,7 @@ BSONObj BucketSpec::pushdownPredicate(
metaOnlyPredicate->serialize(&result);
if (bucketMetricPredicate)
bucketMetricPredicate->serialize(&result);
- return result.obj();
+ return std::make_pair(bucketMetricPredicate.get(), result.obj());
}
class BucketUnpacker::UnpackingImpl {
diff --git a/src/mongo/db/exec/bucket_unpacker.h b/src/mongo/db/exec/bucket_unpacker.h
index 287bd9f2540..7e32629407d 100644
--- a/src/mongo/db/exec/bucket_unpacker.h
+++ b/src/mongo/db/exec/bucket_unpacker.h
@@ -167,8 +167,11 @@ public:
*
* When using IneligiblePredicatePolicy::kIgnore, if the predicate can't be pushed down, it
* returns null. When using IneligiblePredicatePolicy::kError it raises a user error.
+ *
+     * Returns a boolean (alongside the bucket-level predicate) indicating whether the result
+     * contains a metric predicate.
*/
- static BSONObj pushdownPredicate(
+ static std::pair<bool, BSONObj> pushdownPredicate(
const boost::intrusive_ptr<ExpressionContext>& expCtx,
const TimeseriesOptions& tsOptions,
ExpressionContext::CollationMatchesDefault collationMatchesDefault,
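
With the signature change above, callers of pushdownPredicate receive both the serialized bucket-level filter and a flag telling them whether any metric (non-metadata) predicate made it in. A minimal sketch of that calling pattern, assuming placeholder types: a std::string stands in for the returned BSONObj and the function body is invented.

    #include <iostream>
    #include <string>
    #include <utility>

    // Hypothetical stand-in for the new return shape: the bool reports whether a metric
    // predicate is part of the pushed-down filter.
    std::pair<bool, std::string> pushdownPredicateSketch(bool hasMetricPart) {
        std::string pred = hasMetricPart ? "{control.min.a: {$lte: 5}}" : "{meta.b: 5}";
        return {hasMetricPart, std::move(pred)};
    }

    int main() {
        auto [containsMetricPredicate, predicate] = pushdownPredicateSketch(true);
        std::cout << std::boolalpha << containsMetricPredicate << " -> " << predicate << "\n";
        return 0;
    }
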
diff --git a/src/mongo/db/exec/bucket_unpacker_test.cpp b/src/mongo/db/exec/bucket_unpacker_test.cpp
index 8ee0f4e05f5..ce5065318be 100644
--- a/src/mongo/db/exec/bucket_unpacker_test.cpp
+++ b/src/mongo/db/exec/bucket_unpacker_test.cpp
@@ -220,7 +220,8 @@ TEST_F(BucketUnpackerTest, ExcludeASingleField) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, EmptyIncludeGetsEmptyMeasurements) {
@@ -246,7 +247,8 @@ TEST_F(BucketUnpackerTest, EmptyIncludeGetsEmptyMeasurements) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, EmptyExcludeMaterializesAllFields) {
@@ -274,7 +276,8 @@ TEST_F(BucketUnpackerTest, EmptyExcludeMaterializesAllFields) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, SparseColumnsWhereOneColumnIsExhaustedBeforeTheOther) {
@@ -300,7 +303,8 @@ TEST_F(BucketUnpackerTest, SparseColumnsWhereOneColumnIsExhaustedBeforeTheOther)
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, UnpackBasicIncludeWithDollarPrefix) {
@@ -329,7 +333,8 @@ TEST_F(BucketUnpackerTest, UnpackBasicIncludeWithDollarPrefix) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, BucketsWithMetadataOnly) {
@@ -354,7 +359,8 @@ TEST_F(BucketUnpackerTest, BucketsWithMetadataOnly) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, UnorderedRowKeysDoesntAffectMaterialization) {
@@ -412,7 +418,8 @@ TEST_F(BucketUnpackerTest, MissingMetaFieldDoesntMaterializeMetadata) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, MissingMetaFieldDoesntMaterializeMetadataUnorderedKeys) {
@@ -467,7 +474,8 @@ TEST_F(BucketUnpackerTest, ExcludedMetaFieldDoesntMaterializeMetadataWhenBucketH
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUndefinedMeta) {
@@ -486,7 +494,8 @@ TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUndefinedMeta) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUnexpectedMeta) {
@@ -506,7 +515,8 @@ TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUnexpectedMeta) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, NullMetaInBucketMaterializesAsNull) {
@@ -533,7 +543,8 @@ TEST_F(BucketUnpackerTest, NullMetaInBucketMaterializesAsNull) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, GetNextHandlesMissingMetaInBucket) {
@@ -565,7 +576,8 @@ TEST_F(BucketUnpackerTest, GetNextHandlesMissingMetaInBucket) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, EmptyDataRegionInBucketIsTolerated) {
@@ -887,7 +899,8 @@ TEST_F(BucketUnpackerTest, TamperedCompressedCountLess) {
"a:{'0':1, '1':2}, b:{'1':1}}}");
auto compressedBucket =
- timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket;
+ timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket;
// Reduce the count by one to be 1.
auto modifiedCompressedBucket = modifyCompressedBucketElementCount(*compressedBucket, -1);
@@ -922,7 +935,8 @@ TEST_F(BucketUnpackerTest, TamperedCompressedCountMore) {
"a:{'0':1, '1':2}, b:{'1':1}}}");
auto compressedBucket =
- timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket;
+ timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket;
// Increase the count by one to be 3.
auto modifiedCompressedBucket = modifyCompressedBucketElementCount(*compressedBucket, 1);
@@ -957,7 +971,8 @@ TEST_F(BucketUnpackerTest, TamperedCompressedCountMissing) {
"a:{'0':1, '1':2}, b:{'1':1}}}");
auto compressedBucket =
- timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket;
+ timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket;
// Remove the count field
auto modifiedCompressedBucket = modifyCompressedBucketElementCount(*compressedBucket, 0);
@@ -993,7 +1008,8 @@ TEST_F(BucketUnpackerTest, TamperedCompressedElementMismatchDataField) {
"a:{'0':1, '1':2}, b:{'1':1}}}");
auto compressedBucket =
- timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket;
+ timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket;
// Remove an element in the "a" field.
auto modifiedCompressedBucket =
modifyCompressedBucketRemoveLastInField(*compressedBucket, "a"_sd);
@@ -1028,7 +1044,8 @@ TEST_F(BucketUnpackerTest, TamperedCompressedElementMismatchTimeField) {
"a:{'0':1, '1':2}, b:{'1':1}}}");
auto compressedBucket =
- timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket;
+ timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket;
// Remove an element in the time field
auto modifiedCompressedBucket =
modifyCompressedBucketRemoveLastInField(*compressedBucket, "time"_sd);
diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp
index b8f0df82dcd..19d8033d3f1 100644
--- a/src/mongo/db/exec/collection_scan.cpp
+++ b/src/mongo/db/exec/collection_scan.cpp
@@ -80,7 +80,7 @@ CollectionScan::CollectionScan(ExpressionContext* expCtx,
// The 'minRecord' and 'maxRecord' parameters are used for a special optimization that
// applies only to forwards scans of the oplog and scans on clustered collections.
invariant(!params.resumeAfterRecordId);
- if (collection->ns().isOplog()) {
+ if (collection->ns().isOplogOrChangeCollection()) {
invariant(params.direction == CollectionScanParams::FORWARD);
} else {
invariant(collection->isClustered());
@@ -109,17 +109,26 @@ CollectionScan::CollectionScan(ExpressionContext* expCtx,
"collection scan bounds",
"min"_attr = (!_params.minRecord) ? "none" : _params.minRecord->toString(),
"max"_attr = (!_params.maxRecord) ? "none" : _params.maxRecord->toString());
- invariant(!_params.shouldTrackLatestOplogTimestamp || collection->ns().isOplog());
-
- if (params.assertTsHasNotFallenOffOplog) {
- invariant(params.shouldTrackLatestOplogTimestamp);
- invariant(params.direction == CollectionScanParams::FORWARD);
+ tassert(6521000,
+ "Expected an oplog or a change collection with 'shouldTrackLatestOplogTimestamp'",
+ !_params.shouldTrackLatestOplogTimestamp ||
+ collection->ns().isOplogOrChangeCollection());
+
+ if (params.assertTsHasNotFallenOff) {
+ tassert(6521001,
+ "Expected 'shouldTrackLatestOplogTimestamp' with 'assertTsHasNotFallenOff'",
+ params.shouldTrackLatestOplogTimestamp);
+ tassert(6521002,
+ "Expected forward collection scan with 'assertTsHasNotFallenOff'",
+ params.direction == CollectionScanParams::FORWARD);
}
if (params.resumeAfterRecordId) {
// The 'resumeAfterRecordId' parameter is used for resumable collection scans, which we
// only support in the forward direction.
- invariant(params.direction == CollectionScanParams::FORWARD);
+ tassert(6521003,
+ "Expected forward collection scan with 'resumeAfterRecordId'",
+ params.direction == CollectionScanParams::FORWARD);
}
}
@@ -227,8 +236,8 @@ PlanStage::StageState CollectionScan::doWork(WorkingSetID* out) {
}
_lastSeenId = record->id;
- if (_params.assertTsHasNotFallenOffOplog) {
- assertTsHasNotFallenOffOplog(*record);
+ if (_params.assertTsHasNotFallenOff) {
+ assertTsHasNotFallenOff(*record);
}
if (_params.shouldTrackLatestOplogTimestamp) {
setLatestOplogEntryTimestamp(*record);
@@ -259,22 +268,28 @@ void CollectionScan::setLatestOplogEntryTimestamp(const Record& record) {
_latestOplogEntryTimestamp = std::max(_latestOplogEntryTimestamp, tsElem.timestamp());
}
-void CollectionScan::assertTsHasNotFallenOffOplog(const Record& record) {
- // If the first entry we see in the oplog is the replset initialization, then it doesn't matter
- // if its timestamp is later than the timestamp that should not have fallen off the oplog; no
- // events earlier can have fallen off this oplog. Otherwise, verify that the timestamp of the
- // first observed oplog entry is earlier than or equal to timestamp that should not have fallen
- // off the oplog.
+void CollectionScan::assertTsHasNotFallenOff(const Record& record) {
auto oplogEntry = uassertStatusOK(repl::OplogEntry::parse(record.data.toBson()));
invariant(_specificStats.docsTested == 0);
+
+ // If the first entry we see in the oplog is the replset initialization, then it doesn't matter
+ // if its timestamp is later than the timestamp that should not have fallen off the oplog; no
+ // events earlier can have fallen off this oplog.
+    // NOTE: A change collection can be created at any moment, so it might not contain a replset
+    // initialization message; this special case therefore does not fully apply to change collections.
const bool isNewRS =
oplogEntry.getObject().binaryEqual(BSON("msg" << repl::kInitiatingSetMsg)) &&
oplogEntry.getOpType() == repl::OpTypeEnum::kNoop;
+
+ // Verify that the timestamp of the first observed oplog entry is earlier than or equal to
+ // timestamp that should not have fallen off the oplog.
+ const bool tsHasNotFallenOff = oplogEntry.getTimestamp() <= *_params.assertTsHasNotFallenOff;
+
uassert(ErrorCodes::OplogQueryMinTsMissing,
"Specified timestamp has already fallen off the oplog",
- isNewRS || oplogEntry.getTimestamp() <= *_params.assertTsHasNotFallenOffOplog);
+ isNewRS || tsHasNotFallenOff);
// We don't need to check this assertion again after we've confirmed the first oplog event.
- _params.assertTsHasNotFallenOffOplog = boost::none;
+ _params.assertTsHasNotFallenOff = boost::none;
}
namespace {
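
The rewritten assertTsHasNotFallenOff above boils down to one comparison plus one escape hatch, applied only to the first record the scan sees. A standalone model of that check, with plain integers standing in for Timestamp and all names invented:

    #include <cstdint>
    #include <iostream>
    #include <optional>

    // The first observed entry either is the replica-set initialization no-op (nothing can have
    // fallen off yet) or must carry a timestamp at or before the requested minimum.
    bool tsHasNotFallenOff(std::uint64_t firstEntryTs,
                           bool firstEntryIsInitMsg,
                           std::uint64_t requestedMinTs) {
        return firstEntryIsInitMsg || firstEntryTs <= requestedMinTs;
    }

    int main() {
        std::optional<std::uint64_t> assertTs = 100;  // plays the role of _params.assertTsHasNotFallenOff
        std::cout << std::boolalpha
                  << tsHasNotFallenOff(90, false, *assertTs) << "\n"    // true: still in the oplog
                  << tsHasNotFallenOff(150, false, *assertTs) << "\n"   // false: would uassert
                  << tsHasNotFallenOff(150, true, *assertTs) << "\n";   // true: init-message escape hatch
        assertTs.reset();  // like the stage, drop the parameter so the check runs only once
        return 0;
    }
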
diff --git a/src/mongo/db/exec/collection_scan.h b/src/mongo/db/exec/collection_scan.h
index f9ce637dbad..a3737635ad6 100644
--- a/src/mongo/db/exec/collection_scan.h
+++ b/src/mongo/db/exec/collection_scan.h
@@ -117,7 +117,7 @@ private:
/**
* Asserts that the minimum timestamp in the query filter has not already fallen off the oplog.
*/
- void assertTsHasNotFallenOffOplog(const Record& record);
+ void assertTsHasNotFallenOff(const Record& record);
// WorkingSet is not owned by us.
WorkingSet* _workingSet;
diff --git a/src/mongo/db/exec/collection_scan_common.h b/src/mongo/db/exec/collection_scan_common.h
index ba5559a4491..a0e550a904d 100644
--- a/src/mongo/db/exec/collection_scan_common.h
+++ b/src/mongo/db/exec/collection_scan_common.h
@@ -98,7 +98,7 @@ struct CollectionScanParams {
bool tailable = false;
// Assert that the specified timestamp has not fallen off the oplog on a forward scan.
- boost::optional<Timestamp> assertTsHasNotFallenOffOplog = boost::none;
+ boost::optional<Timestamp> assertTsHasNotFallenOff = boost::none;
// Should we keep track of the timestamp of the latest oplog entry we've seen? This information
// is needed to merge cursors from the oplog in order of operation time when reading the oplog
diff --git a/src/mongo/db/exec/delete_stage.cpp b/src/mongo/db/exec/delete_stage.cpp
index 75ae33e9dc8..331a2293680 100644
--- a/src/mongo/db/exec/delete_stage.cpp
+++ b/src/mongo/db/exec/delete_stage.cpp
@@ -180,23 +180,38 @@ PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) {
bool writeToOrphan = false;
if (!_params->isExplain && !_params->fromMigrate) {
- const auto action = _preWriteFilter.computeAction(member->doc.value());
- if (action == write_stage_common::PreWriteFilter::Action::kSkip) {
- LOGV2_DEBUG(5983201,
- 3,
- "Skipping delete operation to orphan document to prevent a wrong change "
- "stream event",
- "namespace"_attr = collection()->ns(),
- "record"_attr = member->doc.value());
- return PlanStage::NEED_TIME;
- } else if (action == write_stage_common::PreWriteFilter::Action::kWriteAsFromMigrate) {
- LOGV2_DEBUG(6184700,
- 3,
- "Marking delete operation to orphan document with the fromMigrate flag "
- "to prevent a wrong change stream event",
- "namespace"_attr = collection()->ns(),
- "record"_attr = member->doc.value());
- writeToOrphan = true;
+ try {
+ const auto action = _preWriteFilter.computeAction(member->doc.value());
+ if (action == write_stage_common::PreWriteFilter::Action::kSkip) {
+ LOGV2_DEBUG(
+ 5983201,
+ 3,
+ "Skipping delete operation to orphan document to prevent a wrong change "
+ "stream event",
+ "namespace"_attr = collection()->ns(),
+ "record"_attr = member->doc.value());
+ return PlanStage::NEED_TIME;
+ } else if (action == write_stage_common::PreWriteFilter::Action::kWriteAsFromMigrate) {
+ LOGV2_DEBUG(6184700,
+ 3,
+ "Marking delete operation to orphan document with the fromMigrate flag "
+ "to prevent a wrong change stream event",
+ "namespace"_attr = collection()->ns(),
+ "record"_attr = member->doc.value());
+ writeToOrphan = true;
+ }
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>& ex) {
+ if (ex->getVersionReceived() == ChunkVersion::IGNORED() &&
+ ex->getCriticalSectionSignal()) {
+ // If ChunkVersion is IGNORED and we encountered a critical section, then yield,
+ // wait for the critical section to finish and then we'll resume the write from the
+ // point we had left. We do this to prevent large multi-writes from repeatedly
+ // failing due to StaleConfig and exhausting the mongos retry attempts.
+ planExecutorShardingCriticalSectionFuture(opCtx()) = ex->getCriticalSectionSignal();
+ memberFreer.dismiss(); // Keep this member around so we can retry deleting it.
+ return prepareToRetryWSM(id, out);
+ }
+ throw;
}
}
@@ -237,6 +252,18 @@ PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) {
} catch (const WriteConflictException&) {
memberFreer.dismiss(); // Keep this member around so we can retry deleting it.
return prepareToRetryWSM(id, out);
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>& ex) {
+ if (ex->getVersionReceived() == ChunkVersion::IGNORED() &&
+ ex->getCriticalSectionSignal()) {
+ // If ChunkVersion is IGNORED and we encountered a critical section, then yield,
+ // wait for the critical section to finish and then we'll resume the write from the
+ // point we had left. We do this to prevent large multi-writes from repeatedly
+ // failing due to StaleConfig and exhausting the mongos retry attempts.
+ planExecutorShardingCriticalSectionFuture(opCtx()) = ex->getCriticalSectionSignal();
+ memberFreer.dismiss(); // Keep this member around so we can retry deleting it.
+ return prepareToRetryWSM(id, out);
+ }
+ throw;
}
}
_specificStats.docsDeleted += _params->numStatsForDoc ? _params->numStatsForDoc(bsonObjDoc) : 1;
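
Both catch blocks added to DeleteStage::doWork above follow the same recipe: treat StaleConfig with an IGNORED version and an active critical section as retryable, keep the working-set member, and come back to it after yielding. A self-contained sketch of that shape, with invented types in place of the MongoDB exception and plan-stage machinery:

    #include <iostream>
    #include <stdexcept>

    // Stand-in for the retryable condition (StaleConfig with ChunkVersion::IGNORED while a
    // critical section is active).
    struct RetryableShardingError : std::runtime_error {
        using std::runtime_error::runtime_error;
    };

    enum class StageState { ADVANCED, NEED_YIELD };

    // Attempt the write; on the retryable error keep the work item (modelled by a flag, like
    // memberFreer.dismiss()) and ask the executor to yield and retry.
    StageState doDeleteOnce(bool& keepMember, bool criticalSectionActive) {
        try {
            if (criticalSectionActive) {
                throw RetryableShardingError("critical section in progress");
            }
            return StageState::ADVANCED;  // the delete went through
        } catch (const RetryableShardingError&) {
            keepMember = true;
            return StageState::NEED_YIELD;
        }
    }

    int main() {
        bool keepMember = false;
        const bool retried = doDeleteOnce(keepMember, true) == StageState::NEED_YIELD;
        const bool succeeded = doDeleteOnce(keepMember, false) == StageState::ADVANCED;
        std::cout << std::boolalpha << retried << " " << keepMember << " " << succeeded << "\n";
        return 0;
    }

The point of the pattern is that a large multi-delete does not bubble StaleConfig back to mongos on every orphaned document; it waits out the critical section and resumes where it left off.
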
diff --git a/src/mongo/db/exec/multi_plan.cpp b/src/mongo/db/exec/multi_plan.cpp
index 1db8860dc2e..0dbb0c4a405 100644
--- a/src/mongo/db/exec/multi_plan.cpp
+++ b/src/mongo/db/exec/multi_plan.cpp
@@ -46,6 +46,7 @@
#include "mongo/db/query/classic_plan_cache.h"
#include "mongo/db/query/collection_query_info.h"
#include "mongo/db/query/explain.h"
+#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/plan_cache_key_factory.h"
#include "mongo/db/query/plan_ranker.h"
#include "mongo/db/query/plan_ranker_util.h"
@@ -280,8 +281,12 @@ Status MultiPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
}
}
- plan_cache_util::updatePlanCache(
- expCtx()->opCtx, collection(), _cachingMode, *_query, std::move(ranking), _candidates);
+ plan_cache_util::updatePlanCache(expCtx()->opCtx,
+ MultipleCollectionAccessor(collection()),
+ _cachingMode,
+ *_query,
+ std::move(ranking),
+ _candidates);
return Status::OK();
}
diff --git a/src/mongo/db/exec/plan_cache_util.cpp b/src/mongo/db/exec/plan_cache_util.cpp
index 85d5c823849..a3fc5ff19d1 100644
--- a/src/mongo/db/exec/plan_cache_util.cpp
+++ b/src/mongo/db/exec/plan_cache_util.cpp
@@ -74,17 +74,17 @@ void logNotCachingNoData(std::string&& solution) {
} // namespace log_detail
void updatePlanCache(OperationContext* opCtx,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
const CanonicalQuery& query,
const QuerySolution& solution,
const sbe::PlanStage& root,
const stage_builder::PlanStageData& data) {
- // TODO SERVER-61507: Integration between lowering parts of aggregation pipeline into the find
- // subsystem and the new SBE cache isn't implemented yet. Remove cq->pipeline().empty() check
- // once it's implemented.
- if (shouldCacheQuery(query) && collection && query.pipeline().empty() &&
+ // TODO SERVER-61507: Remove canUseSbePlanCache check once $group pushdown is
+ // integrated with SBE plan cache.
+ if (shouldCacheQuery(query) && collections.getMainCollection() &&
+ canonical_query_encoder::canUseSbePlanCache(query) &&
feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV()) {
- auto key = plan_cache_key_factory::make<sbe::PlanCacheKey>(query, collection);
+ auto key = plan_cache_key_factory::make(query, collections);
auto plan = std::make_unique<sbe::CachedSbePlan>(root.clone(), data);
plan->indexFilterApplied = solution.indexFilterApplied;
sbe::getPlanCache(opCtx).setPinned(
diff --git a/src/mongo/db/exec/plan_cache_util.h b/src/mongo/db/exec/plan_cache_util.h
index 630458cbcd4..2fb16d8be89 100644
--- a/src/mongo/db/exec/plan_cache_util.h
+++ b/src/mongo/db/exec/plan_cache_util.h
@@ -32,6 +32,7 @@
#include "mongo/db/exec/plan_stats.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/query/collection_query_info.h"
+#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/plan_cache_debug_info.h"
#include "mongo/db/query/plan_cache_key_factory.h"
#include "mongo/db/query/plan_explainer_factory.h"
@@ -98,7 +99,7 @@ plan_cache_debug_info::DebugInfoSBE buildDebugInfo(const QuerySolution* solution
template <typename PlanStageType, typename ResultType, typename Data>
void updatePlanCache(
OperationContext* opCtx,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
PlanCachingMode cachingMode,
const CanonicalQuery& query,
std::unique_ptr<plan_ranker::PlanRankingDecision> ranking,
@@ -183,6 +184,7 @@ void updatePlanCache(
callbacks{query, buildDebugInfoFn};
winningPlan.solution->cacheData->indexFilterApplied =
winningPlan.solution->indexFilterApplied;
+ auto& collection = collections.getMainCollection();
uassertStatusOK(CollectionQueryInfo::get(collection)
.getPlanCache()
->set(plan_cache_key_factory::make<PlanCacheKey>(query, collection),
@@ -195,10 +197,10 @@ void updatePlanCache(
if (winningPlan.solution->cacheData != nullptr) {
if constexpr (std::is_same_v<PlanStageType, std::unique_ptr<sbe::PlanStage>>) {
- // TODO SERVER-61507: Integration between lowering parts of aggregation pipeline
- // into the find subsystem and the new SBE cache isn't implemented yet.
+ // TODO SERVER-61507: Remove canUseSbePlanCache check once $group pushdown
+ // is integrated with SBE plan cache.
if (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() &&
- query.pipeline().empty()) {
+ canonical_query_encoder::canUseSbePlanCache(query)) {
tassert(6142201,
"The winning CandidatePlan should contain the original plan",
winningPlan.clonedPlan);
@@ -215,16 +217,16 @@ void updatePlanCache(
plan_cache_debug_info::DebugInfoSBE>
callbacks{query, buildDebugInfoFn};
uassertStatusOK(sbe::getPlanCache(opCtx).set(
- plan_cache_key_factory::make<sbe::PlanCacheKey>(query, collection),
+ plan_cache_key_factory::make(query, collections),
std::move(cachedPlan),
*rankingDecision,
opCtx->getServiceContext()->getPreciseClockSource()->now(),
&callbacks,
boost::none /* worksGrowthCoefficient */));
} else {
- // TODO(SERVER-61507, SERVER-64882): Fall back to use the classic plan cache.
- // Remove this branch after "gFeatureFlagSbePlanCache" is removed and lowering
- // parts of pipeline is integrated with SBE cache.
+ // TODO(SERVER-64882, SERVER-61507): Fall back to use the classic plan cache.
+ // Remove this branch after "gFeatureFlagSbePlanCache" is removed and $group
+ // pushdown is integrated with SBE plan cache.
cacheClassicPlan();
}
} else {
@@ -245,7 +247,7 @@ void updatePlanCache(
* the cache, the plan immediately becomes "active".
*/
void updatePlanCache(OperationContext* opCtx,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
const CanonicalQuery& query,
const QuerySolution& solution,
const sbe::PlanStage& root,
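
The plan-cache changes above replace a single CollectionPtr with a MultipleCollectionAccessor, so the SBE cache key factory can see every collection involved in the query while eligibility checks still consult the main collection. A toy model of that accessor shape, with std::string standing in for collection handles and the class name invented:

    #include <iostream>
    #include <string>
    #include <vector>

    // One main collection plus any number of secondary collections (for example the foreign
    // sides of pushed-down $lookup stages).
    class CollectionsSketch {
    public:
        CollectionsSketch(std::string main, std::vector<std::string> secondaries = {})
            : _main(std::move(main)), _secondaries(std::move(secondaries)) {}

        const std::string& getMainCollection() const {
            return _main;
        }
        const std::vector<std::string>& getSecondaryCollections() const {
            return _secondaries;
        }

    private:
        std::string _main;
        std::vector<std::string> _secondaries;
    };

    int main() {
        CollectionsSketch collections("test.orders", {"test.inventory"});
        std::cout << collections.getMainCollection() << " (+"
                  << collections.getSecondaryCollections().size() << " secondary)\n";
        return 0;
    }
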
diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript
index 6a2503d7f26..6ee97450f2b 100644
--- a/src/mongo/db/exec/sbe/SConscript
+++ b/src/mongo/db/exec/sbe/SConscript
@@ -223,6 +223,7 @@ env.CppUnitTest(
'$BUILD_DIR/mongo/db/query/collation/collator_interface_mock',
'$BUILD_DIR/mongo/db/service_context_d_test_fixture',
'$BUILD_DIR/mongo/db/service_context_test_fixture',
+ '$BUILD_DIR/mongo/util/pcre_wrapper',
'sbe_plan_stage_test',
],
)
diff --git a/src/mongo/db/exec/sbe/abt/abt_lower.cpp b/src/mongo/db/exec/sbe/abt/abt_lower.cpp
index a706d150d61..25ea76bd8d0 100644
--- a/src/mongo/db/exec/sbe/abt/abt_lower.cpp
+++ b/src/mongo/db/exec/sbe/abt/abt_lower.cpp
@@ -990,9 +990,20 @@ std::unique_ptr<sbe::PlanStage> SBENodeLowering::walk(const IndexScanNode& n, co
generateSlots(fieldProjectionMap, ridSlot, rootSlot, fields, vars);
uassert(6624233, "Cannot deliver root projection in this context", !rootSlot.has_value());
+ std::vector<std::pair<size_t, sbe::value::SlotId>> indexVars;
sbe::IndexKeysInclusionSet indexKeysToInclude;
- for (const std::string& fieldName : fields) {
- indexKeysToInclude.set(decodeIndexKeyName(fieldName), true);
+
+ for (size_t index = 0; index < fields.size(); index++) {
+ const size_t indexFieldPos = decodeIndexKeyName(fields.at(index));
+ indexVars.emplace_back(indexFieldPos, vars.at(index));
+ indexKeysToInclude.set(indexFieldPos, true);
+ }
+
+    // Make sure vars are sorted by index field position.
+ std::sort(indexVars.begin(), indexVars.end());
+ vars.clear();
+ for (const auto& [indexFieldPos, slot] : indexVars) {
+ vars.push_back(slot);
}
auto lowerBoundExpr = convertBoundsToExpr(true /*isLower*/, indexDef, interval);
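
The loop added to SBENodeLowering::walk above decodes each field name to its position in the index key and then reorders the output slots by that position. The reordering itself is ordinary pair sorting, shown standalone below with plain ints for slot ids:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    int main() {
        // (indexFieldPos, slot) pairs in the arbitrary order the fields were listed in.
        std::vector<std::pair<std::size_t, int>> indexVars = {{2, 11}, {0, 12}, {1, 13}};

        // Sorting on the first member puts the slots into index-field-position order, which is
        // the order the index scan stage expects its output vector to follow.
        std::sort(indexVars.begin(), indexVars.end());

        std::vector<int> vars;
        for (const auto& entry : indexVars) {
            vars.push_back(entry.second);
        }

        for (int slot : vars) {
            std::cout << slot << " ";  // 12 13 11
        }
        std::cout << "\n";
        return 0;
    }
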
diff --git a/src/mongo/db/exec/sbe/abt/sbe_abt_diff_test.cpp b/src/mongo/db/exec/sbe/abt/sbe_abt_diff_test.cpp
index 252b7ce52b4..34aa0441994 100644
--- a/src/mongo/db/exec/sbe/abt/sbe_abt_diff_test.cpp
+++ b/src/mongo/db/exec/sbe/abt/sbe_abt_diff_test.cpp
@@ -247,6 +247,14 @@ TEST_F(NodeSBE, DiffTest) {
ASSERT_TRUE(compare("[{$match: {'a': {$ne: 2}}}]",
{"{a: 1}", "{a: 2}", "{a: [1, 2]}", "{a: [1]}", "{a: [2]}"}));
+
+
+ ASSERT_TRUE(compare("[{$project: {concat: {$concat: ['$a', ' - ', '$b', ' - ', '$c']}}}]",
+ {"{a: 'a1', b: 'b1', c: 'c1'}"}));
+ ASSERT_TRUE(compare(
+ "[{$project: {res1: {$divide: ['$a', '$b']}, res2: {$divide: ['$c', '$a']}, res3: {$mod: "
+ "['$d', '$b']}, res4: {$abs: '$e'}, res5: {$floor: '$f'}, res6: {$ceil: {$ln: '$d'}}}}]",
+ {"{a: 5, b: 10, c: 20, d: 25, e: -5, f: 2.4}"}));
}
} // namespace
diff --git a/src/mongo/db/exec/sbe/expressions/expression.cpp b/src/mongo/db/exec/sbe/expressions/expression.cpp
index 61812667316..970543b706a 100644
--- a/src/mongo/db/exec/sbe/expressions/expression.cpp
+++ b/src/mongo/db/exec/sbe/expressions/expression.cpp
@@ -582,6 +582,11 @@ static stdx::unordered_map<std::string, InstrFn> kInstrFunctions = {
{"collMin", InstrFn{[](size_t n) { return n == 2; }, &vm::CodeFragment::appendCollMin, true}},
{"collMax", InstrFn{[](size_t n) { return n == 2; }, &vm::CodeFragment::appendCollMax, true}},
{"mod", InstrFn{[](size_t n) { return n == 2; }, &vm::CodeFragment::appendMod, false}},
+ // Note that we do not provide a pointer to a function for appending the 'applyClassicMatcher'
+ // instruction, because it's required that the first argument to applyClassicMatcher be a
+ // constant MatchExpression. This constant is stored as part of the bytecode itself, to avoid
+ // the stack manipulation overhead.
+ {"applyClassicMatcher", InstrFn{[](size_t n) { return n == 2; }, nullptr, false}},
};
} // namespace
@@ -689,6 +694,18 @@ vm::CodeFragment EFunction::compileDirect(CompileCtx& ctx) const {
code.appendTraverseP(bodyPosition);
return code;
+ } else if (_name == "applyClassicMatcher") {
+ tassert(6681400,
+ "First argument to applyClassicMatcher must be constant",
+ _nodes[0]->as<EConstant>());
+ auto [matcherTag, matcherVal] = _nodes[0]->as<EConstant>()->getConstant();
+ tassert(6681409,
+ "First argument to applyClassicMatcher must be a classic matcher",
+ matcherTag == value::TypeTags::classicMatchExpresion);
+
+ code.append(_nodes[1]->compileDirect(ctx));
+ code.appendApplyClassicMatcher(value::getClassicMatchExpressionView(matcherVal));
+ return code;
}
// The order of evaluation is flipped for instruction functions. We may want to change the
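
The applyClassicMatcher branch above illustrates an SBE VM convention: when an instruction's argument must be a compile-time constant, the constant is embedded directly in the instruction stream rather than pushed through the value stack. The toy bytecode below demonstrates the same idea with an invented opcode and matcher type; it is not the real vm::CodeFragment layout.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <vector>

    struct Matcher {
        int valueToMatch;
    };

    constexpr std::uint8_t kApplyMatcherOp = 0x42;  // made-up opcode

    // Append the opcode followed by the matcher pointer, inline in the instruction stream.
    void appendApplyMatcher(std::vector<std::uint8_t>& code, const Matcher* matcher) {
        code.push_back(kApplyMatcherOp);
        const auto addr = reinterpret_cast<std::uintptr_t>(matcher);
        const std::size_t offset = code.size();
        code.resize(code.size() + sizeof(addr));
        std::memcpy(code.data() + offset, &addr, sizeof(addr));
    }

    int main() {
        Matcher matcher{5};
        std::vector<std::uint8_t> code;
        appendApplyMatcher(code, &matcher);

        // "Interpreting" the instruction: read the opcode, then recover the constant after it.
        std::uintptr_t addr = 0;
        std::memcpy(&addr, code.data() + 1, sizeof(addr));
        const auto* decoded = reinterpret_cast<const Matcher*>(addr);
        std::cout << std::boolalpha << (code[0] == kApplyMatcherOp) << " "
                  << (decoded->valueToMatch == 5) << "\n";
        return 0;
    }
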
diff --git a/src/mongo/db/exec/sbe/sbe_test.cpp b/src/mongo/db/exec/sbe/sbe_test.cpp
index 323368a5334..5a577f02462 100644
--- a/src/mongo/db/exec/sbe/sbe_test.cpp
+++ b/src/mongo/db/exec/sbe/sbe_test.cpp
@@ -31,6 +31,7 @@
#include "mongo/db/exec/sbe/values/value.h"
#include "mongo/db/exec/sbe/vm/vm.h"
#include "mongo/unittest/unittest.h"
+#include "mongo/util/pcre.h"
namespace mongo::sbe {
@@ -421,6 +422,85 @@ TEST(SBEVM, ConvertBinDataToBsonObj) {
namespace {
+// The hex representation of memory addresses in the output of CodeFragment::toString() differs on
+// Linux and Windows machines, so 'kAddrPattern' is used to cover both cases.
+static const std::string kLinuxAddrPattern{"(0x[a-f0-9]+)"};
+static const std::string kWindowsAddrPattern{"([A-F0-9]+)"};
+static const std::string kAddrPattern{"(" + kLinuxAddrPattern + "|" + kWindowsAddrPattern + ")"};
+
+// The beginning of the output from CodeFragment::toString() gives a range of the addresses that
+// 'pcPointer' will traverse.
+static const std::string kPcPointerRangePattern{"(\\[" + kAddrPattern + ")-(" + kAddrPattern +
+ ")\\])"};
+
+/**
+ * Creates a pcre pattern to match the instructions in the output of CodeFragment::toString(). Any
+ * arguments must be passed in a single comma-separated string; the absence of arguments is
+ * represented by an empty string.
+ */
+std::string instrPattern(std::string op, std::string args) {
+ return "(" + kAddrPattern + ": " + op + "\\(" + args + "\\); )";
+}
+} // namespace
+
+TEST(SBEVM, CodeFragmentToString) {
+ {
+ vm::CodeFragment code;
+ std::string toStringPattern{kPcPointerRangePattern + "( )"};
+
+ code.appendDiv();
+ toStringPattern += instrPattern("div", "");
+ code.appendMul();
+ toStringPattern += instrPattern("mul", "");
+ code.appendAdd();
+ toStringPattern += instrPattern("add", "");
+
+ std::string instrs = code.toString();
+
+ static const pcre::Regex validToStringOutput{toStringPattern};
+
+ ASSERT_TRUE(!!validToStringOutput.matchView(instrs));
+ }
+}
+
+TEST(SBEVM, CodeFragmentToStringArgs) {
+ {
+ vm::CodeFragment code;
+ std::string toStringPattern{kAddrPattern};
+
+ code.appendFillEmpty(vm::Instruction::True);
+ toStringPattern += instrPattern("fillEmptyConst", "k: True");
+ code.appendFillEmpty(vm::Instruction::Null);
+ toStringPattern += instrPattern("fillEmptyConst", "k: Null");
+ code.appendFillEmpty(vm::Instruction::False);
+ toStringPattern += instrPattern("fillEmptyConst", "k: False");
+
+ code.appendTraverseP(0xAA);
+ auto offsetP = 0xAA - code.instrs().size();
+ toStringPattern += instrPattern("traversePConst", "offset: " + std::to_string(offsetP));
+ code.appendTraverseF(0xBB, vm::Instruction::True);
+ auto offsetF = 0xBB - code.instrs().size();
+ toStringPattern +=
+ instrPattern("traverseFConst", "k: True, offset: " + std::to_string(offsetF));
+
+ auto [tag, val] = value::makeNewString("Hello world!");
+ value::ValueGuard guard{tag, val};
+ code.appendGetField(tag, val);
+ toStringPattern += instrPattern("getFieldConst", "value: \"Hello world!\"");
+
+ code.appendAdd();
+ toStringPattern += instrPattern("add", "");
+
+ std::string instrs = code.toString();
+
+ static const pcre::Regex validToStringOutput{toStringPattern};
+
+ ASSERT_TRUE(!!validToStringOutput.matchView(instrs));
+ }
+}
+
+namespace {
+
/**
* Fills bytes after the null terminator in the string with 'pattern'.
*
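
The two new tests above build one long regular expression from per-instruction fragments and match it against CodeFragment::toString(). The fragment-assembly idea in isolation, using std::regex so the sketch stays self-contained (the real tests go through mongo's pcre wrapper):

    #include <iostream>
    #include <regex>
    #include <string>

    static const std::string kLinuxAddr{"(0x[a-f0-9]+)"};
    static const std::string kWindowsAddr{"([A-F0-9]+)"};
    static const std::string kAddr{"(" + kLinuxAddr + "|" + kWindowsAddr + ")"};

    // One fragment per printed instruction: "<address>: <op>(<args>); ".
    std::string instrPattern(const std::string& op, const std::string& args) {
        return "(" + kAddr + ": " + op + "\\(" + args + "\\); )";
    }

    int main() {
        const std::string pattern = instrPattern("add", "") + instrPattern("mul", "");
        const std::string output = "0x7f3a10: add(); 0x7f3a11: mul(); ";

        std::regex re(pattern);
        std::cout << std::boolalpha << std::regex_search(output, re) << "\n";  // true
        return 0;
    }
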
diff --git a/src/mongo/db/exec/sbe/stages/branch.cpp b/src/mongo/db/exec/sbe/stages/branch.cpp
index bec12b12ee2..adbbd533273 100644
--- a/src/mongo/db/exec/sbe/stages/branch.cpp
+++ b/src/mongo/db/exec/sbe/stages/branch.cpp
@@ -42,8 +42,9 @@ BranchStage::BranchStage(std::unique_ptr<PlanStage> inputThen,
value::SlotVector inputThenVals,
value::SlotVector inputElseVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId)
- : PlanStage("branch"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("branch"_sd, planNodeId, participateInTrialRunTracking),
_filter(std::move(filter)),
_inputThenVals(std::move(inputThenVals)),
_inputElseVals(std::move(inputElseVals)),
@@ -61,7 +62,8 @@ std::unique_ptr<PlanStage> BranchStage::clone() const {
_inputThenVals,
_inputElseVals,
_outputVals,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void BranchStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/branch.h b/src/mongo/db/exec/sbe/stages/branch.h
index 67b5af8a517..df813e762a4 100644
--- a/src/mongo/db/exec/sbe/stages/branch.h
+++ b/src/mongo/db/exec/sbe/stages/branch.h
@@ -52,7 +52,8 @@ public:
value::SlotVector inputThenVals,
value::SlotVector inputElseVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/bson_scan.cpp b/src/mongo/db/exec/sbe/stages/bson_scan.cpp
index c340071ba0e..3a4c3b50512 100644
--- a/src/mongo/db/exec/sbe/stages/bson_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/bson_scan.cpp
@@ -42,8 +42,9 @@ BSONScanStage::BSONScanStage(const char* bsonBegin,
boost::optional<value::SlotId> recordSlot,
std::vector<std::string> fields,
value::SlotVector vars,
- PlanNodeId planNodeId)
- : PlanStage("bsonscan"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("bsonscan"_sd, planNodeId, participateInTrialRunTracking),
_bsonBegin(bsonBegin),
_bsonEnd(bsonEnd),
_recordSlot(recordSlot),
@@ -52,8 +53,13 @@ BSONScanStage::BSONScanStage(const char* bsonBegin,
_bsonCurrent(bsonBegin) {}
std::unique_ptr<PlanStage> BSONScanStage::clone() const {
- return std::make_unique<BSONScanStage>(
- _bsonBegin, _bsonEnd, _recordSlot, _fields, _vars, _commonStats.nodeId);
+ return std::make_unique<BSONScanStage>(_bsonBegin,
+ _bsonEnd,
+ _recordSlot,
+ _fields,
+ _vars,
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void BSONScanStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/bson_scan.h b/src/mongo/db/exec/sbe/stages/bson_scan.h
index 7804bcd4149..79238f695a2 100644
--- a/src/mongo/db/exec/sbe/stages/bson_scan.h
+++ b/src/mongo/db/exec/sbe/stages/bson_scan.h
@@ -51,7 +51,8 @@ public:
boost::optional<value::SlotId> recordSlot,
std::vector<std::string> fields,
value::SlotVector vars,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/check_bounds.cpp b/src/mongo/db/exec/sbe/stages/check_bounds.cpp
index 483e9f50260..bc62b089005 100644
--- a/src/mongo/db/exec/sbe/stages/check_bounds.cpp
+++ b/src/mongo/db/exec/sbe/stages/check_bounds.cpp
@@ -39,8 +39,9 @@ CheckBoundsStage::CheckBoundsStage(std::unique_ptr<PlanStage> input,
value::SlotId inKeySlot,
value::SlotId inRecordIdSlot,
value::SlotId outSlot,
- PlanNodeId planNodeId)
- : PlanStage{"chkbounds"_sd, planNodeId},
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage{"chkbounds"_sd, planNodeId, participateInTrialRunTracking},
_params{std::move(params)},
_inKeySlot{inKeySlot},
_inRecordIdSlot{inRecordIdSlot},
@@ -49,8 +50,13 @@ CheckBoundsStage::CheckBoundsStage(std::unique_ptr<PlanStage> input,
}
std::unique_ptr<PlanStage> CheckBoundsStage::clone() const {
- return std::make_unique<CheckBoundsStage>(
- _children[0]->clone(), _params, _inKeySlot, _inRecordIdSlot, _outSlot, _commonStats.nodeId);
+ return std::make_unique<CheckBoundsStage>(_children[0]->clone(),
+ _params,
+ _inKeySlot,
+ _inRecordIdSlot,
+ _outSlot,
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void CheckBoundsStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/check_bounds.h b/src/mongo/db/exec/sbe/stages/check_bounds.h
index 29f52faa523..dbdf87938f7 100644
--- a/src/mongo/db/exec/sbe/stages/check_bounds.h
+++ b/src/mongo/db/exec/sbe/stages/check_bounds.h
@@ -76,7 +76,8 @@ public:
value::SlotId inKeySlot,
value::SlotId inRecordIdSlot,
value::SlotId outSlot,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/co_scan.cpp b/src/mongo/db/exec/sbe/stages/co_scan.cpp
index 73e89a5e87e..9666d03cf01 100644
--- a/src/mongo/db/exec/sbe/stages/co_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/co_scan.cpp
@@ -34,11 +34,14 @@
#include "mongo/db/exec/sbe/expressions/expression.h"
namespace mongo::sbe {
-CoScanStage::CoScanStage(PlanNodeId planNodeId, PlanYieldPolicy* yieldPolicy)
- : PlanStage("coscan"_sd, yieldPolicy, planNodeId) {}
+CoScanStage::CoScanStage(PlanNodeId planNodeId,
+ PlanYieldPolicy* yieldPolicy,
+ bool participateInTrialRunTracking)
+ : PlanStage("coscan"_sd, yieldPolicy, planNodeId, participateInTrialRunTracking) {}
std::unique_ptr<PlanStage> CoScanStage::clone() const {
- return std::make_unique<CoScanStage>(_commonStats.nodeId);
+ return std::make_unique<CoScanStage>(
+ _commonStats.nodeId, _yieldPolicy, _participateInTrialRunTracking);
}
void CoScanStage::prepare(CompileCtx& ctx) {}
value::SlotAccessor* CoScanStage::getAccessor(CompileCtx& ctx, value::SlotId slot) {
diff --git a/src/mongo/db/exec/sbe/stages/co_scan.h b/src/mongo/db/exec/sbe/stages/co_scan.h
index 4625b636a14..1f8c8d5404d 100644
--- a/src/mongo/db/exec/sbe/stages/co_scan.h
+++ b/src/mongo/db/exec/sbe/stages/co_scan.h
@@ -42,7 +42,9 @@ namespace mongo::sbe {
*/
class CoScanStage final : public PlanStage {
public:
- explicit CoScanStage(PlanNodeId, PlanYieldPolicy* yieldPolicy = nullptr);
+ explicit CoScanStage(PlanNodeId,
+ PlanYieldPolicy* yieldPolicy = nullptr,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp
index 8058307a916..24f769fa2c7 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp
@@ -59,8 +59,9 @@ ColumnScanStage::ColumnScanStage(UUID collectionUuid,
std::vector<std::unique_ptr<EExpression>> pathExprs,
value::SlotId rowStoreSlot,
PlanYieldPolicy* yieldPolicy,
- PlanNodeId nodeId)
- : PlanStage("columnscan"_sd, yieldPolicy, nodeId),
+ PlanNodeId nodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("columnscan"_sd, yieldPolicy, nodeId, participateInTrialRunTracking),
_collUuid(collectionUuid),
_columnIndexName(columnIndexName),
_fieldSlots(std::move(fieldSlots)),
@@ -89,7 +90,8 @@ std::unique_ptr<PlanStage> ColumnScanStage::clone() const {
std::move(pathExprs),
_rowStoreSlot,
_yieldPolicy,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void ColumnScanStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h
index d00d4641171..1efeef25bca 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.h
+++ b/src/mongo/db/exec/sbe/stages/column_scan.h
@@ -53,7 +53,8 @@ public:
std::vector<std::unique_ptr<EExpression>> pathExprs,
value::SlotId internalSlot,
PlanYieldPolicy* yieldPolicy,
- PlanNodeId nodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/exchange.cpp b/src/mongo/db/exec/sbe/stages/exchange.cpp
index 8cd7b065559..fdbb6531913 100644
--- a/src/mongo/db/exec/sbe/stages/exchange.cpp
+++ b/src/mongo/db/exec/sbe/stages/exchange.cpp
@@ -171,8 +171,9 @@ ExchangeConsumer::ExchangeConsumer(std::unique_ptr<PlanStage> input,
ExchangePolicy policy,
std::unique_ptr<EExpression> partition,
std::unique_ptr<EExpression> orderLess,
- PlanNodeId planNodeId)
- : PlanStage("exchange"_sd, planNodeId) {
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("exchange"_sd, planNodeId, participateInTrialRunTracking) {
_children.emplace_back(std::move(input));
_state = std::make_shared<ExchangeState>(
numOfProducers, std::move(fields), policy, std::move(partition), std::move(orderLess));
@@ -186,13 +187,16 @@ ExchangeConsumer::ExchangeConsumer(std::unique_ptr<PlanStage> input,
uassert(5922202, "partition expression must not be present", !_state->partitionExpr());
}
}
-ExchangeConsumer::ExchangeConsumer(std::shared_ptr<ExchangeState> state, PlanNodeId planNodeId)
- : PlanStage("exchange"_sd, planNodeId), _state(state) {
+ExchangeConsumer::ExchangeConsumer(std::shared_ptr<ExchangeState> state,
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("exchange"_sd, planNodeId, participateInTrialRunTracking), _state(state) {
_tid = _state->addConsumer(this);
_orderPreserving = _state->isOrderPreserving();
}
std::unique_ptr<PlanStage> ExchangeConsumer::clone() const {
- return std::make_unique<ExchangeConsumer>(_state, _commonStats.nodeId);
+ return std::make_unique<ExchangeConsumer>(
+ _state, _commonStats.nodeId, _participateInTrialRunTracking);
}
void ExchangeConsumer::prepare(CompileCtx& ctx) {
for (size_t idx = 0; idx < _state->fields().size(); ++idx) {
@@ -486,8 +490,9 @@ void ExchangeProducer::closePipes() {
ExchangeProducer::ExchangeProducer(std::unique_ptr<PlanStage> input,
std::shared_ptr<ExchangeState> state,
- PlanNodeId planNodeId)
- : PlanStage("exchangep"_sd, planNodeId), _state(state) {
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("exchangep"_sd, planNodeId, participateInTrialRunTracking), _state(state) {
_children.emplace_back(std::move(input));
_tid = _state->addProducer(this);
diff --git a/src/mongo/db/exec/sbe/stages/exchange.h b/src/mongo/db/exec/sbe/stages/exchange.h
index b94b4968f66..15928cd50fb 100644
--- a/src/mongo/db/exec/sbe/stages/exchange.h
+++ b/src/mongo/db/exec/sbe/stages/exchange.h
@@ -261,9 +261,12 @@ public:
ExchangePolicy policy,
std::unique_ptr<EExpression> partition,
std::unique_ptr<EExpression> orderLess,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
- ExchangeConsumer(std::shared_ptr<ExchangeState> state, PlanNodeId planNodeId);
+ ExchangeConsumer(std::shared_ptr<ExchangeState> state,
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
@@ -311,7 +314,8 @@ class ExchangeProducer final : public PlanStage {
public:
ExchangeProducer(std::unique_ptr<PlanStage> input,
std::shared_ptr<ExchangeState> state,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
static void start(OperationContext* opCtx,
CompileCtx& ctx,
diff --git a/src/mongo/db/exec/sbe/stages/filter.h b/src/mongo/db/exec/sbe/stages/filter.h
index 2120be1c062..059dd1c7ab4 100644
--- a/src/mongo/db/exec/sbe/stages/filter.h
+++ b/src/mongo/db/exec/sbe/stages/filter.h
@@ -58,16 +58,21 @@ class FilterStage final : public PlanStage {
public:
FilterStage(std::unique_ptr<PlanStage> input,
std::unique_ptr<EExpression> filter,
- PlanNodeId planNodeId)
- : PlanStage(IsConst ? "cfilter"_sd : (IsEof ? "efilter" : "filter"_sd), planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true)
+ : PlanStage(IsConst ? "cfilter"_sd : (IsEof ? "efilter" : "filter"_sd),
+ planNodeId,
+ participateInTrialRunTracking),
_filter(std::move(filter)) {
static_assert(!IsEof || !IsConst);
_children.emplace_back(std::move(input));
}
std::unique_ptr<PlanStage> clone() const final {
- return std::make_unique<FilterStage<IsConst, IsEof>>(
- _children[0]->clone(), _filter->clone(), _commonStats.nodeId);
+ return std::make_unique<FilterStage<IsConst, IsEof>>(_children[0]->clone(),
+ _filter->clone(),
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void prepare(CompileCtx& ctx) final {
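
FilterStage above shows the whole 'participateInTrialRunTracking' pattern in one place: the flag defaults to true, is forwarded to the PlanStage base, and is threaded through clone() so a cloned subtree keeps the same opt-in/opt-out decision. The remaining stage changes in this diff repeat exactly that plumbing. A minimal standalone sketch of the pattern, with an invented StageSketch class in place of the PlanStage hierarchy:

    #include <iostream>
    #include <memory>

    class StageSketch {
    public:
        StageSketch(int planNodeId, bool participateInTrialRunTracking = true)
            : _nodeId(planNodeId), _participateInTrialRunTracking(participateInTrialRunTracking) {}

        std::unique_ptr<StageSketch> clone() const {
            // Forwarding the flag is the point: dropping it here would silently re-enable
            // trial-run tracking on the clone.
            return std::make_unique<StageSketch>(_nodeId, _participateInTrialRunTracking);
        }

        bool tracksTrialRun() const {
            return _participateInTrialRunTracking;
        }

    private:
        int _nodeId;
        bool _participateInTrialRunTracking;
    };

    int main() {
        StageSketch stage(1, /*participateInTrialRunTracking=*/false);
        std::cout << std::boolalpha << stage.clone()->tracksTrialRun() << "\n";  // false
        return 0;
    }
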
diff --git a/src/mongo/db/exec/sbe/stages/hash_agg.cpp b/src/mongo/db/exec/sbe/stages/hash_agg.cpp
index e3fd62cb86b..f930d4b5e95 100644
--- a/src/mongo/db/exec/sbe/stages/hash_agg.cpp
+++ b/src/mongo/db/exec/sbe/stages/hash_agg.cpp
@@ -47,8 +47,9 @@ HashAggStage::HashAggStage(std::unique_ptr<PlanStage> input,
bool optimizedClose,
boost::optional<value::SlotId> collatorSlot,
bool allowDiskUse,
- PlanNodeId planNodeId)
- : PlanStage("group"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("group"_sd, planNodeId, participateInTrialRunTracking),
_gbs(std::move(gbs)),
_aggs(std::move(aggs)),
_collatorSlot(collatorSlot),
@@ -74,7 +75,8 @@ std::unique_ptr<PlanStage> HashAggStage::clone() const {
_optimizedClose,
_collatorSlot,
_allowDiskUse,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void HashAggStage::doSaveState(bool relinquishCursor) {
@@ -354,25 +356,18 @@ void HashAggStage::open(bool reOpen) {
key.reset(idx++, false, tag, val);
}
- if (!_recordStore) {
- // The memory limit hasn't been reached yet, accumulate state in '_ht'.
- auto [it, inserted] = _ht->try_emplace(std::move(key), value::MaterializedRow{0});
- if (inserted) {
- // Copy keys.
- const_cast<value::MaterializedRow&>(it->first).makeOwned();
- // Initialize accumulators.
- it->second.resize(_outAggAccessors.size());
- }
- // Always update the state in the '_ht' for the branch when data hasn't been
- // spilled to disk.
+
+ if (_htIt = _ht->find(key); !_recordStore && _htIt == _ht->end()) {
+                // The memory limit hasn't been reached yet, so insert a new key into '_ht' by
+                // copying it. Note: as a future optimization, we should avoid doing both the
+                // find() lookup and the emplace.
+ key.makeOwned();
+ auto [it, _] = _ht->emplace(std::move(key), value::MaterializedRow{0});
+ // Initialize accumulators.
+ it->second.resize(_outAggAccessors.size());
_htIt = it;
- updateAggStateHt = true;
- } else {
- // The memory limit has been reached, accumulate state in '_ht' only if we
- // find the key in '_ht'.
- _htIt = _ht->find(key);
- updateAggStateHt = _htIt != _ht->end();
}
+ updateAggStateHt = _htIt != _ht->end();
if (updateAggStateHt) {
// Accumulate state in '_ht' by pointing the '_outAggAccessors' the
@@ -500,9 +495,9 @@ PlanState HashAggStage::getNext() {
KeyString::TypeBits::fromBuffer(KeyString::Version::kLatestVersion, &valReader);
_aggValueRecordStore = val;
- BufBuilder buf;
+ _aggKeyRSBuffer.reset();
_aggKeyRecordStore = value::MaterializedRow::deserializeFromKeyString(
- decodeKeyString(nextRecord->id, typeBits), &buf);
+ decodeKeyString(nextRecord->id, typeBits), &_aggKeyRSBuffer);
return trackPlanState(PlanState::ADVANCED);
} else {
_rsCursor.reset();
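
The reworked HashAggStage::open() loop above does one hash-table lookup per input row: find() decides whether the group key exists, a brand-new key is copied and inserted only while nothing has spilled, and the accumulators are then updated through the resulting iterator. The same control flow in standalone form, with std::string keys and a plain counter in place of MaterializedRow state:

    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    int main() {
        std::unordered_map<std::string, std::vector<long long>> ht;
        const bool spilled = false;  // models "_recordStore is not in use yet"

        auto accumulate = [&](const std::string& key) {
            auto it = ht.find(key);
            if (!spilled && it == ht.end()) {
                // First time we see this key below the memory limit: own a copy of it and
                // initialize the accumulator state.
                it = ht.emplace(key, std::vector<long long>{0}).first;
            }
            if (it != ht.end()) {
                ++it->second[0];  // update the aggregate state in place
            }
        };

        for (const std::string& key : {"a", "b", "a", "a"}) {
            accumulate(key);
        }
        std::cout << "a -> " << ht["a"][0] << ", b -> " << ht["b"][0] << "\n";  // a -> 3, b -> 1
        return 0;
    }
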
diff --git a/src/mongo/db/exec/sbe/stages/hash_agg.h b/src/mongo/db/exec/sbe/stages/hash_agg.h
index 8c117e8717d..d200c4b9c3d 100644
--- a/src/mongo/db/exec/sbe/stages/hash_agg.h
+++ b/src/mongo/db/exec/sbe/stages/hash_agg.h
@@ -75,7 +75,8 @@ public:
bool optimizedClose,
boost::optional<value::SlotId> collatorSlot,
bool allowDiskUse,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
@@ -184,6 +185,11 @@ private:
std::vector<std::unique_ptr<value::MaterializedSingleRowAccessor>> _outRecordStoreKeyAccessors;
std::vector<std::unique_ptr<value::MaterializedSingleRowAccessor>> _outRecordStoreAggAccessors;
+    // This buffer stores values for the spilled '_aggKeyRecordStore' row that is loaded into
+    // memory from the '_recordStore'. Values in the '_aggKeyRecordStore' row are pointers into
+    // this buffer.
+ BufBuilder _aggKeyRSBuffer;
+
std::vector<value::SlotAccessor*> _seekKeysAccessors;
value::MaterializedRow _seekKeys;
diff --git a/src/mongo/db/exec/sbe/stages/hash_join.cpp b/src/mongo/db/exec/sbe/stages/hash_join.cpp
index 86675029c0e..bad53262acb 100644
--- a/src/mongo/db/exec/sbe/stages/hash_join.cpp
+++ b/src/mongo/db/exec/sbe/stages/hash_join.cpp
@@ -44,8 +44,9 @@ HashJoinStage::HashJoinStage(std::unique_ptr<PlanStage> outer,
value::SlotVector innerCond,
value::SlotVector innerProjects,
boost::optional<value::SlotId> collatorSlot,
- PlanNodeId planNodeId)
- : PlanStage("hj"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("hj"_sd, planNodeId, participateInTrialRunTracking),
_outerCond(std::move(outerCond)),
_outerProjects(std::move(outerProjects)),
_innerCond(std::move(innerCond)),
@@ -68,7 +69,8 @@ std::unique_ptr<PlanStage> HashJoinStage::clone() const {
_innerCond,
_innerProjects,
_collatorSlot,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void HashJoinStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/hash_join.h b/src/mongo/db/exec/sbe/stages/hash_join.h
index ed4781116d9..a3997074db0 100644
--- a/src/mongo/db/exec/sbe/stages/hash_join.h
+++ b/src/mongo/db/exec/sbe/stages/hash_join.h
@@ -66,7 +66,8 @@ public:
value::SlotVector innerCond,
value::SlotVector innerProjects,
boost::optional<value::SlotId> collatorSlot,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/hash_lookup.cpp b/src/mongo/db/exec/sbe/stages/hash_lookup.cpp
index a65f2f8bd89..16e61d68630 100644
--- a/src/mongo/db/exec/sbe/stages/hash_lookup.cpp
+++ b/src/mongo/db/exec/sbe/stages/hash_lookup.cpp
@@ -47,8 +47,9 @@ HashLookupStage::HashLookupStage(std::unique_ptr<PlanStage> outer,
value::SlotVector innerProjects,
value::SlotMap<std::unique_ptr<EExpression>> innerAggs,
boost::optional<value::SlotId> collatorSlot,
- PlanNodeId planNodeId)
- : PlanStage("hash_lookup"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("hash_lookup"_sd, planNodeId, participateInTrialRunTracking),
_outerCond(outerCond),
_innerCond(innerCond),
_innerProjects(innerProjects),
@@ -72,7 +73,8 @@ std::unique_ptr<PlanStage> HashLookupStage::clone() const {
_innerProjects,
std::move(innerAggs),
_collatorSlot,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void HashLookupStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/hash_lookup.h b/src/mongo/db/exec/sbe/stages/hash_lookup.h
index 2e3f0b34816..611c5603606 100644
--- a/src/mongo/db/exec/sbe/stages/hash_lookup.h
+++ b/src/mongo/db/exec/sbe/stages/hash_lookup.h
@@ -86,7 +86,8 @@ public:
value::SlotVector innerProjects,
value::SlotMap<std::unique_ptr<EExpression>> innerAggs,
boost::optional<value::SlotId> collatorSlot,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/ix_scan.cpp b/src/mongo/db/exec/sbe/stages/ix_scan.cpp
index 520e68fe074..1c4a54248dd 100644
--- a/src/mongo/db/exec/sbe/stages/ix_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/ix_scan.cpp
@@ -81,7 +81,8 @@ std::unique_ptr<PlanStage> IndexScanStage::clone() const {
_seekKeyLow->clone(),
_seekKeyHigh->clone(),
_yieldPolicy,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void IndexScanStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/ix_scan.h b/src/mongo/db/exec/sbe/stages/ix_scan.h
index c57389b3434..3d1ae2eec78 100644
--- a/src/mongo/db/exec/sbe/stages/ix_scan.h
+++ b/src/mongo/db/exec/sbe/stages/ix_scan.h
@@ -85,7 +85,8 @@ public:
std::unique_ptr<EExpression> seekKeyLow,
std::unique_ptr<EExpression> seekKeyHigh,
PlanYieldPolicy* yieldPolicy,
- PlanNodeId nodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/limit_skip.cpp b/src/mongo/db/exec/sbe/stages/limit_skip.cpp
index 359355582ac..8343f56ca96 100644
--- a/src/mongo/db/exec/sbe/stages/limit_skip.cpp
+++ b/src/mongo/db/exec/sbe/stages/limit_skip.cpp
@@ -37,8 +37,9 @@ namespace mongo::sbe {
LimitSkipStage::LimitSkipStage(std::unique_ptr<PlanStage> input,
boost::optional<long long> limit,
boost::optional<long long> skip,
- PlanNodeId planNodeId)
- : PlanStage(!skip ? "limit"_sd : "limitskip"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage(!skip ? "limit"_sd : "limitskip"_sd, planNodeId, participateInTrialRunTracking),
_limit(limit),
_skip(skip),
_current(0),
@@ -51,7 +52,7 @@ LimitSkipStage::LimitSkipStage(std::unique_ptr<PlanStage> input,
std::unique_ptr<PlanStage> LimitSkipStage::clone() const {
return std::make_unique<LimitSkipStage>(
- _children[0]->clone(), _limit, _skip, _commonStats.nodeId);
+ _children[0]->clone(), _limit, _skip, _commonStats.nodeId, _participateInTrialRunTracking);
}
void LimitSkipStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/limit_skip.h b/src/mongo/db/exec/sbe/stages/limit_skip.h
index f0f62b34239..7fc366a2174 100644
--- a/src/mongo/db/exec/sbe/stages/limit_skip.h
+++ b/src/mongo/db/exec/sbe/stages/limit_skip.h
@@ -50,7 +50,8 @@ public:
LimitSkipStage(std::unique_ptr<PlanStage> input,
boost::optional<long long> limit,
boost::optional<long long> skip,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/loop_join.cpp b/src/mongo/db/exec/sbe/stages/loop_join.cpp
index 6c49f2e700a..3df5e179a09 100644
--- a/src/mongo/db/exec/sbe/stages/loop_join.cpp
+++ b/src/mongo/db/exec/sbe/stages/loop_join.cpp
@@ -41,7 +41,8 @@ LoopJoinStage::LoopJoinStage(std::unique_ptr<PlanStage> outer,
value::SlotVector outerProjects,
value::SlotVector outerCorrelated,
std::unique_ptr<EExpression> predicate,
- PlanNodeId nodeId)
+ PlanNodeId nodeId,
+ bool participateInTrialRunTracking)
: LoopJoinStage(std::move(outer),
std::move(inner),
std::move(outerProjects),
@@ -49,7 +50,8 @@ LoopJoinStage::LoopJoinStage(std::unique_ptr<PlanStage> outer,
value::SlotVector{},
std::move(predicate),
JoinType::Inner,
- nodeId) {}
+ nodeId,
+ participateInTrialRunTracking) {}
LoopJoinStage::LoopJoinStage(std::unique_ptr<PlanStage> outer,
std::unique_ptr<PlanStage> inner,
@@ -58,8 +60,9 @@ LoopJoinStage::LoopJoinStage(std::unique_ptr<PlanStage> outer,
value::SlotVector innerProjects,
std::unique_ptr<EExpression> predicate,
JoinType joinType,
- PlanNodeId nodeId)
- : PlanStage("nlj"_sd, nodeId),
+ PlanNodeId nodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("nlj"_sd, nodeId, participateInTrialRunTracking),
_outerProjects(std::move(outerProjects)),
_outerCorrelated(std::move(outerCorrelated)),
_innerProjects(std::move(innerProjects)),
@@ -80,7 +83,8 @@ std::unique_ptr<PlanStage> LoopJoinStage::clone() const {
_innerProjects,
_predicate ? _predicate->clone() : nullptr,
_joinType,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void LoopJoinStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/loop_join.h b/src/mongo/db/exec/sbe/stages/loop_join.h
index 076655bca4c..c69010071fd 100644
--- a/src/mongo/db/exec/sbe/stages/loop_join.h
+++ b/src/mongo/db/exec/sbe/stages/loop_join.h
@@ -63,7 +63,8 @@ public:
value::SlotVector outerProjects,
value::SlotVector outerCorrelated,
std::unique_ptr<EExpression> predicate,
- PlanNodeId nodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
LoopJoinStage(std::unique_ptr<PlanStage> outer,
std::unique_ptr<PlanStage> inner,
@@ -72,7 +73,8 @@ public:
value::SlotVector innerProjects,
std::unique_ptr<EExpression> predicate,
JoinType joinType,
- PlanNodeId nodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/makeobj.cpp b/src/mongo/db/exec/sbe/stages/makeobj.cpp
index e0402934517..0c84fde3083 100644
--- a/src/mongo/db/exec/sbe/stages/makeobj.cpp
+++ b/src/mongo/db/exec/sbe/stages/makeobj.cpp
@@ -46,8 +46,11 @@ MakeObjStageBase<O>::MakeObjStageBase(std::unique_ptr<PlanStage> input,
value::SlotVector projectVars,
bool forceNewObject,
bool returnOldObject,
- PlanNodeId planNodeId)
- : PlanStage(O == MakeObjOutputType::object ? "mkobj"_sd : "mkbson"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage(O == MakeObjOutputType::object ? "mkobj"_sd : "mkbson"_sd,
+ planNodeId,
+ participateInTrialRunTracking),
_objSlot(objSlot),
_rootSlot(rootSlot),
_fieldBehavior(fieldBehavior),
@@ -62,6 +65,29 @@ MakeObjStageBase<O>::MakeObjStageBase(std::unique_ptr<PlanStage> input,
}
template <MakeObjOutputType O>
+MakeObjStageBase<O>::MakeObjStageBase(std::unique_ptr<PlanStage> input,
+ value::SlotId objSlot,
+ boost::optional<value::SlotId> rootSlot,
+ boost::optional<FieldBehavior> fieldBehavior,
+ std::set<std::string> fields,
+ std::set<std::string> projectFields,
+ value::SlotVector projectVars,
+ bool forceNewObject,
+ bool returnOldObject,
+ PlanNodeId planNodeId)
+ : MakeObjStageBase<O>::MakeObjStageBase(
+ std::move(input),
+ objSlot,
+ rootSlot,
+ fieldBehavior,
+ std::vector<std::string>(fields.begin(), fields.end()),
+ std::vector<std::string>(projectFields.begin(), projectFields.end()),
+ std::move(projectVars),
+ forceNewObject,
+ returnOldObject,
+ planNodeId) {}
+
+template <MakeObjOutputType O>
std::unique_ptr<PlanStage> MakeObjStageBase<O>::clone() const {
return std::make_unique<MakeObjStageBase<O>>(_children[0]->clone(),
_objSlot,
@@ -72,7 +98,8 @@ std::unique_ptr<PlanStage> MakeObjStageBase<O>::clone() const {
_projectVars,
_forceNewObject,
_returnOldObject,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
template <MakeObjOutputType O>
diff --git a/src/mongo/db/exec/sbe/stages/makeobj.h b/src/mongo/db/exec/sbe/stages/makeobj.h
index 1cf0755f1c5..3034470b95a 100644
--- a/src/mongo/db/exec/sbe/stages/makeobj.h
+++ b/src/mongo/db/exec/sbe/stages/makeobj.h
@@ -87,6 +87,22 @@ public:
value::SlotVector projectVars,
bool forceNewObject,
bool returnOldObject,
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
+
+ /**
+ * A convenience constructor that takes a set instead of a vector for 'fields' and
+ * 'projectFields'.
+ */
+ MakeObjStageBase(std::unique_ptr<PlanStage> input,
+ value::SlotId objSlot,
+ boost::optional<value::SlotId> rootSlot,
+ boost::optional<FieldBehavior> fieldBehavior,
+ std::set<std::string> fields,
+ std::set<std::string> projectFields,
+ value::SlotVector projectVars,
+ bool forceNewObject,
+ bool returnOldObject,
PlanNodeId planNodeId);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/merge_join.cpp b/src/mongo/db/exec/sbe/stages/merge_join.cpp
index 170227e0575..d6f03af7502 100644
--- a/src/mongo/db/exec/sbe/stages/merge_join.cpp
+++ b/src/mongo/db/exec/sbe/stages/merge_join.cpp
@@ -76,8 +76,9 @@ MergeJoinStage::MergeJoinStage(std::unique_ptr<PlanStage> outer,
value::SlotVector innerKeys,
value::SlotVector innerProjects,
std::vector<value::SortDirection> sortDirs,
- PlanNodeId planNodeId)
- : PlanStage("mj"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("mj"_sd, planNodeId, participateInTrialRunTracking),
_outerKeys(std::move(outerKeys)),
_outerProjects(std::move(outerProjects)),
_innerKeys(std::move(innerKeys)),
@@ -104,7 +105,8 @@ std::unique_ptr<PlanStage> MergeJoinStage::clone() const {
_innerKeys,
_innerProjects,
_dirs,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void MergeJoinStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/merge_join.h b/src/mongo/db/exec/sbe/stages/merge_join.h
index b0f61cd677c..ff94784ac0d 100644
--- a/src/mongo/db/exec/sbe/stages/merge_join.h
+++ b/src/mongo/db/exec/sbe/stages/merge_join.h
@@ -62,7 +62,8 @@ public:
value::SlotVector innerKeys,
value::SlotVector innerProjects,
std::vector<value::SortDirection> sortDirs,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/project.cpp b/src/mongo/db/exec/sbe/stages/project.cpp
index 736110bc83a..c534c5c8cdc 100644
--- a/src/mongo/db/exec/sbe/stages/project.cpp
+++ b/src/mongo/db/exec/sbe/stages/project.cpp
@@ -37,8 +37,10 @@ namespace mongo {
namespace sbe {
ProjectStage::ProjectStage(std::unique_ptr<PlanStage> input,
value::SlotMap<std::unique_ptr<EExpression>> projects,
- PlanNodeId nodeId)
- : PlanStage("project"_sd, nodeId), _projects(std::move(projects)) {
+ PlanNodeId nodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("project"_sd, nodeId, participateInTrialRunTracking),
+ _projects(std::move(projects)) {
_children.emplace_back(std::move(input));
}
@@ -47,8 +49,10 @@ std::unique_ptr<PlanStage> ProjectStage::clone() const {
for (auto& [k, v] : _projects) {
projects.emplace(k, v->clone());
}
- return std::make_unique<ProjectStage>(
- _children[0]->clone(), std::move(projects), _commonStats.nodeId);
+ return std::make_unique<ProjectStage>(_children[0]->clone(),
+ std::move(projects),
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void ProjectStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/project.h b/src/mongo/db/exec/sbe/stages/project.h
index 1754dd7d2a9..bf4e169c8c9 100644
--- a/src/mongo/db/exec/sbe/stages/project.h
+++ b/src/mongo/db/exec/sbe/stages/project.h
@@ -47,7 +47,8 @@ class ProjectStage final : public PlanStage {
public:
ProjectStage(std::unique_ptr<PlanStage> input,
value::SlotMap<std::unique_ptr<EExpression>> projects,
- PlanNodeId nodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/scan.cpp b/src/mongo/db/exec/sbe/stages/scan.cpp
index 678d3f84ef9..fbbc3a9ae0d 100644
--- a/src/mongo/db/exec/sbe/stages/scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/scan.cpp
@@ -56,8 +56,10 @@ ScanStage::ScanStage(UUID collectionUuid,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
ScanCallbacks scanCallbacks,
- bool useRandomCursor)
- : PlanStage(seekKeySlot ? "seek"_sd : "scan"_sd, yieldPolicy, nodeId),
+ bool useRandomCursor,
+ bool participateInTrialRunTracking)
+ : PlanStage(
+ seekKeySlot ? "seek"_sd : "scan"_sd, yieldPolicy, nodeId, participateInTrialRunTracking),
_collUuid(collectionUuid),
_recordSlot(recordSlot),
_recordIdSlot(recordIdSlot),
@@ -98,7 +100,9 @@ std::unique_ptr<PlanStage> ScanStage::clone() const {
_forward,
_yieldPolicy,
_commonStats.nodeId,
- _scanCallbacks);
+ _scanCallbacks,
+ _useRandomCursor,
+ _participateInTrialRunTracking);
}
void ScanStage::prepare(CompileCtx& ctx) {
@@ -592,8 +596,9 @@ ParallelScanStage::ParallelScanStage(UUID collectionUuid,
value::SlotVector vars,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
- ScanCallbacks callbacks)
- : PlanStage("pscan"_sd, yieldPolicy, nodeId),
+ ScanCallbacks callbacks,
+ bool participateInTrialRunTracking)
+ : PlanStage("pscan"_sd, yieldPolicy, nodeId, participateInTrialRunTracking),
_collUuid(collectionUuid),
_recordSlot(recordSlot),
_recordIdSlot(recordIdSlot),
@@ -621,8 +626,9 @@ ParallelScanStage::ParallelScanStage(const std::shared_ptr<ParallelState>& state
value::SlotVector vars,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
- ScanCallbacks callbacks)
- : PlanStage("pscan"_sd, yieldPolicy, nodeId),
+ ScanCallbacks callbacks,
+ bool participateInTrialRunTracking)
+ : PlanStage("pscan"_sd, yieldPolicy, nodeId, participateInTrialRunTracking),
_collUuid(collectionUuid),
_recordSlot(recordSlot),
_recordIdSlot(recordIdSlot),
@@ -650,7 +656,8 @@ std::unique_ptr<PlanStage> ParallelScanStage::clone() const {
_vars,
_yieldPolicy,
_commonStats.nodeId,
- _scanCallbacks);
+ _scanCallbacks,
+ _participateInTrialRunTracking);
}
void ParallelScanStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/scan.h b/src/mongo/db/exec/sbe/stages/scan.h
index 37462ac5e14..ed138f6302e 100644
--- a/src/mongo/db/exec/sbe/stages/scan.h
+++ b/src/mongo/db/exec/sbe/stages/scan.h
@@ -108,7 +108,8 @@ public:
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
ScanCallbacks scanCallbacks,
- bool useRandomCursor = false);
+ bool useRandomCursor = false,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
@@ -227,7 +228,8 @@ public:
value::SlotVector vars,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
- ScanCallbacks callbacks);
+ ScanCallbacks callbacks,
+ bool participateInTrialRunTracking = true);
ParallelScanStage(const std::shared_ptr<ParallelState>& state,
const UUID& collectionUuid,
@@ -241,7 +243,8 @@ public:
value::SlotVector vars,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
- ScanCallbacks callbacks);
+ ScanCallbacks callbacks,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/sort.cpp b/src/mongo/db/exec/sbe/stages/sort.cpp
index 5acf73afe8d..0968b0bea68 100644
--- a/src/mongo/db/exec/sbe/stages/sort.cpp
+++ b/src/mongo/db/exec/sbe/stages/sort.cpp
@@ -55,8 +55,9 @@ SortStage::SortStage(std::unique_ptr<PlanStage> input,
size_t limit,
size_t memoryLimit,
bool allowDiskUse,
- PlanNodeId planNodeId)
- : PlanStage("sort"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("sort"_sd, planNodeId, participateInTrialRunTracking),
_obs(std::move(obs)),
_dirs(std::move(dirs)),
_vals(std::move(vals)),
@@ -80,7 +81,8 @@ std::unique_ptr<PlanStage> SortStage::clone() const {
_specificStats.limit,
_specificStats.maxMemoryUsageBytes,
_allowDiskUse,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void SortStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/sort.h b/src/mongo/db/exec/sbe/stages/sort.h
index 2bfc9e1d9fb..dda9716b75b 100644
--- a/src/mongo/db/exec/sbe/stages/sort.h
+++ b/src/mongo/db/exec/sbe/stages/sort.h
@@ -70,7 +70,8 @@ public:
size_t limit,
size_t memoryLimit,
bool allowDiskUse,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
~SortStage();
diff --git a/src/mongo/db/exec/sbe/stages/sorted_merge.cpp b/src/mongo/db/exec/sbe/stages/sorted_merge.cpp
index f0a648f38ad..39cee407a00 100644
--- a/src/mongo/db/exec/sbe/stages/sorted_merge.cpp
+++ b/src/mongo/db/exec/sbe/stages/sorted_merge.cpp
@@ -41,8 +41,9 @@ SortedMergeStage::SortedMergeStage(PlanStage::Vector inputStages,
std::vector<value::SortDirection> dirs,
std::vector<value::SlotVector> inputVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId)
- : PlanStage("smerge"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("smerge"_sd, planNodeId, participateInTrialRunTracking),
_inputKeys(std::move(inputKeys)),
_dirs(std::move(dirs)),
_inputVals(std::move(inputVals)),
@@ -69,8 +70,13 @@ std::unique_ptr<PlanStage> SortedMergeStage::clone() const {
for (auto& child : _children) {
inputStages.emplace_back(child->clone());
}
- return std::make_unique<SortedMergeStage>(
- std::move(inputStages), _inputKeys, _dirs, _inputVals, _outputVals, _commonStats.nodeId);
+ return std::make_unique<SortedMergeStage>(std::move(inputStages),
+ _inputKeys,
+ _dirs,
+ _inputVals,
+ _outputVals,
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void SortedMergeStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/sorted_merge.h b/src/mongo/db/exec/sbe/stages/sorted_merge.h
index 3b87e4c8849..436ddfce080 100644
--- a/src/mongo/db/exec/sbe/stages/sorted_merge.h
+++ b/src/mongo/db/exec/sbe/stages/sorted_merge.h
@@ -61,7 +61,8 @@ public:
// Each element of 'inputVals' must be the same size as 'outputVals'.
std::vector<value::SlotVector> inputVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/spool.cpp b/src/mongo/db/exec/sbe/stages/spool.cpp
index 4550f569b09..47ca744962c 100644
--- a/src/mongo/db/exec/sbe/stages/spool.cpp
+++ b/src/mongo/db/exec/sbe/stages/spool.cpp
@@ -35,14 +35,20 @@ namespace mongo::sbe {
SpoolEagerProducerStage::SpoolEagerProducerStage(std::unique_ptr<PlanStage> input,
SpoolId spoolId,
value::SlotVector vals,
- PlanNodeId planNodeId)
- : PlanStage{"espool"_sd, planNodeId}, _spoolId{spoolId}, _vals{std::move(vals)} {
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage{"espool"_sd, planNodeId, participateInTrialRunTracking},
+ _spoolId{spoolId},
+ _vals{std::move(vals)} {
_children.emplace_back(std::move(input));
}
std::unique_ptr<PlanStage> SpoolEagerProducerStage::clone() const {
- return std::make_unique<SpoolEagerProducerStage>(
- _children[0]->clone(), _spoolId, _vals, _commonStats.nodeId);
+ return std::make_unique<SpoolEagerProducerStage>(_children[0]->clone(),
+ _spoolId,
+ _vals,
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void SpoolEagerProducerStage::prepare(CompileCtx& ctx) {
@@ -171,8 +177,9 @@ SpoolLazyProducerStage::SpoolLazyProducerStage(std::unique_ptr<PlanStage> input,
SpoolId spoolId,
value::SlotVector vals,
std::unique_ptr<EExpression> predicate,
- PlanNodeId planNodeId)
- : PlanStage{"lspool"_sd, planNodeId},
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage{"lspool"_sd, planNodeId, participateInTrialRunTracking},
_spoolId{spoolId},
_vals{std::move(vals)},
_predicate{std::move(predicate)} {
@@ -180,8 +187,12 @@ SpoolLazyProducerStage::SpoolLazyProducerStage(std::unique_ptr<PlanStage> input,
}
std::unique_ptr<PlanStage> SpoolLazyProducerStage::clone() const {
- return std::make_unique<SpoolLazyProducerStage>(
- _children[0]->clone(), _spoolId, _vals, _predicate->clone(), _commonStats.nodeId);
+ return std::make_unique<SpoolLazyProducerStage>(_children[0]->clone(),
+ _spoolId,
+ _vals,
+ _predicate->clone(),
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void SpoolLazyProducerStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/spool.h b/src/mongo/db/exec/sbe/stages/spool.h
index a2dd6f81657..09a453e0e0e 100644
--- a/src/mongo/db/exec/sbe/stages/spool.h
+++ b/src/mongo/db/exec/sbe/stages/spool.h
@@ -56,7 +56,8 @@ public:
SpoolEagerProducerStage(std::unique_ptr<PlanStage> input,
SpoolId spoolId,
value::SlotVector vals,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
@@ -109,7 +110,8 @@ public:
SpoolId spoolId,
value::SlotVector vals,
std::unique_ptr<EExpression> predicate,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
@@ -165,13 +167,17 @@ private:
template <bool IsStack>
class SpoolConsumerStage final : public PlanStage {
public:
- SpoolConsumerStage(SpoolId spoolId, value::SlotVector vals, PlanNodeId planNodeId)
- : PlanStage{IsStack ? "sspool"_sd : "cspool"_sd, planNodeId},
+ SpoolConsumerStage(SpoolId spoolId,
+ value::SlotVector vals,
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true)
+ : PlanStage{IsStack ? "sspool"_sd : "cspool"_sd, planNodeId, participateInTrialRunTracking},
_spoolId{spoolId},
_vals{std::move(vals)} {}
std::unique_ptr<PlanStage> clone() const {
- return std::make_unique<SpoolConsumerStage<IsStack>>(_spoolId, _vals, _commonStats.nodeId);
+ return std::make_unique<SpoolConsumerStage<IsStack>>(
+ _spoolId, _vals, _commonStats.nodeId, _participateInTrialRunTracking);
}
void prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/stages.h b/src/mongo/db/exec/sbe/stages/stages.h
index 59f6746a005..02dd6ae62fb 100644
--- a/src/mongo/db/exec/sbe/stages/stages.h
+++ b/src/mongo/db/exec/sbe/stages/stages.h
@@ -254,7 +254,9 @@ protected:
template <typename T>
class CanTrackStats {
public:
- CanTrackStats(StringData stageType, PlanNodeId nodeId) : _commonStats(stageType, nodeId) {}
+ CanTrackStats(StringData stageType, PlanNodeId nodeId, bool participateInTrialRunTracking)
+ : _commonStats(stageType, nodeId),
+ _participateInTrialRunTracking(participateInTrialRunTracking) {}
/**
* Returns a tree of stats. If the stage has any children it must propagate the request for
@@ -414,6 +416,12 @@ protected:
CommonStats _commonStats;
+ // Flag which determines whether this node and its children can participate in trial run
+ // tracking. A stage and its children are not eligible for trial run tracking when they are
+ // planned deterministically (that is, the amount of work they perform is independent of
+ // other parts of the tree which are multiplanned).
+ bool _participateInTrialRunTracking{true};
+
private:
/**
* In general, accessors can be accessed only after getNext returns a row. It is most definitely
@@ -422,14 +430,6 @@ private:
* that feature is retired we can then simply revisit all stages and simplify them.
*/
bool _slotsAccessible{false};
-
- /**
- * Flag which determines whether this node and its children can participate in trial run
- * tracking. A stage and its children are not eligible for trial run tracking when they are
- * planned deterministically (that is, the amount of work they perform is independent of
- * other parts of the tree which are multiplanned).
- */
- bool _participateInTrialRunTracking{true};
};
/**
@@ -496,10 +496,15 @@ class PlanStage : public CanSwitchOperationContext<PlanStage>,
public:
using Vector = absl::InlinedVector<std::unique_ptr<PlanStage>, 2>;
- PlanStage(StringData stageType, PlanYieldPolicy* yieldPolicy, PlanNodeId nodeId)
- : CanTrackStats{stageType, nodeId}, CanInterrupt{yieldPolicy} {}
+ PlanStage(StringData stageType,
+ PlanYieldPolicy* yieldPolicy,
+ PlanNodeId nodeId,
+ bool participateInTrialRunTracking)
+ : CanTrackStats{stageType, nodeId, participateInTrialRunTracking},
+ CanInterrupt{yieldPolicy} {}
- PlanStage(StringData stageType, PlanNodeId nodeId) : PlanStage(stageType, nullptr, nodeId) {}
+ PlanStage(StringData stageType, PlanNodeId nodeId, bool participateInTrialRunTracking)
+ : PlanStage(stageType, nullptr, nodeId, participateInTrialRunTracking) {}
virtual ~PlanStage() = default;
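
The change to stages.h above is the hub of this refactor: CanTrackStats now takes the participateInTrialRunTracking flag in its constructor, every PlanStage constructor threads it through (defaulted to true at the call sites), and each stage's clone() re-forwards the stored _participateInTrialRunTracking so cloned subtrees keep the same eligibility. A minimal stand-alone sketch of that plumbing, using simplified stand-in types rather than the real mongo::sbe classes:

// Simplified model of the participateInTrialRunTracking plumbing; the types
// here are stand-ins, not the real mongo::sbe classes.
#include <memory>
#include <utility>

struct StatsBase {
    StatsBase(int nodeId, bool participateInTrialRunTracking)
        : _nodeId(nodeId), _participateInTrialRunTracking(participateInTrialRunTracking) {}
    int _nodeId;
    bool _participateInTrialRunTracking{true};
};

struct Stage : StatsBase {
    Stage(int nodeId, bool participateInTrialRunTracking = true)
        : StatsBase(nodeId, participateInTrialRunTracking) {}
    virtual ~Stage() = default;
    virtual std::unique_ptr<Stage> clone() const = 0;
};

struct UniqueLikeStage final : Stage {
    UniqueLikeStage(std::unique_ptr<Stage> child, int nodeId,
                    bool participateInTrialRunTracking = true)
        : Stage(nodeId, participateInTrialRunTracking), _child(std::move(child)) {}

    std::unique_ptr<Stage> clone() const override {
        // The clone must re-forward the flag; otherwise a cloned subtree would
        // silently fall back to the default and re-enter trial run tracking.
        return std::make_unique<UniqueLikeStage>(
            _child ? _child->clone() : nullptr, _nodeId, _participateInTrialRunTracking);
    }
    std::unique_ptr<Stage> _child;
};

int main() {
    UniqueLikeStage s{nullptr, /*nodeId=*/1, /*participateInTrialRunTracking=*/false};
    auto c = s.clone();
    return c->_participateInTrialRunTracking ? 1 : 0;  // expect 0: flag preserved by clone()
}
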
diff --git a/src/mongo/db/exec/sbe/stages/traverse.cpp b/src/mongo/db/exec/sbe/stages/traverse.cpp
index d1e0a040b3e..654a1a160fa 100644
--- a/src/mongo/db/exec/sbe/stages/traverse.cpp
+++ b/src/mongo/db/exec/sbe/stages/traverse.cpp
@@ -42,8 +42,9 @@ TraverseStage::TraverseStage(std::unique_ptr<PlanStage> outer,
std::unique_ptr<EExpression> foldExpr,
std::unique_ptr<EExpression> finalExpr,
PlanNodeId planNodeId,
- boost::optional<size_t> nestedArraysDepth)
- : PlanStage("traverse"_sd, planNodeId),
+ boost::optional<size_t> nestedArraysDepth,
+ bool participateInTrialRunTracking)
+ : PlanStage("traverse"_sd, planNodeId, participateInTrialRunTracking),
_inField(inField),
_outField(outField),
_outFieldInner(outFieldInner),
@@ -69,7 +70,8 @@ std::unique_ptr<PlanStage> TraverseStage::clone() const {
_fold ? _fold->clone() : nullptr,
_final ? _final->clone() : nullptr,
_commonStats.nodeId,
- _nestedArraysDepth);
+ _nestedArraysDepth,
+ _participateInTrialRunTracking);
}
void TraverseStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/traverse.h b/src/mongo/db/exec/sbe/stages/traverse.h
index 2b3fee33a47..09e5dc3dfcf 100644
--- a/src/mongo/db/exec/sbe/stages/traverse.h
+++ b/src/mongo/db/exec/sbe/stages/traverse.h
@@ -74,7 +74,8 @@ public:
std::unique_ptr<EExpression> foldExpr,
std::unique_ptr<EExpression> finalExpr,
PlanNodeId planNodeId,
- boost::optional<size_t> nestedArraysDepth);
+ boost::optional<size_t> nestedArraysDepth,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/union.cpp b/src/mongo/db/exec/sbe/stages/union.cpp
index a661e6c579f..2fd6d0b4fc5 100644
--- a/src/mongo/db/exec/sbe/stages/union.cpp
+++ b/src/mongo/db/exec/sbe/stages/union.cpp
@@ -38,8 +38,9 @@ namespace mongo::sbe {
UnionStage::UnionStage(PlanStage::Vector inputStages,
std::vector<value::SlotVector> inputVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId)
- : PlanStage("union"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("union"_sd, planNodeId, participateInTrialRunTracking),
_inputVals{std::move(inputVals)},
_outputVals{std::move(outputVals)} {
_children = std::move(inputStages);
@@ -57,8 +58,11 @@ std::unique_ptr<PlanStage> UnionStage::clone() const {
for (auto& child : _children) {
inputStages.emplace_back(child->clone());
}
- return std::make_unique<UnionStage>(
- std::move(inputStages), _inputVals, _outputVals, _commonStats.nodeId);
+ return std::make_unique<UnionStage>(std::move(inputStages),
+ _inputVals,
+ _outputVals,
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void UnionStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/union.h b/src/mongo/db/exec/sbe/stages/union.h
index 2ec0ec73df9..b21d5e6caf5 100644
--- a/src/mongo/db/exec/sbe/stages/union.h
+++ b/src/mongo/db/exec/sbe/stages/union.h
@@ -53,7 +53,8 @@ public:
UnionStage(PlanStage::Vector inputStages,
std::vector<value::SlotVector> inputVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/unique.cpp b/src/mongo/db/exec/sbe/stages/unique.cpp
index 355927ff912..c88fa9ab43e 100644
--- a/src/mongo/db/exec/sbe/stages/unique.cpp
+++ b/src/mongo/db/exec/sbe/stages/unique.cpp
@@ -37,13 +37,15 @@ namespace mongo {
namespace sbe {
UniqueStage::UniqueStage(std::unique_ptr<PlanStage> input,
value::SlotVector keys,
- PlanNodeId planNodeId)
- : PlanStage("unique"_sd, planNodeId), _keySlots(keys) {
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("unique"_sd, planNodeId, participateInTrialRunTracking), _keySlots(keys) {
_children.emplace_back(std::move(input));
}
std::unique_ptr<PlanStage> UniqueStage::clone() const {
- return std::make_unique<UniqueStage>(_children[0]->clone(), _keySlots, _commonStats.nodeId);
+ return std::make_unique<UniqueStage>(
+ _children[0]->clone(), _keySlots, _commonStats.nodeId, _participateInTrialRunTracking);
}
void UniqueStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/unique.h b/src/mongo/db/exec/sbe/stages/unique.h
index 1165743a0cc..c344cd09d24 100644
--- a/src/mongo/db/exec/sbe/stages/unique.h
+++ b/src/mongo/db/exec/sbe/stages/unique.h
@@ -53,7 +53,10 @@ namespace mongo::sbe {
*/
class UniqueStage final : public PlanStage {
public:
- UniqueStage(std::unique_ptr<PlanStage> input, value::SlotVector keys, PlanNodeId planNodeId);
+ UniqueStage(std::unique_ptr<PlanStage> input,
+ value::SlotVector keys,
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/unwind.cpp b/src/mongo/db/exec/sbe/stages/unwind.cpp
index b4c5e225adc..7ad10eecb23 100644
--- a/src/mongo/db/exec/sbe/stages/unwind.cpp
+++ b/src/mongo/db/exec/sbe/stages/unwind.cpp
@@ -40,8 +40,9 @@ UnwindStage::UnwindStage(std::unique_ptr<PlanStage> input,
value::SlotId outField,
value::SlotId outIndex,
bool preserveNullAndEmptyArrays,
- PlanNodeId planNodeId)
- : PlanStage("unwind"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("unwind"_sd, planNodeId, participateInTrialRunTracking),
_inField(inField),
_outField(outField),
_outIndex(outIndex),
@@ -59,7 +60,8 @@ std::unique_ptr<PlanStage> UnwindStage::clone() const {
_outField,
_outIndex,
_preserveNullAndEmptyArrays,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void UnwindStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/unwind.h b/src/mongo/db/exec/sbe/stages/unwind.h
index 049fee4a069..57b28d9c1cf 100644
--- a/src/mongo/db/exec/sbe/stages/unwind.h
+++ b/src/mongo/db/exec/sbe/stages/unwind.h
@@ -52,7 +52,8 @@ public:
value::SlotId outField,
value::SlotId outIndex,
bool preserveNullAndEmptyArrays,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/util/spilling.cpp b/src/mongo/db/exec/sbe/util/spilling.cpp
index 45931efec8b..c54f3bfe956 100644
--- a/src/mongo/db/exec/sbe/util/spilling.cpp
+++ b/src/mongo/db/exec/sbe/util/spilling.cpp
@@ -63,8 +63,7 @@ boost::optional<value::MaterializedRow> readFromRecordStore(OperationContext* op
RecordData record;
if (rs->findRecord(opCtx, rid, &record)) {
auto valueReader = BufReader(record.data(), record.size());
- auto val = value::MaterializedRow::deserializeForSorter(valueReader, {});
- return val;
+ return value::MaterializedRow::deserializeForSorter(valueReader, {});
}
return boost::none;
}
diff --git a/src/mongo/db/exec/sbe/values/columnar.cpp b/src/mongo/db/exec/sbe/values/columnar.cpp
index 7490d549803..c1bd51f6b69 100644
--- a/src/mongo/db/exec/sbe/values/columnar.cpp
+++ b/src/mongo/db/exec/sbe/values/columnar.cpp
@@ -237,6 +237,24 @@ void addToObjectNoArrays(value::TypeTags tag,
});
}
+/*
+ * Ensures that the path (stored in 'state') leads to an object and materializes an empty object if
+ * it does not. Assumes that there are no arrays along remaining path (i.e., the components that are
+ * not yet traversed via withNextPathComponent()).
+ *
+ * This function is a no-op when there are no remaining path components.
+ */
+template <class C>
+void materializeObjectNoArrays(AddToDocumentState<C>& state, value::Object& out) {
+ if (state.atLastPathComponent()) {
+ return;
+ }
+
+ state.withNextPathComponent([&](StringData nextPathComponent) {
+ materializeObjectNoArrays(state, *findOrAddObjInObj(nextPathComponent, &out));
+ });
+}
+
template <class C>
void addToObject(value::Object& obj, AddToDocumentState<C>& state);
@@ -268,23 +286,19 @@ void addToArray(value::Array& arr, AddToDocumentState<C>& state) {
for (; insertAt < index; insertAt++) {
invariant(insertAt < arr.size());
- auto [tag, val] = [nextChar, &state]() {
- if (nextChar == '|') {
- return state.extractAndCopyValue();
+ if (nextChar == 'o') {
+ materializeObjectNoArrays(state, *findOrAddObjInArr(insertAt, &arr));
+ } else if (nextChar == '|') {
+ auto [tag, val] = state.extractAndCopyValue();
+ if (state.atLastPathComponent()) {
+ invariant(arr.getAt(insertAt).first == kPlaceHolderType);
+ arr.setAt(insertAt, tag, val);
} else {
- invariant(nextChar == 'o');
- return value::makeNewObject();
+ addToObjectNoArrays(
+ tag, val, state, *findOrAddObjInArr(insertAt, &arr), 0);
}
- }();
- if (state.atLastPathComponent()) {
- // At this point we are inserting a leaf value.
- dassert(arr.getAt(insertAt).first == kPlaceHolderType);
- arr.setAt(insertAt, tag, val);
} else {
- // This is valid on initialized elements when the subobject contains more
- // than one member.
- auto* subObj = findOrAddObjInArr(insertAt, &arr);
- addToObjectNoArrays(tag, val, state, *subObj, 0);
+ MONGO_UNREACHABLE;
}
}
break;
diff --git a/src/mongo/db/exec/sbe/values/columnar_test.cpp b/src/mongo/db/exec/sbe/values/columnar_test.cpp
index 9dc9e7717d0..ebbed88848a 100644
--- a/src/mongo/db/exec/sbe/values/columnar_test.cpp
+++ b/src/mongo/db/exec/sbe/values/columnar_test.cpp
@@ -201,4 +201,11 @@ TEST(ColumnarObjTest, AddNonLeafCellWithArrayInfoToObject) {
std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "{[o1", {})};
compareMakeObjWithExpected(cells, fromjson("{a: {b: [{}, {}]}}"));
}
+
+TEST(ColumnarObjTest, AddLeafCellThenAddSparseSibling) {
+ std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "[", {1, 2}),
+ makeCellOfIntegers("a", "[o1", {}),
+ makeCellOfIntegers("a.c", "[1", {3})};
+ compareMakeObjWithExpected(cells, fromjson("{a: [{b: 1}, {b: 2, c: 3}]}"));
+}
} // namespace mongo::sbe
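
The new materializeObjectNoArrays helper and the reworked 'o'/'|' branch in addToArray exist for the case the added AddLeafCellThenAddSparseSibling test exercises: a later, sparser cell ("a.c") must splice its value into array elements that earlier cells ("a.b", "a") already materialized, producing {b: 2, c: 3} in the second element rather than clobbering it. A toy stand-alone illustration of that merge-by-path behaviour, with plain structs instead of the real SBE value system:

// Toy illustration of merging a sparse sibling path into an already
// materialized object; not the real columnar reconstruction code.
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

// A node is either a leaf integer or a subobject; crude but complete.
struct Node {
    bool isObject = false;
    int leaf = 0;
    std::map<std::string, std::unique_ptr<Node>> fields;
};

// Creates intermediate subobjects as needed and writes the leaf, without
// discarding siblings materialized from earlier columnar cells.
void setAtPath(Node& obj, const std::vector<std::string>& path, size_t i, int v) {
    auto& slot = obj.fields[path[i]];
    if (!slot) slot = std::make_unique<Node>();
    if (i + 1 == path.size()) {
        slot->leaf = v;
        return;
    }
    slot->isObject = true;
    setAtPath(*slot, path, i + 1, v);
}

int main() {
    Node elem;                       // one element of the 'a' array
    elem.isObject = true;
    setAtPath(elem, {"b"}, 0, 2);    // earlier cell "a.b" contributed b: 2
    setAtPath(elem, {"c"}, 0, 3);    // sparse sibling "a.c" adds c: 3, keeping b
    std::cout << "{b: " << elem.fields["b"]->leaf
              << ", c: " << elem.fields["c"]->leaf << "}\n";  // prints {b: 2, c: 3}
}
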
diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp
index cde56e31ba2..5bbdc40170e 100644
--- a/src/mongo/db/exec/sbe/values/value.cpp
+++ b/src/mongo/db/exec/sbe/values/value.cpp
@@ -326,6 +326,9 @@ void releaseValue(TypeTags tag, Value val) noexcept {
case TypeTags::indexBounds:
delete getIndexBoundsView(val);
break;
+ case TypeTags::classicMatchExpresion:
+ delete getClassicMatchExpressionView(val);
+ break;
default:
break;
}
diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h
index 728a0c16634..d0202b0f1c0 100644
--- a/src/mongo/db/exec/sbe/values/value.h
+++ b/src/mongo/db/exec/sbe/values/value.h
@@ -154,6 +154,9 @@ enum class TypeTags : uint8_t {
// Pointer to a IndexBounds object.
indexBounds,
+
+ // Pointer to a classic engine match expression.
+ classicMatchExpresion,
};
inline constexpr bool isNumber(TypeTags tag) noexcept {
@@ -1249,6 +1252,10 @@ inline IndexBounds* getIndexBoundsView(Value val) noexcept {
return reinterpret_cast<IndexBounds*>(val);
}
+inline MatchExpression* getClassicMatchExpressionView(Value val) noexcept {
+ return reinterpret_cast<MatchExpression*>(val);
+}
+
/**
* Pattern and flags of Regex are stored in BSON as two C strings written one after another.
*
@@ -1450,6 +1457,12 @@ inline std::pair<TypeTags, Value> copyValue(TypeTags tag, Value val) {
return makeCopyCollator(*getCollatorView(val));
case TypeTags::indexBounds:
return makeCopyIndexBounds(*getIndexBoundsView(val));
+ case TypeTags::classicMatchExpresion:
+ // Beware: "shallow cloning" a match expression does not copy the underlying BSON. The
+ // original BSON must remain alive for both the original MatchExpression and the clone.
+ return {TypeTags::classicMatchExpresion,
+ bitcastFrom<const MatchExpression*>(
+ getClassicMatchExpressionView(val)->shallowClone().release())};
default:
break;
}
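
The comment added to copyValue is a lifetime contract rather than an implementation note: shallowClone() leaves both the original MatchExpression and the copy pointing into the same backing BSON, so whoever owns the copied value must also keep that BSON alive. A small sketch of the correct usage pattern, with a hypothetical FakeMatcher type standing in for the real MatchExpression API:

// Stand-in types showing why the backing buffer must outlive both the original
// matcher and its shallow clone; this mirrors the comment in copyValue().
#include <cassert>
#include <memory>
#include <string>

struct FakeMatcher {
    const std::string* backingBson;  // borrowed, never owned
    std::unique_ptr<FakeMatcher> shallowClone() const {
        return std::make_unique<FakeMatcher>(FakeMatcher{backingBson});
    }
};

int main() {
    auto bson = std::make_shared<std::string>("{a: 1}");  // owner of the bytes
    FakeMatcher original{bson.get()};
    auto copy = original.shallowClone();  // still points into *bson
    // Correct usage: whoever holds 'copy' must also keep 'bson' alive.
    assert(copy->backingBson == bson.get());
    return 0;
}
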
diff --git a/src/mongo/db/exec/sbe/values/value_printer.cpp b/src/mongo/db/exec/sbe/values/value_printer.cpp
index 78e655114a3..90a43442329 100644
--- a/src/mongo/db/exec/sbe/values/value_printer.cpp
+++ b/src/mongo/db/exec/sbe/values/value_printer.cpp
@@ -156,6 +156,9 @@ void ValuePrinter<T>::writeTagToStream(TypeTags tag) {
case TypeTags::indexBounds:
stream << "indexBounds";
break;
+ case TypeTags::classicMatchExpresion:
+ stream << "classicMatchExpression";
+ break;
default:
stream << "unknown tag";
break;
@@ -472,6 +475,9 @@ void ValuePrinter<T>::writeValueToStream(TypeTags tag, Value val, size_t depth)
getIndexBoundsView(val)->toString(true /* hasNonSimpleCollation */));
stream << ")";
break;
+ case TypeTags::classicMatchExpresion:
+ stream << "ClassicMatcher(" << getClassicMatchExpressionView(val)->toString() << ")";
+ break;
default:
MONGO_UNREACHABLE;
}
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index f6a6b35970e..4f9329e7ed6 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -27,7 +27,6 @@
* it in the license file.
*/
-
#include "mongo/platform/basic.h"
#include "mongo/db/exec/sbe/expressions/expression.h"
@@ -153,6 +152,8 @@ int Instruction::stackOffset[Instruction::Tags::lastInstruction] = {
0, // ret
-1, // fail
+
+ 0, // applyClassicMatcher
};
namespace {
@@ -211,17 +212,13 @@ std::string CodeFragment::toString() const {
case Instruction::cmp3w:
case Instruction::collCmp3w:
case Instruction::fillEmpty:
- case Instruction::fillEmptyConst:
case Instruction::getField:
- case Instruction::getFieldConst:
case Instruction::getElement:
case Instruction::getArraySize:
case Instruction::collComparisonKey:
case Instruction::getFieldOrElement:
case Instruction::traverseP:
- case Instruction::traversePConst:
case Instruction::traverseF:
- case Instruction::traverseFConst:
case Instruction::setField:
case Instruction::aggSum:
case Instruction::aggMin:
@@ -249,9 +246,15 @@ std::string CodeFragment::toString() const {
break;
}
// Instructions with a single integer argument.
+ case Instruction::pushLocalLambda:
+ case Instruction::traversePConst: {
+ auto offset = readFromMemory<int>(pcPointer);
+ pcPointer += sizeof(offset);
+ ss << "offset: " << offset;
+ break;
+ }
case Instruction::pushLocalVal:
- case Instruction::pushMoveLocalVal:
- case Instruction::pushLocalLambda: {
+ case Instruction::pushMoveLocalVal: {
auto arg = readFromMemory<int>(pcPointer);
pcPointer += sizeof(arg);
ss << "arg: " << arg;
@@ -266,6 +269,21 @@ std::string CodeFragment::toString() const {
break;
}
// Instructions with other kinds of arguments.
+ case Instruction::traverseFConst: {
+ auto k = readFromMemory<Instruction::Constants>(pcPointer);
+ pcPointer += sizeof(k);
+ auto offset = readFromMemory<int>(pcPointer);
+ pcPointer += sizeof(offset);
+ ss << "k: " << Instruction::toStringConstants(k) << ", offset: " << offset;
+ break;
+ }
+ case Instruction::fillEmptyConst: {
+ auto k = readFromMemory<Instruction::Constants>(pcPointer);
+ pcPointer += sizeof(k);
+ ss << "k: " << Instruction::toStringConstants(k);
+ break;
+ }
+ case Instruction::getFieldConst:
case Instruction::pushConstVal: {
auto tag = readFromMemory<value::TypeTags>(pcPointer);
pcPointer += sizeof(tag);
@@ -281,6 +299,12 @@ std::string CodeFragment::toString() const {
ss << "accessor: " << static_cast<void*>(accessor);
break;
}
+ case Instruction::applyClassicMatcher: {
+ const auto* matcher = readFromMemory<const MatchExpression*>(pcPointer);
+ pcPointer += sizeof(matcher);
+ ss << "matcher: " << static_cast<const void*>(matcher);
+ break;
+ }
case Instruction::numConvert: {
auto tag = readFromMemory<value::TypeTags>(pcPointer);
pcPointer += sizeof(tag);
@@ -446,6 +470,17 @@ void CodeFragment::appendNumericConvert(value::TypeTags targetTag) {
offset += writeToMemory(offset, targetTag);
}
+void CodeFragment::appendApplyClassicMatcher(const MatchExpression* matcher) {
+ Instruction i;
+ i.tag = Instruction::applyClassicMatcher;
+ adjustStackSimple(i);
+
+ auto offset = allocateSpace(sizeof(Instruction) + sizeof(matcher));
+
+ offset += writeToMemory(offset, i);
+ offset += writeToMemory(offset, matcher);
+}
+
void CodeFragment::appendSub() {
appendSimpleInstruction(Instruction::sub);
}
@@ -5848,6 +5883,31 @@ void ByteCode::runInternal(const CodeFragment* code, int64_t position) {
break;
}
+ case Instruction::applyClassicMatcher: {
+ const auto* matcher = readFromMemory<const MatchExpression*>(pcPointer);
+ pcPointer += sizeof(matcher);
+
+ auto [ownedObj, tagObj, valObj] = getFromStack(0);
+
+ BSONObj bsonObjForMatching;
+ if (tagObj == value::TypeTags::Object) {
+ BSONObjBuilder builder;
+ sbe::bson::convertToBsonObj(builder, sbe::value::getObjectView(valObj));
+ bsonObjForMatching = builder.obj();
+ } else if (tagObj == value::TypeTags::bsonObject) {
+ auto bson = value::getRawPointerView(valObj);
+ bsonObjForMatching = BSONObj(bson);
+ } else {
+ MONGO_UNREACHABLE_TASSERT(6681402);
+ }
+
+ bool res = matcher->matchesBSON(bsonObjForMatching);
+ if (ownedObj) {
+ value::releaseValue(tagObj, valObj);
+ }
+ topStack(false, value::TypeTags::Boolean, value::bitcastFrom<bool>(res));
+ break;
+ }
default:
MONGO_UNREACHABLE;
}
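
The new applyClassicMatcher instruction bakes a raw MatchExpression pointer directly into the bytecode: appendApplyClassicMatcher allocates room for the instruction plus the pointer and writes both, and the interpreter case reads the pointer back, normalizes the top-of-stack value to a BSONObj (materializing an SBE Object through a BSONObjBuilder, or viewing a bsonObject in place), and replaces the stack top with an unowned Boolean, releasing the input if it was owned. A toy version of just the encode/decode-a-pointer part, with stand-in types rather than the real CodeFragment/ByteCode API:

// Toy version of embedding a raw pointer in a bytecode stream and reading it
// back at execution time; stand-in types only.
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

struct FakeMatcher {
    bool matches(const std::string& doc) const { return doc.find("a") != std::string::npos; }
};

std::vector<uint8_t> code;

template <typename T>
void writeToCode(const T& v) {
    const auto* p = reinterpret_cast<const uint8_t*>(&v);
    code.insert(code.end(), p, p + sizeof(T));  // append raw bytes, like writeToMemory
}

template <typename T>
T readFromCode(const uint8_t*& pc) {
    T v;
    std::memcpy(&v, pc, sizeof(T));  // decode, like readFromMemory, and advance pc
    pc += sizeof(T);
    return v;
}

int main() {
    FakeMatcher matcher;
    writeToCode(uint8_t{1});   // pretend opcode: applyClassicMatcher
    writeToCode(&matcher);     // pointer baked into the instruction stream

    const uint8_t* pc = code.data();
    readFromCode<uint8_t>(pc);                              // decode opcode
    const auto* m = readFromCode<const FakeMatcher*>(pc);   // decode matcher pointer
    return m->matches("{a: 1}") ? 0 : 1;  // matcher invoked on the current document
}
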
diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h
index 56d708fe1a1..2fec8265bfd 100644
--- a/src/mongo/db/exec/sbe/vm/vm.h
+++ b/src/mongo/db/exec/sbe/vm/vm.h
@@ -321,6 +321,8 @@ struct Instruction {
fail,
+ applyClassicMatcher, // Instruction which calls into the classic engine MatchExpression.
+
lastInstruction // this is just a marker used to calculate number of instructions
};
@@ -330,6 +332,19 @@ struct Instruction {
False,
};
+ static const char* toStringConstants(Constants k) {
+ switch (k) {
+ case Null:
+ return "Null";
+ case True:
+ return "True";
+ case False:
+ return "False";
+ default:
+ return "unknown";
+ }
+ }
+
// Make sure that values in this arrays are always in-sync with the enum.
static int stackOffset[];
@@ -481,6 +496,8 @@ struct Instruction {
return "ret";
case fail:
return "fail";
+ case applyClassicMatcher:
+ return "applyClassicMatcher";
default:
return "unrecognized";
}
@@ -769,9 +786,13 @@ public:
appendSimpleInstruction(Instruction::fail);
}
void appendNumericConvert(value::TypeTags targetTag);
+ void appendApplyClassicMatcher(const MatchExpression*);
void fixup(int offset);
+ // For printing from an interactive debugger.
+ std::string toString() const;
+
private:
void appendSimpleInstruction(Instruction::Tags tag);
auto allocateSpace(size_t size) {
@@ -784,9 +805,6 @@ private:
void copyCodeAndFixup(CodeFragment&& from);
private:
- // For printing from an interactive debugger.
- std::string toString() const;
-
absl::InlinedVector<uint8_t, 16> _instrs;
/**
diff --git a/src/mongo/db/exec/update_stage.cpp b/src/mongo/db/exec/update_stage.cpp
index 74dc3aaba5a..27304f16289 100644
--- a/src/mongo/db/exec/update_stage.cpp
+++ b/src/mongo/db/exec/update_stage.cpp
@@ -461,24 +461,41 @@ PlanStage::StageState UpdateStage::doWork(WorkingSetID* out) {
bool writeToOrphan = false;
if (!_params.request->explain() && _isUserInitiatedWrite) {
- const auto action = _preWriteFilter.computeAction(member->doc.value());
- if (action == write_stage_common::PreWriteFilter::Action::kSkip) {
- LOGV2_DEBUG(
- 5983200,
- 3,
- "Skipping update operation to orphan document to prevent a wrong change "
- "stream event",
- "namespace"_attr = collection()->ns(),
- "record"_attr = member->doc.value());
- return PlanStage::NEED_TIME;
- } else if (action == write_stage_common::PreWriteFilter::Action::kWriteAsFromMigrate) {
- LOGV2_DEBUG(6184701,
- 3,
- "Marking update operation to orphan document with the fromMigrate flag "
- "to prevent a wrong change stream event",
- "namespace"_attr = collection()->ns(),
- "record"_attr = member->doc.value());
- writeToOrphan = true;
+ try {
+ const auto action = _preWriteFilter.computeAction(member->doc.value());
+ if (action == write_stage_common::PreWriteFilter::Action::kSkip) {
+ LOGV2_DEBUG(
+ 5983200,
+ 3,
+ "Skipping update operation to orphan document to prevent a wrong change "
+ "stream event",
+ "namespace"_attr = collection()->ns(),
+ "record"_attr = member->doc.value());
+ return PlanStage::NEED_TIME;
+ } else if (action ==
+ write_stage_common::PreWriteFilter::Action::kWriteAsFromMigrate) {
+ LOGV2_DEBUG(
+ 6184701,
+ 3,
+ "Marking update operation to orphan document with the fromMigrate flag "
+ "to prevent a wrong change stream event",
+ "namespace"_attr = collection()->ns(),
+ "record"_attr = member->doc.value());
+ writeToOrphan = true;
+ }
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>& ex) {
+ if (ex->getVersionReceived() == ChunkVersion::IGNORED() &&
+ ex->getCriticalSectionSignal()) {
+ // If the ChunkVersion is IGNORED and we encountered a critical section, then yield,
+ // wait for the critical section to finish, and then resume the write from the point
+ // where we left off. We do this to prevent large multi-writes from repeatedly
+ // failing due to StaleConfig and exhausting the mongos retry attempts.
+ planExecutorShardingCriticalSectionFuture(opCtx()) =
+ ex->getCriticalSectionSignal();
+ memberFreer.dismiss(); // Keep this member around so we can retry updating it.
+ return prepareToRetryWSM(id, out);
+ }
+ throw;
}
}
@@ -508,6 +525,18 @@ PlanStage::StageState UpdateStage::doWork(WorkingSetID* out) {
} catch (const WriteConflictException&) {
memberFreer.dismiss(); // Keep this member around so we can retry updating it.
return prepareToRetryWSM(id, out);
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>& ex) {
+ if (ex->getVersionReceived() == ChunkVersion::IGNORED() &&
+ ex->getCriticalSectionSignal()) {
+ // If the ChunkVersion is IGNORED and we encountered a critical section, then yield,
+ // wait for the critical section to finish, and then resume the write from the point
+ // where we left off. We do this to prevent large multi-writes from repeatedly
+ // failing due to StaleConfig and exhausting the mongos retry attempts.
+ planExecutorShardingCriticalSectionFuture(opCtx()) = ex->getCriticalSectionSignal();
+ memberFreer.dismiss(); // Keep this member around so we can retry updating it.
+ return prepareToRetryWSM(id, out);
+ }
+ throw;
}
// Set member's obj to be the doc we want to return.
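
Both catch blocks added to UpdateStage::doWork follow the same protocol: if the StaleConfig carries ChunkVersion::IGNORED together with a critical-section signal, the executor stashes that signal future, dismisses the member freer so the working-set member survives, and retries the same WSM after yielding; any other StaleConfig is rethrown as before. A schematic of that control flow with simplified stand-in types, not the real PlanStage/WorkingSet machinery:

// Schematic of the catch-and-retry pattern added to UpdateStage::doWork.
#include <stdexcept>

enum class StageState { ADVANCED, NEED_YIELD };

struct StaleConfigEx : std::runtime_error {
    StaleConfigEx(bool ignoredVersion, bool criticalSection)
        : std::runtime_error("StaleConfig"),
          versionIgnored(ignoredVersion),
          hasCriticalSectionSignal(criticalSection) {}
    bool versionIgnored;
    bool hasCriticalSectionSignal;
};

template <typename WriteFn>
StageState doOneWrite(bool& keepMemberAlive, const WriteFn& writeFn) {
    try {
        writeFn();
        return StageState::ADVANCED;
    } catch (const StaleConfigEx& ex) {
        if (ex.versionIgnored && ex.hasCriticalSectionSignal) {
            // Stash the critical-section future (elided here), keep the
            // working-set member around, and retry this document after yielding.
            keepMemberAlive = true;
            return StageState::NEED_YIELD;
        }
        throw;  // any other StaleConfig propagates as before
    }
}

int main() {
    bool keep = false;
    auto state = doOneWrite(keep, [] {
        throw StaleConfigEx(/*ignoredVersion=*/true, /*criticalSection=*/true);
    });
    return (state == StageState::NEED_YIELD && keep) ? 0 : 1;
}
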
diff --git a/src/mongo/db/exec/write_stage_common.cpp b/src/mongo/db/exec/write_stage_common.cpp
index 0a1ed4179aa..3d885d9d50e 100644
--- a/src/mongo/db/exec/write_stage_common.cpp
+++ b/src/mongo/db/exec/write_stage_common.cpp
@@ -46,15 +46,6 @@
namespace mongo {
-namespace {
-
-bool computeIsStandaloneOrPrimary(OperationContext* opCtx) {
- const auto replCoord{repl::ReplicationCoordinator::get(opCtx)};
- return replCoord->canAcceptWritesForDatabase(opCtx, "admin");
-}
-
-} // namespace
-
namespace write_stage_common {
PreWriteFilter::PreWriteFilter(OperationContext* opCtx, NamespaceString nss)
@@ -65,14 +56,23 @@ PreWriteFilter::PreWriteFilter(OperationContext* opCtx, NamespaceString nss)
return fcv.isVersionInitialized() &&
feature_flags::gFeatureFlagNoChangeStreamEventsDueToOrphans.isEnabled(fcv);
}()),
- _isStandaloneOrPrimary(computeIsStandaloneOrPrimary(_opCtx)) {}
+ _skipFiltering([&] {
+ // Always allow writes on replica sets.
+ if (serverGlobalParams.clusterRole == ClusterRole::None) {
+ return true;
+ }
+
+ // Always allow writes on standalone and secondary nodes.
+ const auto replCoord{repl::ReplicationCoordinator::get(opCtx)};
+ return !replCoord->canAcceptWritesForDatabase(opCtx, NamespaceString::kAdminDb);
+ }()) {}
PreWriteFilter::Action PreWriteFilter::computeAction(const Document& doc) {
// Skip the checks if the Filter is not enabled.
if (!_isEnabled)
return Action::kWrite;
- if (!_isStandaloneOrPrimary) {
+ if (_skipFiltering) {
// Secondaries do not apply any filtering logic as the primary already did.
return Action::kWrite;
}
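
The unchanged comment above still mentions only secondaries, but the new _skipFiltering member computed in the constructor covers a bit more: filtering is skipped entirely on non-sharded deployments (ClusterRole::None) and on any node that cannot accept writes for the admin database, so only a writable node of a sharded cluster runs the orphan pre-write check. A condensed sketch of that decision, with stand-in inputs instead of the real serverGlobalParams/ReplicationCoordinator calls:

// Condensed decision table for PreWriteFilter's _skipFiltering member.
enum class ClusterRole { None, ShardServer };

bool computeSkipFiltering(ClusterRole role, bool canAcceptWritesForAdminDb) {
    if (role == ClusterRole::None) {
        return true;  // plain replica set or standalone: never filter orphans
    }
    // On a shard, only a node that can take writes performs the pre-write check.
    return !canAcceptWritesForAdminDb;
}

int main() {
    // Sharded node that cannot accept writes: skip, the primary already filtered.
    return computeSkipFiltering(ClusterRole::ShardServer, /*canAcceptWrites=*/false) ? 0 : 1;
}
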
diff --git a/src/mongo/db/exec/write_stage_common.h b/src/mongo/db/exec/write_stage_common.h
index 3eff70da081..5628822efff 100644
--- a/src/mongo/db/exec/write_stage_common.h
+++ b/src/mongo/db/exec/write_stage_common.h
@@ -80,7 +80,7 @@ private:
OperationContext* _opCtx;
NamespaceString _nss;
const bool _isEnabled;
- const bool _isStandaloneOrPrimary;
+ const bool _skipFiltering;
std::unique_ptr<ShardFilterer> _shardFilterer;
};
diff --git a/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp b/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp
index df60b317eb2..faa05c2b63e 100644
--- a/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp
+++ b/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp
@@ -143,42 +143,49 @@ auto startExhaustQuery(
int queryOptions = 0,
Milliseconds awaitDataTimeoutMS = Milliseconds(5000),
const boost::optional<repl::OpTime>& lastKnownCommittedOpTime = boost::none) {
- queryOptions = queryOptions | QueryOption_Exhaust;
- auto queryThread =
- stdx::async(stdx::launch::async,
- [&queryCursor,
- queryConnection,
- queryOptions,
- awaitDataTimeoutMS,
- lastKnownCommittedOpTime] {
- const auto projSpec = BSON("_id" << 0 << "a" << 1);
- // Issue the initial 'find' with a batchSize of 2 and the exhaust flag set.
- // We then iterate through the first batch and confirm that the results are
- // as expected.
- queryCursor = queryConnection->query_DEPRECATED(
- testNSS, BSONObj{}, Query(), 0, 0, &projSpec, queryOptions, 2);
- for (int i = 0; i < 2; ++i) {
- ASSERT_BSONOBJ_EQ(queryCursor->nextSafe(), BSON("a" << i));
- }
- // Having exhausted the two results returned by the initial find, we set the
- // batchSize to 1 and issue a single getMore via DBClientCursor::more().
- // Because the 'exhaust' flag is set, the server will generate a series of
- // internal getMores and stream them back to the client until the cursor is
- // exhausted, without the client sending any further getMore requests. We
- // expect this request to hang at the
- // 'waitWithPinnedCursorDuringGetMoreBatch' failpoint.
- queryCursor->setBatchSize(1);
- if ((queryOptions & QueryOption_CursorTailable) &&
- (queryOptions & QueryOption_AwaitData)) {
- queryCursor->setAwaitDataTimeoutMS(awaitDataTimeoutMS);
- if (lastKnownCommittedOpTime) {
- auto term = lastKnownCommittedOpTime.get().getTerm();
- queryCursor->setCurrentTermAndLastCommittedOpTime(
- term, lastKnownCommittedOpTime);
- }
- }
- ASSERT(queryCursor->more());
- });
+ auto queryThread = stdx::async(
+ stdx::launch::async,
+ [&queryCursor,
+ queryConnection,
+ queryOptions,
+ awaitDataTimeoutMS,
+ lastKnownCommittedOpTime] {
+ const auto projSpec = BSON("_id" << 0 << "a" << 1);
+ // Issue the initial 'find' with a batchSize of 2 and the exhaust flag set.
+ // We then iterate through the first batch and confirm that the results are
+ // as expected.
+ FindCommandRequest findCmd{testNSS};
+ findCmd.setProjection(projSpec);
+ findCmd.setBatchSize(2);
+ if (queryOptions & QueryOption_CursorTailable) {
+ findCmd.setTailable(true);
+ }
+ if (queryOptions & QueryOption_AwaitData) {
+ findCmd.setAwaitData(true);
+ }
+
+ queryCursor = queryConnection->find(findCmd, ReadPreferenceSetting{}, ExhaustMode::kOn);
+ for (int i = 0; i < 2; ++i) {
+ ASSERT_BSONOBJ_EQ(queryCursor->nextSafe(), BSON("a" << i));
+ }
+ // Having exhausted the two results returned by the initial find, we set the
+ // batchSize to 1 and issue a single getMore via DBClientCursor::more().
+ // Because the 'exhaust' flag is set, the server will generate a series of
+ // internal getMores and stream them back to the client until the cursor is
+ // exhausted, without the client sending any further getMore requests. We
+ // expect this request to hang at the
+ // 'waitWithPinnedCursorDuringGetMoreBatch' failpoint.
+ queryCursor->setBatchSize(1);
+ if (findCmd.getTailable() && findCmd.getAwaitData()) {
+ queryCursor->setAwaitDataTimeoutMS(awaitDataTimeoutMS);
+ if (lastKnownCommittedOpTime) {
+ auto term = lastKnownCommittedOpTime.get().getTerm();
+ queryCursor->setCurrentTermAndLastCommittedOpTime(term,
+ lastKnownCommittedOpTime);
+ }
+ }
+ ASSERT(queryCursor->more());
+ });
// Wait until the parallel operation initializes its cursor.
const auto startTime = clock->now();
diff --git a/src/mongo/db/fle_crud.cpp b/src/mongo/db/fle_crud.cpp
index d8f88c80991..c9ee8496652 100644
--- a/src/mongo/db/fle_crud.cpp
+++ b/src/mongo/db/fle_crud.cpp
@@ -46,6 +46,7 @@
#include "mongo/db/query/collation/collator_factory_interface.h"
#include "mongo/db/query/find_command_gen.h"
#include "mongo/db/query/fle/server_rewrite.h"
+#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/service_context.h"
#include "mongo/db/transaction_api.h"
#include "mongo/idl/idl_parser.h"
@@ -96,21 +97,24 @@ void appendSingleStatusToWriteErrors(const Status& status,
replyBase->setWriteErrors(errors);
}
-void replyToResponse(write_ops::WriteCommandReplyBase* replyBase,
+void replyToResponse(OperationContext* opCtx,
+ write_ops::WriteCommandReplyBase* replyBase,
BatchedCommandResponse* response) {
response->setStatus(Status::OK());
response->setN(replyBase->getN());
- if (replyBase->getElectionId()) {
- response->setElectionId(replyBase->getElectionId().value());
- }
- if (replyBase->getOpTime()) {
- response->setLastOp(replyBase->getOpTime().value());
- }
if (replyBase->getWriteErrors()) {
for (const auto& error : *replyBase->getWriteErrors()) {
response->addToErrDetails(error);
}
}
+
+ // Update the OpTime on the reply to the current OpTime.
+ //
+ // The OpTime in the reply reflects when the request was run, not when it was
+ // committed. The Transaction API propagates the OpTime of the commit transaction onto the
+ // current thread, so grab it from TLS and set it on the reply.
+ //
+ response->setLastOp(repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp());
}
void responseToReply(const BatchedCommandResponse& response,
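
The reworked replyToResponse drops the election id and OpTime copied from the inner reply because those reflect when the statements ran inside the transaction; the commit OpTime has already been propagated onto the current client by the Transaction API, so the response's lastOp is taken from ReplClientInfo instead. A tiny model of that choice, with stand-in types rather than the real repl classes:

// Tiny model of "take lastOp from the committing client, not the inner reply".
#include <cassert>

struct OpTime { long long ts{0}; };

struct ThreadLocalReplClientInfo {
    OpTime lastOp;  // set by the Transaction API after commit
};

OpTime opTimeForResponse(const ThreadLocalReplClientInfo& client, OpTime /*innerReplyOpTime*/) {
    // The inner reply's OpTime reflects when the statements ran; the commit
    // OpTime lives on the client, so that is what goes back to the caller.
    return client.lastOp;
}

int main() {
    ThreadLocalReplClientInfo client{OpTime{42}};
    assert(opTimeForResponse(client, OpTime{7}).ts == 42);
    return 0;
}
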
@@ -188,16 +192,20 @@ std::pair<FLEBatchResult, write_ops::InsertCommandReply> processInsert(
auto edcNss = insertRequest.getNamespace();
auto ei = insertRequest.getEncryptionInformation().get();
+ bool bypassDocumentValidation =
+ insertRequest.getWriteCommandRequestBase().getBypassDocumentValidation();
+
auto efc = EncryptionInformationHelpers::getAndValidateSchema(edcNss, ei);
auto documents = insertRequest.getDocuments();
// TODO - how to check if a document will be too large???
+
uassert(6371202,
"Only single insert batches are supported in Queryable Encryption",
documents.size() == 1);
auto document = documents[0];
- EDCServerCollection::validateEncryptedFieldInfo(document, efc);
+ EDCServerCollection::validateEncryptedFieldInfo(document, efc, bypassDocumentValidation);
auto serverPayload = std::make_shared<std::vector<EDCServerPayloadInfo>>(
EDCServerCollection::getEncryptedFieldInfo(document));
@@ -221,8 +229,8 @@ std::pair<FLEBatchResult, write_ops::InsertCommandReply> processInsert(
auto swResult = trun->runNoThrow(
opCtx,
- [sharedInsertBlock, reply, ownedDocument](const txn_api::TransactionClient& txnClient,
- ExecutorPtr txnExec) {
+ [sharedInsertBlock, reply, ownedDocument, bypassDocumentValidation](
+ const txn_api::TransactionClient& txnClient, ExecutorPtr txnExec) {
FLEQueryInterfaceImpl queryImpl(txnClient, getGlobalServiceContext());
auto [edcNss2, efc2, serverPayload2, stmtId2] = *sharedInsertBlock.get();
@@ -232,8 +240,13 @@ std::pair<FLEBatchResult, write_ops::InsertCommandReply> processInsert(
fleCrudHangPreInsert.pauseWhileSet();
}
- *reply = uassertStatusOK(processInsert(
- &queryImpl, edcNss2, *serverPayload2.get(), efc2, stmtId2, ownedDocument));
+ *reply = uassertStatusOK(processInsert(&queryImpl,
+ edcNss2,
+ *serverPayload2.get(),
+ efc2,
+ stmtId2,
+ ownedDocument,
+ bypassDocumentValidation));
if (MONGO_unlikely(fleCrudHangInsert.shouldFail())) {
LOGV2(6371903, "Hanging due to fleCrudHangInsert fail point");
@@ -439,7 +452,8 @@ void processFieldsForInsert(FLEQueryInterface* queryImpl,
const NamespaceString& edcNss,
std::vector<EDCServerPayloadInfo>& serverPayload,
const EncryptedFieldConfig& efc,
- int32_t* pStmtId) {
+ int32_t* pStmtId,
+ bool bypassDocumentValidation) {
NamespaceString nssEsc(edcNss.db(), efc.getEscCollection().get());
@@ -507,7 +521,8 @@ void processFieldsForInsert(FLEQueryInterface* queryImpl,
ECOCCollection::generateDocument(payload.fieldPathName,
payload.payload.getEncryptedTokens()),
pStmtId,
- false));
+ false,
+ bypassDocumentValidation));
checkWriteErrors(ecocInsertReply);
}
}
@@ -717,9 +732,11 @@ StatusWith<write_ops::InsertCommandReply> processInsert(
std::vector<EDCServerPayloadInfo>& serverPayload,
const EncryptedFieldConfig& efc,
int32_t stmtId,
- BSONObj document) {
+ BSONObj document,
+ bool bypassDocumentValidation) {
- processFieldsForInsert(queryImpl, edcNss, serverPayload, efc, &stmtId);
+ processFieldsForInsert(
+ queryImpl, edcNss, serverPayload, efc, &stmtId, bypassDocumentValidation);
auto finalDoc = EDCServerCollection::finalizeForInsert(document, serverPayload);
@@ -790,6 +807,9 @@ write_ops::UpdateCommandReply processUpdate(FLEQueryInterface* queryImpl,
auto tokenMap = EncryptionInformationHelpers::getDeleteTokens(edcNss, ei);
const auto updateOpEntry = updateRequest.getUpdates()[0];
+ auto bypassDocumentValidation =
+ updateRequest.getWriteCommandRequestBase().getBypassDocumentValidation();
+
const auto updateModification = updateOpEntry.getU();
int32_t stmtId = getStmtIdForWriteAt(updateRequest, 0);
@@ -797,16 +817,26 @@ write_ops::UpdateCommandReply processUpdate(FLEQueryInterface* queryImpl,
// Step 1 ----
std::vector<EDCServerPayloadInfo> serverPayload;
auto newUpdateOpEntry = updateRequest.getUpdates()[0];
- newUpdateOpEntry.setQ(fle::rewriteEncryptedFilterInsideTxn(
- queryImpl, updateRequest.getDbName(), efc, expCtx, newUpdateOpEntry.getQ()));
+
+ auto highCardinalityModeAllowed = newUpdateOpEntry.getUpsert()
+ ? fle::HighCardinalityModeAllowed::kDisallow
+ : fle::HighCardinalityModeAllowed::kAllow;
+
+ newUpdateOpEntry.setQ(fle::rewriteEncryptedFilterInsideTxn(queryImpl,
+ updateRequest.getDbName(),
+ efc,
+ expCtx,
+ newUpdateOpEntry.getQ(),
+ highCardinalityModeAllowed));
if (updateModification.type() == write_ops::UpdateModification::Type::kModifier) {
auto updateModifier = updateModification.getUpdateModifier();
auto setObject = updateModifier.getObjectField("$set");
- EDCServerCollection::validateEncryptedFieldInfo(setObject, efc);
+ EDCServerCollection::validateEncryptedFieldInfo(setObject, efc, bypassDocumentValidation);
serverPayload = EDCServerCollection::getEncryptedFieldInfo(updateModifier);
- processFieldsForInsert(queryImpl, edcNss, serverPayload, efc, &stmtId);
+ processFieldsForInsert(
+ queryImpl, edcNss, serverPayload, efc, &stmtId, bypassDocumentValidation);
// Step 2 ----
auto pushUpdate = EDCServerCollection::finalizeForUpdate(updateModifier, serverPayload);
@@ -815,10 +845,12 @@ write_ops::UpdateCommandReply processUpdate(FLEQueryInterface* queryImpl,
pushUpdate, write_ops::UpdateModification::ClassicTag(), false));
} else {
auto replacementDocument = updateModification.getUpdateReplacement();
- EDCServerCollection::validateEncryptedFieldInfo(replacementDocument, efc);
+ EDCServerCollection::validateEncryptedFieldInfo(
+ replacementDocument, efc, bypassDocumentValidation);
serverPayload = EDCServerCollection::getEncryptedFieldInfo(replacementDocument);
- processFieldsForInsert(queryImpl, edcNss, serverPayload, efc, &stmtId);
+ processFieldsForInsert(
+ queryImpl, edcNss, serverPayload, efc, &stmtId, bypassDocumentValidation);
// Step 2 ----
auto safeContentReplace =
@@ -833,6 +865,8 @@ write_ops::UpdateCommandReply processUpdate(FLEQueryInterface* queryImpl,
newUpdateRequest.setUpdates({newUpdateOpEntry});
newUpdateRequest.getWriteCommandRequestBase().setStmtIds(boost::none);
newUpdateRequest.getWriteCommandRequestBase().setStmtId(stmtId);
+ newUpdateRequest.getWriteCommandRequestBase().setBypassDocumentValidation(
+ bypassDocumentValidation);
++stmtId;
auto [updateReply, originalDocument] =
@@ -890,6 +924,10 @@ FLEBatchResult processFLEBatch(OperationContext* opCtx,
BatchedCommandResponse* response,
boost::optional<OID> targetEpoch) {
+ if (request.getWriteCommandRequestBase().getEncryptionInformation()->getCrudProcessed()) {
+ return FLEBatchResult::kNotProcessed;
+ }
+
// TODO (SERVER-65077): Remove FCV check once 6.0 is released
uassert(6371209,
"Queryable Encryption is only supported when FCV supports 6.0",
@@ -904,7 +942,7 @@ FLEBatchResult processFLEBatch(OperationContext* opCtx,
return FLEBatchResult::kNotProcessed;
}
- replyToResponse(&insertReply.getWriteCommandReplyBase(), response);
+ replyToResponse(opCtx, &insertReply.getWriteCommandReplyBase(), response);
return FLEBatchResult::kProcessed;
} else if (request.getBatchType() == BatchedCommandRequest::BatchType_Delete) {
@@ -913,7 +951,7 @@ FLEBatchResult processFLEBatch(OperationContext* opCtx,
auto deleteReply = processDelete(opCtx, deleteRequest, &getTransactionWithRetriesForMongoS);
- replyToResponse(&deleteReply.getWriteCommandReplyBase(), response);
+ replyToResponse(opCtx, &deleteReply.getWriteCommandReplyBase(), response);
return FLEBatchResult::kProcessed;
} else if (request.getBatchType() == BatchedCommandRequest::BatchType_Update) {
@@ -922,7 +960,7 @@ FLEBatchResult processFLEBatch(OperationContext* opCtx,
auto updateReply = processUpdate(opCtx, updateRequest, &getTransactionWithRetriesForMongoS);
- replyToResponse(&updateReply.getWriteCommandReplyBase(), response);
+ replyToResponse(opCtx, &updateReply.getWriteCommandReplyBase(), response);
response->setNModified(updateReply.getNModified());
@@ -968,19 +1006,25 @@ std::unique_ptr<BatchedCommandRequest> processFLEBatchExplain(
request.getNS(),
deleteRequest.getEncryptionInformation().get(),
newDeleteOp.getQ(),
- &getTransactionWithRetriesForMongoS));
+ &getTransactionWithRetriesForMongoS,
+ fle::HighCardinalityModeAllowed::kAllow));
deleteRequest.setDeletes({newDeleteOp});
deleteRequest.getWriteCommandRequestBase().setEncryptionInformation(boost::none);
return std::make_unique<BatchedCommandRequest>(deleteRequest);
} else if (request.getBatchType() == BatchedCommandRequest::BatchType_Update) {
auto updateRequest = request.getUpdateRequest();
auto newUpdateOp = updateRequest.getUpdates()[0];
+ auto highCardinalityModeAllowed = newUpdateOp.getUpsert()
+ ? fle::HighCardinalityModeAllowed::kDisallow
+ : fle::HighCardinalityModeAllowed::kAllow;
+
newUpdateOp.setQ(fle::rewriteQuery(opCtx,
getExpCtx(newUpdateOp),
request.getNS(),
updateRequest.getEncryptionInformation().get(),
newUpdateOp.getQ(),
- &getTransactionWithRetriesForMongoS));
+ &getTransactionWithRetriesForMongoS,
+ highCardinalityModeAllowed));
updateRequest.setUpdates({newUpdateOp});
updateRequest.getWriteCommandRequestBase().setEncryptionInformation(boost::none);
return std::make_unique<BatchedCommandRequest>(updateRequest);
@@ -1003,10 +1047,22 @@ write_ops::FindAndModifyCommandReply processFindAndModify(
auto newFindAndModifyRequest = findAndModifyRequest;
+ const auto bypassDocumentValidation =
+ findAndModifyRequest.getBypassDocumentValidation().value_or(false);
+
// Step 0 ----
// Rewrite filter
- newFindAndModifyRequest.setQuery(fle::rewriteEncryptedFilterInsideTxn(
- queryImpl, edcNss.db(), efc, expCtx, findAndModifyRequest.getQuery()));
+ auto highCardinalityModeAllowed = findAndModifyRequest.getUpsert().value_or(false)
+ ? fle::HighCardinalityModeAllowed::kDisallow
+ : fle::HighCardinalityModeAllowed::kAllow;
+
+ newFindAndModifyRequest.setQuery(
+ fle::rewriteEncryptedFilterInsideTxn(queryImpl,
+ edcNss.db(),
+ efc,
+ expCtx,
+ findAndModifyRequest.getQuery(),
+ highCardinalityModeAllowed));
// Make sure not to inherit the command's writeConcern, this should be set at the transaction
// level.
@@ -1023,9 +1079,11 @@ write_ops::FindAndModifyCommandReply processFindAndModify(
if (updateModification.type() == write_ops::UpdateModification::Type::kModifier) {
auto updateModifier = updateModification.getUpdateModifier();
auto setObject = updateModifier.getObjectField("$set");
- EDCServerCollection::validateEncryptedFieldInfo(setObject, efc);
+ EDCServerCollection::validateEncryptedFieldInfo(
+ setObject, efc, bypassDocumentValidation);
serverPayload = EDCServerCollection::getEncryptedFieldInfo(updateModifier);
- processFieldsForInsert(queryImpl, edcNss, serverPayload, efc, &stmtId);
+ processFieldsForInsert(
+ queryImpl, edcNss, serverPayload, efc, &stmtId, bypassDocumentValidation);
auto pushUpdate = EDCServerCollection::finalizeForUpdate(updateModifier, serverPayload);
@@ -1034,10 +1092,12 @@ write_ops::FindAndModifyCommandReply processFindAndModify(
pushUpdate, write_ops::UpdateModification::ClassicTag(), false);
} else {
auto replacementDocument = updateModification.getUpdateReplacement();
- EDCServerCollection::validateEncryptedFieldInfo(replacementDocument, efc);
+ EDCServerCollection::validateEncryptedFieldInfo(
+ replacementDocument, efc, bypassDocumentValidation);
serverPayload = EDCServerCollection::getEncryptedFieldInfo(replacementDocument);
- processFieldsForInsert(queryImpl, edcNss, serverPayload, efc, &stmtId);
+ processFieldsForInsert(
+ queryImpl, edcNss, serverPayload, efc, &stmtId, bypassDocumentValidation);
// Step 2 ----
auto safeContentReplace =
@@ -1129,8 +1189,17 @@ write_ops::FindAndModifyCommandRequest processFindAndModifyExplain(
auto efc = EncryptionInformationHelpers::getAndValidateSchema(edcNss, ei);
auto newFindAndModifyRequest = findAndModifyRequest;
- newFindAndModifyRequest.setQuery(fle::rewriteEncryptedFilterInsideTxn(
- queryImpl, edcNss.db(), efc, expCtx, findAndModifyRequest.getQuery()));
+ auto highCardinalityModeAllowed = findAndModifyRequest.getUpsert().value_or(false)
+ ? fle::HighCardinalityModeAllowed::kDisallow
+ : fle::HighCardinalityModeAllowed::kAllow;
+
+ newFindAndModifyRequest.setQuery(
+ fle::rewriteEncryptedFilterInsideTxn(queryImpl,
+ edcNss.db(),
+ efc,
+ expCtx,
+ findAndModifyRequest.getQuery(),
+ highCardinalityModeAllowed));
newFindAndModifyRequest.setEncryptionInformation(boost::none);
return newFindAndModifyRequest;
@@ -1232,10 +1301,23 @@ uint64_t FLEQueryInterfaceImpl::countDocuments(const NamespaceString& nss) {
}
StatusWith<write_ops::InsertCommandReply> FLEQueryInterfaceImpl::insertDocument(
- const NamespaceString& nss, BSONObj obj, StmtId* pStmtId, bool translateDuplicateKey) {
+ const NamespaceString& nss,
+ BSONObj obj,
+ StmtId* pStmtId,
+ bool translateDuplicateKey,
+ bool bypassDocumentValidation) {
write_ops::InsertCommandRequest insertRequest(nss);
insertRequest.setDocuments({obj});
+ EncryptionInformation encryptionInformation;
+ encryptionInformation.setCrudProcessed(true);
+
+ // We need to set an empty BSON object here for the schema.
+ encryptionInformation.setSchema(BSONObj());
+ insertRequest.getWriteCommandRequestBase().setEncryptionInformation(encryptionInformation);
+ insertRequest.getWriteCommandRequestBase().setBypassDocumentValidation(
+ bypassDocumentValidation);
+
int32_t stmtId = *pStmtId;
if (stmtId != kUninitializedStmtId) {
(*pStmtId)++;
@@ -1320,6 +1402,7 @@ std::pair<write_ops::UpdateCommandReply, BSONObj> FLEQueryInterfaceImpl::updateW
findAndModifyRequest.setLet(
mergeLetAndCVariables(updateRequest.getLet(), updateOpEntry.getC()));
findAndModifyRequest.setStmtId(updateRequest.getStmtId());
+ findAndModifyRequest.setBypassDocumentValidation(updateRequest.getBypassDocumentValidation());
auto ei2 = ei;
ei2.setCrudProcessed(true);
@@ -1361,9 +1444,15 @@ std::pair<write_ops::UpdateCommandReply, BSONObj> FLEQueryInterfaceImpl::updateW
}
write_ops::UpdateCommandReply FLEQueryInterfaceImpl::update(
- const NamespaceString& nss,
- int32_t stmtId,
- const write_ops::UpdateCommandRequest& updateRequest) {
+ const NamespaceString& nss, int32_t stmtId, write_ops::UpdateCommandRequest& updateRequest) {
+
+ invariant(!updateRequest.getWriteCommandRequestBase().getEncryptionInformation());
+
+ EncryptionInformation encryptionInformation;
+ encryptionInformation.setCrudProcessed(true);
+
+ encryptionInformation.setSchema(BSONObj());
+ updateRequest.getWriteCommandRequestBase().setEncryptionInformation(encryptionInformation);
dassert(updateRequest.getStmtIds().value_or(std::vector<int32_t>()).empty());
@@ -1401,7 +1490,6 @@ std::vector<BSONObj> FLEQueryInterfaceImpl::findDocuments(const NamespaceString&
BSONObj filter) {
FindCommandRequest find(nss);
find.setFilter(filter);
- find.setSingleBatch(true);
// Throws on error
return _txnClient.exhaustiveFind(find).get();
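
In this file the delete path passes fle::HighCardinalityModeAllowed::kAllow unconditionally, while the update and findAndModify paths choose the mode from the upsert flag: an upsert may insert a brand-new document, so those rewrites run with kDisallow, and plain updates run with kAllow. The stand-alone sketch below (invented names, not MongoDB code) is only meant to make that mapping explicit.

#include <cassert>

// Illustrative stand-in mirroring fle::HighCardinalityModeAllowed from the diff above.
enum class HighCardinalityModeAllowed { kAllow, kDisallow };

// Hypothetical helper: upserts disallow high-cardinality mode, other writes allow it.
HighCardinalityModeAllowed modeForWrite(bool isUpsert) {
    return isUpsert ? HighCardinalityModeAllowed::kDisallow
                    : HighCardinalityModeAllowed::kAllow;
}

int main() {
    assert(modeForWrite(/*isUpsert=*/true) == HighCardinalityModeAllowed::kDisallow);
    assert(modeForWrite(/*isUpsert=*/false) == HighCardinalityModeAllowed::kAllow);
    return 0;
}
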
diff --git a/src/mongo/db/fle_crud.h b/src/mongo/db/fle_crud.h
index 738e85b8996..7c8d93ae1f9 100644
--- a/src/mongo/db/fle_crud.h
+++ b/src/mongo/db/fle_crud.h
@@ -261,7 +261,11 @@ public:
* FLEStateCollectionContention instead.
*/
virtual StatusWith<write_ops::InsertCommandReply> insertDocument(
- const NamespaceString& nss, BSONObj obj, StmtId* pStmtId, bool translateDuplicateKey) = 0;
+ const NamespaceString& nss,
+ BSONObj obj,
+ StmtId* pStmtId,
+ bool translateDuplicateKey,
+ bool bypassDocumentValidation = false) = 0;
/**
* Delete a single document with the given query.
@@ -294,7 +298,7 @@ public:
virtual write_ops::UpdateCommandReply update(
const NamespaceString& nss,
int32_t stmtId,
- const write_ops::UpdateCommandRequest& updateRequest) = 0;
+ write_ops::UpdateCommandRequest& updateRequest) = 0;
/**
* Do a single findAndModify request.
@@ -325,10 +329,12 @@ public:
uint64_t countDocuments(const NamespaceString& nss) final;
- StatusWith<write_ops::InsertCommandReply> insertDocument(const NamespaceString& nss,
- BSONObj obj,
- int32_t* pStmtId,
- bool translateDuplicateKey) final;
+ StatusWith<write_ops::InsertCommandReply> insertDocument(
+ const NamespaceString& nss,
+ BSONObj obj,
+ int32_t* pStmtId,
+ bool translateDuplicateKey,
+ bool bypassDocumentValidation = false) final;
std::pair<write_ops::DeleteCommandReply, BSONObj> deleteWithPreimage(
const NamespaceString& nss,
@@ -340,10 +346,9 @@ public:
const EncryptionInformation& ei,
const write_ops::UpdateCommandRequest& updateRequest) final;
- write_ops::UpdateCommandReply update(
- const NamespaceString& nss,
- int32_t stmtId,
- const write_ops::UpdateCommandRequest& updateRequest) final;
+ write_ops::UpdateCommandReply update(const NamespaceString& nss,
+ int32_t stmtId,
+ write_ops::UpdateCommandRequest& updateRequest) final;
write_ops::FindAndModifyCommandReply findAndModify(
const NamespaceString& nss,
@@ -408,7 +413,8 @@ StatusWith<write_ops::InsertCommandReply> processInsert(
std::vector<EDCServerPayloadInfo>& serverPayload,
const EncryptedFieldConfig& efc,
int32_t stmtId,
- BSONObj document);
+ BSONObj document,
+ bool bypassDocumentValidation = false);
/**
* Process a FLE delete with the query interface
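
The header change keeps the old four-argument insertDocument() call sites compiling by giving the new bypassDocumentValidation parameter a default of false in both the pure virtual and its overrides. Below is a minimal self-contained sketch of that pattern, using invented types rather than the real FLEQueryInterface signatures.

#include <cassert>
#include <string>

struct Doc {
    std::string body;  // stand-in for BSONObj
};

class QueryInterface {
public:
    virtual ~QueryInterface() = default;
    // The new trailing parameter is defaulted so existing callers need no changes.
    virtual bool insertDocument(const Doc& doc,
                                int* stmtId,
                                bool translateDuplicateKey,
                                bool bypassDocumentValidation = false) = 0;
};

class QueryInterfaceMock : public QueryInterface {
public:
    bool insertDocument(const Doc&,
                        int* stmtId,
                        bool /*translateDuplicateKey*/,
                        bool bypassDocumentValidation = false) override {
        ++*stmtId;
        lastBypass = bypassDocumentValidation;
        return true;
    }
    bool lastBypass = false;
};

int main() {
    QueryInterfaceMock mock;
    QueryInterface& iface = mock;
    int stmtId = 0;
    iface.insertDocument(Doc{"{}"}, &stmtId, false);        // old call shape still compiles
    assert(!mock.lastBypass);
    iface.insertDocument(Doc{"{}"}, &stmtId, false, true);  // new callers can bypass validation
    assert(mock.lastBypass);
    return 0;
}

Default arguments on virtual functions bind to the static type of the call expression, which is one reason the diff repeats "= false" on the FLEQueryInterfaceImpl and FLEQueryInterfaceMock overrides as well as on the base declaration.
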
diff --git a/src/mongo/db/fle_crud_mongod.cpp b/src/mongo/db/fle_crud_mongod.cpp
index 68327133c88..1e488f1f65a 100644
--- a/src/mongo/db/fle_crud_mongod.cpp
+++ b/src/mongo/db/fle_crud_mongod.cpp
@@ -284,7 +284,13 @@ BSONObj processFLEWriteExplainD(OperationContext* opCtx,
const BSONObj& query) {
auto expCtx = make_intrusive<ExpressionContext>(
opCtx, fle::collatorFromBSON(opCtx, collation), nss, runtimeConstants, letParameters);
- return fle::rewriteQuery(opCtx, expCtx, nss, info, query, &getTransactionWithRetriesForMongoD);
+ return fle::rewriteQuery(opCtx,
+ expCtx,
+ nss,
+ info,
+ query,
+ &getTransactionWithRetriesForMongoD,
+ fle::HighCardinalityModeAllowed::kAllow);
}
std::pair<write_ops::FindAndModifyCommandRequest, OpMsgRequest>
diff --git a/src/mongo/db/fle_crud_test.cpp b/src/mongo/db/fle_crud_test.cpp
index 527dd5bca11..0a5d7dfc37c 100644
--- a/src/mongo/db/fle_crud_test.cpp
+++ b/src/mongo/db/fle_crud_test.cpp
@@ -27,6 +27,7 @@
* it in the license file.
*/
+#include "mongo/base/error_codes.h"
#include "mongo/platform/basic.h"
#include <algorithm>
@@ -153,8 +154,12 @@ protected:
void assertDocumentCounts(uint64_t edc, uint64_t esc, uint64_t ecc, uint64_t ecoc);
- void doSingleInsert(int id, BSONElement element);
- void doSingleInsert(int id, BSONObj obj);
+ void testValidateEncryptedFieldInfo(BSONObj obj, bool bypassValidation);
+
+ void testValidateTags(BSONObj obj);
+
+ void doSingleInsert(int id, BSONElement element, bool bypassDocumentValidation = false);
+ void doSingleInsert(int id, BSONObj obj, bool bypassDocumentValidation = false);
void doSingleInsertWithContention(
int id, BSONElement element, int64_t cm, uint64_t cf, EncryptedFieldConfig efc);
@@ -406,7 +411,7 @@ void FleCrudTest::doSingleWideInsert(int id, uint64_t fieldCount, ValueGenerator
auto efc = getTestEncryptedFieldConfig();
- uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result));
+ uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result, false));
}
@@ -451,7 +456,16 @@ std::vector<char> generateSinglePlaceholder(BSONElement value, int64_t cm = 0) {
return v;
}
-void FleCrudTest::doSingleInsert(int id, BSONElement element) {
+void FleCrudTest::testValidateEncryptedFieldInfo(BSONObj obj, bool bypassValidation) {
+ auto efc = getTestEncryptedFieldConfig();
+ EDCServerCollection::validateEncryptedFieldInfo(obj, efc, bypassValidation);
+}
+
+void FleCrudTest::testValidateTags(BSONObj obj) {
+ FLEClientCrypto::validateTagsArray(obj);
+}
+
+void FleCrudTest::doSingleInsert(int id, BSONElement element, bool bypassDocumentValidation) {
auto buf = generateSinglePlaceholder(element);
BSONObjBuilder builder;
builder.append("_id", id);
@@ -467,10 +481,10 @@ void FleCrudTest::doSingleInsert(int id, BSONElement element) {
auto efc = getTestEncryptedFieldConfig();
- uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result));
+ uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result, false));
}
-void FleCrudTest::doSingleInsert(int id, BSONObj obj) {
+void FleCrudTest::doSingleInsert(int id, BSONObj obj, bool bypassDocumentValidation) {
doSingleInsert(id, obj.firstElement());
}
@@ -490,7 +504,7 @@ void FleCrudTest::doSingleInsertWithContention(
auto serverPayload = EDCServerCollection::getEncryptedFieldInfo(result);
- uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result));
+ uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result, false));
}
void FleCrudTest::doSingleInsertWithContention(
@@ -890,7 +904,6 @@ TEST_F(FleCrudTest, UpdateOneSameValue) {
<< "secret"));
}
-
// Update one document with replacement
TEST_F(FleCrudTest, UpdateOneReplace) {
@@ -956,7 +969,16 @@ TEST_F(FleCrudTest, SetSafeContent) {
builder.append("$set", BSON(kSafeContent << "foo"));
auto result = builder.obj();
- ASSERT_THROWS_CODE(doSingleUpdateWithUpdateDoc(1, result), DBException, 6371507);
+ ASSERT_THROWS_CODE(doSingleUpdateWithUpdateDoc(1, result), DBException, 6666200);
+}
+
+// Test that EDCServerCollection::validateEncryptedFieldInfo rejects a BSON object that
+// contains the safeContent field unless document validation is bypassed.
+TEST_F(FleCrudTest, testValidateEncryptedFieldConfig) {
+ testValidateEncryptedFieldInfo(BSON(kSafeContent << "secret"), true);
+ ASSERT_THROWS_CODE(testValidateEncryptedFieldInfo(BSON(kSafeContent << "secret"), false),
+ DBException,
+ 6666200);
}
// Update one document via findAndModify
@@ -1038,6 +1060,11 @@ TEST_F(FleCrudTest, FindAndModify_RenameSafeContent) {
ASSERT_THROWS_CODE(doFindAndModify(req), DBException, 6371506);
}
+TEST_F(FleCrudTest, validateTagsTest) {
+ testValidateTags(BSON(kSafeContent << BSON_ARRAY(123)));
+ ASSERT_THROWS_CODE(testValidateTags(BSON(kSafeContent << "foo")), DBException, 6371507);
+}
+
// Mess with __safeContent__ and ensure the update errors
TEST_F(FleCrudTest, FindAndModify_SetSafeContent) {
doSingleInsert(1,
@@ -1056,8 +1083,7 @@ TEST_F(FleCrudTest, FindAndModify_SetSafeContent) {
req.setUpdate(
write_ops::UpdateModification(result, write_ops::UpdateModification::ClassicTag{}, false));
-
- ASSERT_THROWS_CODE(doFindAndModify(req), DBException, 6371507);
+ ASSERT_THROWS_CODE(doFindAndModify(req), DBException, 6666200);
}
TEST_F(FleTagsTest, InsertOne) {
@@ -1199,7 +1225,7 @@ TEST_F(FleTagsTest, MemoryLimit) {
doSingleInsert(10, doc);
// readTags returns 11 tags which does exceed memory limit.
- ASSERT_THROWS_CODE(readTags(doc), DBException, 6401800);
+ ASSERT_THROWS_CODE(readTags(doc), DBException, ErrorCodes::FLEMaxTagLimitExceeded);
doSingleDelete(5);
diff --git a/src/mongo/db/fle_query_interface_mock.cpp b/src/mongo/db/fle_query_interface_mock.cpp
index 2aeb39788dd..b5ca4e1e9cd 100644
--- a/src/mongo/db/fle_query_interface_mock.cpp
+++ b/src/mongo/db/fle_query_interface_mock.cpp
@@ -54,7 +54,11 @@ uint64_t FLEQueryInterfaceMock::countDocuments(const NamespaceString& nss) {
}
StatusWith<write_ops::InsertCommandReply> FLEQueryInterfaceMock::insertDocument(
- const NamespaceString& nss, BSONObj obj, StmtId* pStmtId, bool translateDuplicateKey) {
+ const NamespaceString& nss,
+ BSONObj obj,
+ StmtId* pStmtId,
+ bool translateDuplicateKey,
+ bool bypassDocumentValidation) {
repl::TimestampedBSONObj tb;
tb.obj = obj;
@@ -132,9 +136,7 @@ std::pair<write_ops::UpdateCommandReply, BSONObj> FLEQueryInterfaceMock::updateW
}
write_ops::UpdateCommandReply FLEQueryInterfaceMock::update(
- const NamespaceString& nss,
- int32_t stmtId,
- const write_ops::UpdateCommandRequest& updateRequest) {
+ const NamespaceString& nss, int32_t stmtId, write_ops::UpdateCommandRequest& updateRequest) {
auto [reply, _] = updateWithPreimage(nss, EncryptionInformation(), updateRequest);
return reply;
}
diff --git a/src/mongo/db/fle_query_interface_mock.h b/src/mongo/db/fle_query_interface_mock.h
index 229d2c08dfe..a89fc71ce1e 100644
--- a/src/mongo/db/fle_query_interface_mock.h
+++ b/src/mongo/db/fle_query_interface_mock.h
@@ -47,10 +47,12 @@ public:
uint64_t countDocuments(const NamespaceString& nss) final;
- StatusWith<write_ops::InsertCommandReply> insertDocument(const NamespaceString& nss,
- BSONObj obj,
- StmtId* pStmtId,
- bool translateDuplicateKey) final;
+ StatusWith<write_ops::InsertCommandReply> insertDocument(
+ const NamespaceString& nss,
+ BSONObj obj,
+ StmtId* pStmtId,
+ bool translateDuplicateKey,
+ bool bypassDocumentValidation = false) final;
std::pair<write_ops::DeleteCommandReply, BSONObj> deleteWithPreimage(
const NamespaceString& nss,
@@ -62,10 +64,9 @@ public:
const EncryptionInformation& ei,
const write_ops::UpdateCommandRequest& updateRequest) final;
- write_ops::UpdateCommandReply update(
- const NamespaceString& nss,
- int32_t stmtId,
- const write_ops::UpdateCommandRequest& updateRequest) final;
+ write_ops::UpdateCommandReply update(const NamespaceString& nss,
+ int32_t stmtId,
+ write_ops::UpdateCommandRequest& updateRequest) final;
write_ops::FindAndModifyCommandReply findAndModify(
const NamespaceString& nss,
diff --git a/src/mongo/db/geo/geoparser.cpp b/src/mongo/db/geo/geoparser.cpp
index 57e2fbee611..893d7832b18 100644
--- a/src/mongo/db/geo/geoparser.cpp
+++ b/src/mongo/db/geo/geoparser.cpp
@@ -52,16 +52,21 @@ namespace mongo {
namespace dps = ::mongo::dotted_path_support;
static Status parseFlatPoint(const BSONElement& elem, Point* out, bool allowAddlFields = false) {
- if (!elem.isABSONObj())
- return BAD_VALUE("Point must be an array or object");
+ if (!elem.isABSONObj()) {
+ return BAD_VALUE("Point must be an array or object, instead got type "
+ << typeName(elem.type()));
+ }
+
BSONObjIterator it(elem.Obj());
BSONElement x = it.next();
if (!x.isNumber()) {
- return BAD_VALUE("Point must only contain numeric elements");
+ return BAD_VALUE("Point must only contain numeric elements, instead got type "
+ << typeName(x.type()));
}
BSONElement y = it.next();
if (!y.isNumber()) {
- return BAD_VALUE("Point must only contain numeric elements");
+ return BAD_VALUE("Point must only contain numeric elements, instead got type "
+ << typeName(y.type()));
}
if (!allowAddlFields && it.more()) {
return BAD_VALUE("Point must only contain two numeric elements");
@@ -86,7 +91,7 @@ static Status coordToPoint(double lng, double lat, S2Point* out) {
// We don't rely on drem to clean up non-sane points. We just don't let them become
// spherical.
if (!isValidLngLat(lng, lat))
- return BAD_VALUE("longitude/latitude is out of bounds, lng: " << lng << " lat: " << lat);
+ return BAD_VALUE("Longitude/latitude is out of bounds, lng: " << lng << " lat: " << lat);
// Note that it's (lat, lng) for S2 but (lng, lat) for MongoDB.
S2LatLng ll = S2LatLng::FromDegrees(lat, lng).Normalized();
// This shouldn't happen since we should only have valid lng/lats.
@@ -101,7 +106,8 @@ static Status coordToPoint(double lng, double lat, S2Point* out) {
static Status parseGeoJSONCoordinate(const BSONElement& elem, S2Point* out) {
if (Array != elem.type()) {
- return BAD_VALUE("GeoJSON coordinates must be an array");
+ return BAD_VALUE("GeoJSON coordinates must be an array, instead got type "
+ << typeName(elem.type()));
}
Point p;
// GeoJSON allows extra elements, e.g. altitude.
@@ -116,7 +122,8 @@ static Status parseGeoJSONCoordinate(const BSONElement& elem, S2Point* out) {
// "coordinates": [ [100.0, 0.0], [101.0, 1.0] ]
static Status parseArrayOfCoordinates(const BSONElement& elem, vector<S2Point>* out) {
if (Array != elem.type()) {
- return BAD_VALUE("GeoJSON coordinates must be an array of coordinates");
+ return BAD_VALUE("GeoJSON coordinates must be an array of coordinates, instead got type "
+ << typeName(elem.type()));
}
BSONObjIterator it(elem.Obj());
// Iterate all coordinates in array
@@ -146,7 +153,8 @@ static Status isLoopClosed(const vector<S2Point>& loop, const BSONElement loopEl
}
if (loop[0] != loop[loop.size() - 1]) {
- return BAD_VALUE("Loop is not closed: " << loopElt.toString(false));
+ return BAD_VALUE("Loop is not closed, first vertex does not equal last vertex: "
+ << loopElt.toString(false));
}
return Status::OK();
@@ -156,7 +164,8 @@ static Status parseGeoJSONPolygonCoordinates(const BSONElement& elem,
bool skipValidation,
S2Polygon* out) {
if (Array != elem.type()) {
- return BAD_VALUE("Polygon coordinates must be an array");
+ return BAD_VALUE("Polygon coordinates must be an array, instead got type "
+ << typeName(elem.type()));
}
std::vector<std::unique_ptr<S2Loop>> loops;
@@ -184,8 +193,9 @@ static Status parseGeoJSONPolygonCoordinates(const BSONElement& elem,
// At least 3 vertices.
if (points.size() < 3) {
- return BAD_VALUE(
- "Loop must have at least 3 different vertices: " << coordinateElt.toString(false));
+ return BAD_VALUE("Loop must have at least 3 different vertices, "
+ << points.size() << " unique vertices were provided: "
+ << coordinateElt.toString(false));
}
loops.push_back(std::make_unique<S2Loop>(points));
@@ -266,15 +276,17 @@ static Status parseGeoJSONPolygonCoordinates(const BSONElement& elem,
}
static Status parseBigSimplePolygonCoordinates(const BSONElement& elem, BigSimplePolygon* out) {
- if (Array != elem.type())
- return BAD_VALUE("Coordinates of polygon must be an array");
+ if (Array != elem.type()) {
+ return BAD_VALUE("Coordinates of polygon must be an array, instead got type "
+ << typeName(elem.type()));
+ }
const vector<BSONElement>& coordinates = elem.Array();
// Only one loop is allowed in a BigSimplePolygon
if (coordinates.size() != 1) {
- return BAD_VALUE(
- "Only one simple loop is allowed in a big polygon: " << elem.toString(false));
+ return BAD_VALUE("Only one simple loop is allowed in a big polygon, instead provided "
+ << coordinates.size() << " loops: " << elem.toString(false));
}
vector<S2Point> exteriorVertices;
@@ -297,7 +309,9 @@ static Status parseBigSimplePolygonCoordinates(const BSONElement& elem, BigSimpl
// At least 3 vertices.
if (exteriorVertices.size() < 3) {
- return BAD_VALUE("Loop must have at least 3 different vertices: " << elem.toString(false));
+ return BAD_VALUE("Loop must have at least 3 different vertices, "
+ << exteriorVertices.size()
+ << " unique vertices were provided: " << elem.toString(false));
}
std::unique_ptr<S2Loop> loop(new S2Loop(exteriorVertices));
@@ -326,8 +340,10 @@ static Status parseGeoJSONCRS(const BSONObj& obj, CRS* crs, bool allowStrictSphe
return Status::OK();
}
- if (!crsElt.isABSONObj())
- return BAD_VALUE("GeoJSON CRS must be an object");
+ if (!crsElt.isABSONObj()) {
+ return BAD_VALUE("GeoJSON CRS must be an object, instead got type "
+ << typeName(crsElt.type()));
+ }
BSONObj crsObj = crsElt.embeddedObject();
// "type": "name"
@@ -336,17 +352,22 @@ static Status parseGeoJSONCRS(const BSONObj& obj, CRS* crs, bool allowStrictSphe
// "properties"
BSONElement propertiesElt = crsObj["properties"];
- if (!propertiesElt.isABSONObj())
- return BAD_VALUE("CRS must have field \"properties\" which is an object");
+ if (!propertiesElt.isABSONObj()) {
+ return BAD_VALUE("CRS must have field \"properties\" which is an object, instead got type "
+ << typeName(propertiesElt.type()));
+ }
BSONObj propertiesObj = propertiesElt.embeddedObject();
- if (String != propertiesObj["name"].type())
- return BAD_VALUE("In CRS, \"properties.name\" must be a string");
+ if (String != propertiesObj["name"].type()) {
+ return BAD_VALUE("In CRS, \"properties.name\" must be a string, instead got type "
+ << typeName(propertiesObj["name"].type()));
+ }
+
const string& name = propertiesObj["name"].String();
if (CRS_CRS84 == name || CRS_EPSG_4326 == name) {
*crs = SPHERE;
} else if (CRS_STRICT_WINDING == name) {
if (!allowStrictSphere) {
- return BAD_VALUE("Strict winding order is only supported by polygon");
+ return BAD_VALUE("Strict winding order CRS is only supported by polygon");
}
*crs = STRICT_SPHERE;
} else {
@@ -369,8 +390,8 @@ static Status parseGeoJSONLineCoordinates(const BSONElement& elem,
eraseDuplicatePoints(&vertices);
if (!skipValidation) {
if (vertices.size() < 2)
- return BAD_VALUE(
- "GeoJSON LineString must have at least 2 vertices: " << elem.toString(false));
+ return BAD_VALUE("GeoJSON LineString must have at least 2 vertices, instead got "
+ << vertices.size() << " vertices: " << elem.toString(false));
string err;
if (!S2Polyline::IsValid(vertices, &err))
@@ -384,9 +405,10 @@ static Status parseGeoJSONLineCoordinates(const BSONElement& elem,
// Parse legacy point or GeoJSON point, used by geo near.
// Only stored legacy points allow additional fields.
Status parsePoint(const BSONElement& elem, PointWithCRS* out, bool allowAddlFields) {
- if (!elem.isABSONObj())
- return BAD_VALUE("Point must be an array or object");
-
+ if (!elem.isABSONObj()) {
+ return BAD_VALUE("Point must be an array or object, instead got type "
+ << typeName(elem.type()));
+ }
BSONObj obj = elem.Obj();
// location: [1, 2] or location: {x: 1, y:2}
if (Array == elem.type() || obj.firstElement().isNumber()) {
@@ -439,7 +461,8 @@ Status GeoParser::parseLegacyPolygon(const BSONObj& obj, PolygonWithCRS* out) {
points.push_back(p);
}
if (points.size() < 3)
- return BAD_VALUE("Polygon must have at least 3 points");
+ return BAD_VALUE("Polygon must have at least 3 points, instead got " << points.size()
+ << " vertices");
out->oldPolygon.init(points);
out->crs = FLAT;
return Status::OK();
@@ -461,7 +484,7 @@ Status GeoParser::parseGeoJSONPoint(const BSONObj& obj, PointWithCRS* out) {
// Projection
out->crs = FLAT;
if (!ShapeProjection::supportsProject(*out, SPHERE))
- return BAD_VALUE("longitude/latitude is out of bounds, lng: " << out->oldPoint.x << " lat: "
+ return BAD_VALUE("Longitude/latitude is out of bounds, lng: " << out->oldPoint.x << " lat: "
<< out->oldPoint.y);
ShapeProjection::projectInto(out, SPHERE);
return Status::OK();
@@ -534,8 +557,11 @@ Status GeoParser::parseMultiLine(const BSONObj& obj, bool skipValidation, MultiL
return status;
BSONElement coordElt = dps::extractElementAtPath(obj, GEOJSON_COORDINATES);
- if (Array != coordElt.type())
- return BAD_VALUE("MultiLineString coordinates must be an array");
+ if (Array != coordElt.type()) {
+ return BAD_VALUE("MultiLineString coordinates must be an array, instead got type "
+ << typeName(coordElt.type()));
+ }
+
out->lines.clear();
auto& lines = out->lines;
@@ -564,9 +590,10 @@ Status GeoParser::parseMultiPolygon(const BSONObj& obj,
return status;
BSONElement coordElt = dps::extractElementAtPath(obj, GEOJSON_COORDINATES);
- if (Array != coordElt.type())
- return BAD_VALUE("MultiPolygon coordinates must be an array");
-
+ if (Array != coordElt.type()) {
+ return BAD_VALUE("MultiPolygon coordinates must be an array, instead got type "
+ << typeName(coordElt.type()));
+ }
out->polygons.clear();
auto& polygons = out->polygons;
@@ -597,11 +624,11 @@ Status GeoParser::parseLegacyCenter(const BSONObj& obj, CapWithCRS* out) {
BSONElement radius = objIt.next();
// radius >= 0 and is not NaN
if (!radius.isNumber() || !(radius.number() >= 0))
- return BAD_VALUE("radius must be a non-negative number");
+ return BAD_VALUE("Radius must be a non-negative number: " << radius.toString(false));
// No more
if (objIt.more())
- return BAD_VALUE("Only 2 fields allowed for circular region");
+ return BAD_VALUE("Only 2 fields allowed for circular region, but more were provided");
out->circle.radius = radius.number();
out->crs = FLAT;
@@ -627,13 +654,15 @@ Status GeoParser::parseCenterSphere(const BSONObj& obj, CapWithCRS* out) {
// Radius
BSONElement radiusElt = objIt.next();
// radius >= 0 and is not NaN
- if (!radiusElt.isNumber() || !(radiusElt.number() >= 0))
- return BAD_VALUE("radius must be a non-negative number");
+ if (!radiusElt.isNumber() || !(radiusElt.number() >= 0)) {
+ return BAD_VALUE("Radius must be a non-negative number: " << radiusElt.toString(false));
+ }
+
double radius = radiusElt.number();
// No more elements
if (objIt.more())
- return BAD_VALUE("Only 2 fields allowed for circular region");
+ return BAD_VALUE("Only 2 fields allowed for circular region, but more were provided");
out->cap = S2Cap::FromAxisAngle(centerPoint, S1Angle::Radians(radius));
out->circle.radius = radius;
@@ -656,16 +685,20 @@ Status GeoParser::parseGeometryCollection(const BSONObj& obj,
bool skipValidation,
GeometryCollection* out) {
BSONElement coordElt = dps::extractElementAtPath(obj, GEOJSON_GEOMETRIES);
- if (Array != coordElt.type())
- return BAD_VALUE("GeometryCollection geometries must be an array");
-
+ if (Array != coordElt.type()) {
+ return BAD_VALUE("GeometryCollection geometries must be an array, instead got type "
+ << typeName(coordElt.type()));
+ }
const vector<BSONElement>& geometries = coordElt.Array();
if (0 == geometries.size())
return BAD_VALUE("GeometryCollection geometries must have at least 1 element");
for (size_t i = 0; i < geometries.size(); ++i) {
if (Object != geometries[i].type())
- return BAD_VALUE("Element " << i << " of \"geometries\" is not an object");
+ return BAD_VALUE("Element " << i
+ << " of \"geometries\" must be an object, instead got type "
+ << typeName(geometries[i].type()) << ": "
+ << geometries[i].toString(false));
const BSONObj& geoObj = geometries[i].Obj();
GeoJSONType type = parseGeoJSONType(geoObj);
diff --git a/src/mongo/db/geo/hash.cpp b/src/mongo/db/geo/hash.cpp
index c2b7009a86d..86f47847b2e 100644
--- a/src/mongo/db/geo/hash.cpp
+++ b/src/mongo/db/geo/hash.cpp
@@ -154,16 +154,37 @@ void GeoHash::initFromString(const char* s) {
setBit(i, 1);
}
+namespace {
+// Extends a 32 bit value into a 64 bit value interleaved with zeros.
+std::uint64_t interleaveWithZeros(std::uint32_t input) {
+ // The code below extends to 32-bit inputs the following bit-manipulation pattern,
+ // illustrated here for a 16-bit number:
+ //
+ // 0000 0000 0000 0000 abcd efgh ijkl mnop
+ // -> 0000 0000 abcd efgh 0000 0000 ijkl mnop
+ // -> 0000 abcd 0000 efgh 0000 ijkl 0000 mnop
+ // -> 00ab 00cd 00ef 00gh 00ij 00kl 00mn 00op
+ // -> 0a0b 0c0d 0e0f 0g0h 0i0j 0k0l 0m0n 0o0p
+ uint64_t word = input;
+ word = (word ^ (word << 16)) & 0x0000ffff0000ffff;
+ word = (word ^ (word << 8)) & 0x00ff00ff00ff00ff;
+ word = (word ^ (word << 4)) & 0x0f0f0f0f0f0f0f0f;
+ word = (word ^ (word << 2)) & 0x3333333333333333;
+ word = (word ^ (word << 1)) & 0x5555555555555555;
+ return word;
+}
+} // namespace
+
GeoHash::GeoHash(unsigned x, unsigned y, unsigned bits) {
verify(bits <= 32);
- _hash = 0;
_bits = bits;
- for (unsigned i = 0; i < bits; i++) {
- if (isBitSet(x, i))
- _hash |= mask64For(i * 2);
- if (isBitSet(y, i))
- _hash |= mask64For((i * 2) + 1);
- }
+ auto interleavedX = interleaveWithZeros(x);
+ auto interleavedY = interleaveWithZeros(y);
+ auto fullHash = (interleavedX << 1) | interleavedY;
+ // Build a mask with the most significant (bits * 2) bits set to 1 and the rest set to 0.
+ auto bitMask = (std::numeric_limits<std::uint64_t>::max() << (64 - (bits * 2)));
+ fullHash = fullHash & bitMask;
+ _hash = fullHash;
}
GeoHash::GeoHash(const GeoHash& old) {
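
The new constructor computes a Morton-style interleaving: x's bits end up in the odd bit positions of the 64-bit hash and y's bits in the even positions, and the final mask keeps only the top bits * 2 positions. The stand-alone sketch below copies the same mask sequence and adds an inverse purely so the round trip can be checked outside the server; the extractEvenBits helper is not part of the MongoDB change.

#include <cassert>
#include <cstdint>

// Spread the 32 input bits across the even bit positions of a 64-bit word
// (same mask sequence as interleaveWithZeros in the diff above).
std::uint64_t interleaveWithZeros(std::uint32_t input) {
    std::uint64_t word = input;
    word = (word ^ (word << 16)) & 0x0000ffff0000ffffULL;
    word = (word ^ (word << 8)) & 0x00ff00ff00ff00ffULL;
    word = (word ^ (word << 4)) & 0x0f0f0f0f0f0f0f0fULL;
    word = (word ^ (word << 2)) & 0x3333333333333333ULL;
    word = (word ^ (word << 1)) & 0x5555555555555555ULL;
    return word;
}

// Inverse: collect the even-position bits back into a 32-bit value.
std::uint32_t extractEvenBits(std::uint64_t word) {
    word &= 0x5555555555555555ULL;
    word = (word ^ (word >> 1)) & 0x3333333333333333ULL;
    word = (word ^ (word >> 2)) & 0x0f0f0f0f0f0f0f0fULL;
    word = (word ^ (word >> 4)) & 0x00ff00ff00ff00ffULL;
    word = (word ^ (word >> 8)) & 0x0000ffff0000ffffULL;
    word = (word ^ (word >> 16)) & 0x00000000ffffffffULL;
    return static_cast<std::uint32_t>(word);
}

int main() {
    const std::uint32_t x = 0xDEADBEEFu;
    const std::uint32_t y = 0x12345678u;
    // Matches (interleavedX << 1) | interleavedY from the constructor, with bits = 32
    // so no cropping is applied.
    const std::uint64_t hash = (interleaveWithZeros(x) << 1) | interleaveWithZeros(y);
    assert(extractEvenBits(hash >> 1) == x);
    assert(extractEvenBits(hash) == y);
    return 0;
}
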
diff --git a/src/mongo/db/geo/hash_test.cpp b/src/mongo/db/geo/hash_test.cpp
index d73c983bc06..e9c9ed83cab 100644
--- a/src/mongo/db/geo/hash_test.cpp
+++ b/src/mongo/db/geo/hash_test.cpp
@@ -148,6 +148,74 @@ TEST(GeoHash, UnhashFastMatchesUnhashSlow) {
}
}
+TEST(GeoHash, HashAndUnhash) {
+ PseudoRandom random(12345);
+ for (int i = 0; i < 1'000; i++) {
+ auto x = random.nextInt32();
+ auto y = random.nextInt32();
+ auto hash = GeoHash(x, y, 32);
+ unsigned int unhashedX, unhashedY;
+ hash.unhash(&unhashedX, &unhashedY);
+ ASSERT_EQ(x, unhashedX);
+ ASSERT_EQ(y, unhashedY);
+ }
+}
+
+TEST(GeoHash, HashCropsBits) {
+ // The following numbers were generated with this code snippet on Linux and hardcoded.
+ // PseudoRandom random(12345);
+ {
+ auto x = -2067174821;
+ auto y = 1127948890;
+ auto bits = 1;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "10");
+ }
+ {
+ auto x = -847616485;
+ auto y = -2132331508;
+ auto bits = 3;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "111000");
+ }
+ {
+ auto x = -818733575;
+ auto y = -721367113;
+ auto bits = 6;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "111100011011");
+ }
+ {
+ auto x = 1272197554;
+ auto y = 1923758992;
+ auto bits = 15;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "001101011000111011100110011001");
+ }
+ {
+ auto x = -1516163863;
+ auto y = -158391651;
+ auto bits = 23;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "1101110100110110110010000101011100001100101001");
+ }
+ {
+ auto x = -1665346465;
+ auto y = 1063852771;
+ auto bits = 30;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "100001111111010110011110111000011010001101100100011101101010");
+ }
+ {
+ auto x = 327397251;
+ auto y = 471329956;
+ auto bits = 32;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(),
+ "0000001101011010100000010001111111011100111110101100010000011010");
+ }
+}
+
TEST(GeoHashConvertor, EdgeLength) {
const double kError = 10E-15;
GeoHashConverter::Parameters params{};
diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp
index 845e65c4b65..0e8fb0d6208 100644
--- a/src/mongo/db/index/expression_keys_private.cpp
+++ b/src/mongo/db/index/expression_keys_private.cpp
@@ -433,8 +433,9 @@ void ExpressionKeysPrivate::validateDocumentCommon(const CollectionPtr& collecti
const BSONObj& keyPattern) {
// If we have a timeseries collection, check that indexed metric fields do not have expanded
// array values
- if (auto tsOptions = collection->getTimeseriesOptions();
- tsOptions && feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV()) {
+ if (auto tsOptions = collection->getTimeseriesOptions(); tsOptions &&
+ feature_flags::gTimeseriesMetricIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
// Each user metric field will be included twice, as both control.min.<field> and
// control.max.<field>, so we'll want to keep track that we've checked data.<field> to avoid
// scanning it twice. The time field can be excluded as it is guaranteed to be a date at
diff --git a/src/mongo/db/index/index_descriptor.h b/src/mongo/db/index/index_descriptor.h
index f4f48cb35b9..115fc8b1d17 100644
--- a/src/mongo/db/index/index_descriptor.h
+++ b/src/mongo/db/index/index_descriptor.h
@@ -89,6 +89,7 @@ public:
static constexpr StringData kWeightsFieldName = "weights"_sd;
static constexpr StringData kOriginalSpecFieldName = "originalSpec"_sd;
static constexpr StringData kPrepareUniqueFieldName = "prepareUnique"_sd;
+ static constexpr StringData kClusteredFieldName = "clustered"_sd;
/**
* infoObj is a copy of the index-describing BSONObj contained in the catalog.
diff --git a/src/mongo/db/index_build_entry_helpers.cpp b/src/mongo/db/index_build_entry_helpers.cpp
index 5c90ce38388..b51ed281b3a 100644
--- a/src/mongo/db/index_build_entry_helpers.cpp
+++ b/src/mongo/db/index_build_entry_helpers.cpp
@@ -289,13 +289,6 @@ StatusWith<IndexBuildEntry> getIndexBuildEntry(OperationContext* opCtx, UUID ind
// build entry from the config db collection.
hangBeforeGettingIndexBuildEntry.pauseWhileSet(Interruptible::notInterruptible());
- if (!collection.getDb()) {
- str::stream ss;
- ss << "Cannot read " << NamespaceString::kIndexBuildEntryNamespace.ns()
- << ". Database not found: " << NamespaceString::kIndexBuildEntryNamespace.db();
- return Status(ErrorCodes::NamespaceNotFound, ss);
- }
-
if (!collection) {
str::stream ss;
ss << "Collection not found: " << NamespaceString::kIndexBuildEntryNamespace.ns();
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index d00dcab50d6..5b954baa536 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -90,8 +90,9 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeBuildingIndexSecond);
MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeWaitingUntilMajorityOpTime);
MONGO_FAIL_POINT_DEFINE(failSetUpResumeIndexBuild);
-IndexBuildsCoordinator::ActiveIndexBuildsSSS::ActiveIndexBuildsSSS()
- : ServerStatusSection("activeIndexBuilds"),
+IndexBuildsCoordinator::IndexBuildsSSS::IndexBuildsSSS()
+ : ServerStatusSection("indexBuilds"),
+ registered(0),
scanCollection(0),
drainSideWritesTable(0),
drainSideWritesTablePreCommit(0),
@@ -550,15 +551,15 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
// 1) Drop all unfinished indexes.
// 2) Start, but do not complete the index build process.
WriteUnitOfWork wuow(opCtx);
- auto indexCatalog = collection.getWritableCollection()->getIndexCatalog();
+ auto indexCatalog = collection.getWritableCollection(opCtx)->getIndexCatalog();
for (size_t i = 0; i < indexNames.size(); i++) {
auto descriptor = indexCatalog->findIndexByName(
opCtx, indexNames[i], IndexCatalog::InclusionPolicy::kReady);
if (descriptor) {
- Status s =
- indexCatalog->dropIndex(opCtx, collection.getWritableCollection(), descriptor);
+ Status s = indexCatalog->dropIndex(
+ opCtx, collection.getWritableCollection(opCtx), descriptor);
if (!s.isOK()) {
return s;
}
@@ -598,7 +599,7 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
IndexCatalog::InclusionPolicy::kFrozen);
if (descriptor) {
Status s = indexCatalog->dropUnfinishedIndex(
- opCtx, collection.getWritableCollection(), descriptor);
+ opCtx, collection.getWritableCollection(opCtx), descriptor);
if (!s.isOK()) {
return s;
}
@@ -609,7 +610,7 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
catalog::removeIndex(
opCtx,
indexNames[i],
- collection.getWritableCollection(),
+ collection.getWritableCollection(opCtx),
nullptr /* ident */,
// Unfinished or partially dropped indexes do not need two-phase drop b/c the
// incomplete index will never be recovered. This is an optimization that will
@@ -621,7 +622,7 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
// We need to initialize the collection to rebuild the indexes. The collection may already
// be initialized when rebuilding indexes with rollback-via-refetch.
if (!collection->isInitialized()) {
- collection.getWritableCollection()->init(opCtx);
+ collection.getWritableCollection(opCtx)->init(opCtx);
}
auto dbName = nss.db().toString();
@@ -632,6 +633,7 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
if (!status.isOK()) {
return status;
}
+ indexBuildsSSS.registered.addAndFetch(1);
IndexBuildsManager::SetupOptions options;
options.protocol = protocol;
@@ -703,7 +705,7 @@ Status IndexBuildsCoordinator::_setUpResumeIndexBuild(OperationContext* opCtx,
if (!collection->isInitialized()) {
WriteUnitOfWork wuow(opCtx);
- collection.getWritableCollection()->init(opCtx);
+ collection.getWritableCollection(opCtx)->init(opCtx);
wuow.commit();
}
@@ -715,6 +717,7 @@ Status IndexBuildsCoordinator::_setUpResumeIndexBuild(OperationContext* opCtx,
if (!status.isOK()) {
return status;
}
+ indexBuildsSSS.registered.addAndFetch(1);
IndexBuildsManager::SetupOptions options;
options.protocol = protocol;
@@ -1808,11 +1811,12 @@ void IndexBuildsCoordinator::createIndexesOnEmptyCollection(OperationContext* op
auto opObserver = opCtx->getServiceContext()->getOpObserver();
- auto indexCatalog = collection.getWritableCollection()->getIndexCatalog();
+ auto indexCatalog = collection.getWritableCollection(opCtx)->getIndexCatalog();
// Always run single phase index build for empty collection. And, will be coordinated using
// createIndexes oplog entry.
for (const auto& spec : specs) {
- if (spec.hasField("clustered") && spec.getBoolField("clustered")) {
+ if (spec.hasField(IndexDescriptor::kClusteredFieldName) &&
+ spec.getBoolField(IndexDescriptor::kClusteredFieldName)) {
// The index is already built implicitly.
continue;
}
@@ -1821,7 +1825,7 @@ void IndexBuildsCoordinator::createIndexesOnEmptyCollection(OperationContext* op
// timestamp.
opObserver->onCreateIndex(opCtx, nss, collectionUUID, spec, fromMigrate);
uassertStatusOK(indexCatalog->createIndexOnEmptyCollection(
- opCtx, collection.getWritableCollection(), spec));
+ opCtx, collection.getWritableCollection(opCtx), spec));
}
}
@@ -1963,6 +1967,7 @@ IndexBuildsCoordinator::_filterSpecsAndRegisterBuild(OperationContext* opCtx,
if (!status.isOK()) {
return status;
}
+ indexBuildsSSS.registered.addAndFetch(1);
// The index has been registered on the Coordinator in an unstarted state. Return an
// uninitialized Future so that the caller can set up the index build by calling
@@ -2519,7 +2524,7 @@ void IndexBuildsCoordinator::_scanCollectionAndInsertSortedKeysIntoIndex(
boost::optional<RecordId> resumeAfterRecordId) {
// Collection scan and insert into index.
{
- const ScopedCounter counter{activeIndexBuildsSSS.scanCollection};
+ indexBuildsSSS.scanCollection.addAndFetch(1);
ScopeGuard scopeGuard([&] {
opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
@@ -2585,7 +2590,7 @@ CollectionPtr IndexBuildsCoordinator::_setUpForScanCollectionAndInsertSortedKeys
*/
void IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlockingWrites(
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {
- const ScopedCounter counter{activeIndexBuildsSSS.drainSideWritesTable};
+ indexBuildsSSS.drainSideWritesTable.addAndFetch(1);
// Perform the first drain while holding an intent lock.
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
@@ -2611,7 +2616,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites(
OperationContext* opCtx,
std::shared_ptr<ReplIndexBuildState> replState,
const IndexBuildOptions& indexBuildOptions) {
- const ScopedCounter counter{activeIndexBuildsSSS.drainSideWritesTablePreCommit};
+ indexBuildsSSS.drainSideWritesTablePreCommit.addAndFetch(1);
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
// Perform the second drain while stopping writes on the collection.
{
@@ -2717,7 +2722,7 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide
<< ", collection UUID: " << replState->collectionUUID);
{
- const ScopedCounter counter{activeIndexBuildsSSS.drainSideWritesTableOnCommit};
+ indexBuildsSSS.drainSideWritesTableOnCommit.addAndFetch(1);
// Perform the third and final drain after releasing a shared lock and reacquiring an
// exclusive lock on the collection.
uassertStatusOK(_indexBuildsManager.drainBackgroundWrites(
@@ -2759,8 +2764,7 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide
// can be called for two-phase builds in all replication states except during initial sync
// when this node is not guaranteed to be consistent.
{
- const ScopedCounter counter{
- activeIndexBuildsSSS.processConstraintsViolatonTableOnCommit};
+ indexBuildsSSS.processConstraintsViolatonTableOnCommit.addAndFetch(1);
bool twoPhaseAndNotInitialSyncing =
IndexBuildProtocol::kTwoPhase == replState->protocol &&
!replCoord->getMemberState().startup2();
@@ -2770,7 +2774,7 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide
opCtx, collection.get(), replState->buildUUID));
}
}
- const ScopedCounter counter{activeIndexBuildsSSS.commit};
+ indexBuildsSSS.commit.addAndFetch(1);
// If two phase index builds is enabled, index build will be coordinated using
// startIndexBuild and commitIndexBuild oplog entries.
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h
index ee617a0742d..ac8193685ac 100644
--- a/src/mongo/db/index_builds_coordinator.h
+++ b/src/mongo/db/index_builds_coordinator.h
@@ -489,9 +489,9 @@ public:
*/
static int getNumIndexesTotal(OperationContext* opCtx, const CollectionPtr& collection);
- class ActiveIndexBuildsSSS : public ServerStatusSection {
+ class IndexBuildsSSS : public ServerStatusSection {
public:
- ActiveIndexBuildsSSS();
+ IndexBuildsSSS();
bool includeByDefault() const final {
return true;
@@ -504,10 +504,7 @@ public:
BSONObjBuilder indexBuilds;
BSONObjBuilder phases;
- indexBuilds.append(
- "total",
- static_cast<int>(
- IndexBuildsCoordinator::get(opCtx)->activeIndexBuilds.getActiveIndexBuilds()));
+ indexBuilds.append("total", registered.loadRelaxed());
phases.append("scanCollection", scanCollection.loadRelaxed());
phases.append("drainSideWritesTable", drainSideWritesTable.loadRelaxed());
@@ -525,6 +522,7 @@ public:
return indexBuilds.obj();
}
+ AtomicWord<int> registered;
AtomicWord<int> scanCollection;
AtomicWord<int> drainSideWritesTable;
AtomicWord<int> drainSideWritesTablePreCommit;
@@ -532,7 +530,7 @@ public:
AtomicWord<int> drainSideWritesTableOnCommit;
AtomicWord<int> processConstraintsViolatonTableOnCommit;
AtomicWord<int> commit;
- } activeIndexBuildsSSS;
+ } indexBuildsSSS;
private:
/**
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index fb309720f22..0aa1661bed2 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -709,7 +709,7 @@ void IndexBuildsCoordinatorMongod::_waitForNextIndexBuildActionAndCommit(
<< replState->buildUUID);
auto const nextAction = [&] {
- const ScopedCounter counter{activeIndexBuildsSSS.waitForCommitQuorum};
+ indexBuildsSSS.waitForCommitQuorum.addAndFetch(1);
// Future wait can be interrupted.
return _drainSideWritesUntilNextActionIsAvailable(opCtx, replState);
}();
diff --git a/src/mongo/db/initialize_server_global_state.cpp b/src/mongo/db/initialize_server_global_state.cpp
index e37d6809049..18c2cf45555 100644
--- a/src/mongo/db/initialize_server_global_state.cpp
+++ b/src/mongo/db/initialize_server_global_state.cpp
@@ -64,7 +64,7 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kControl
-namespace mongo {
+namespace mongo::initialize_server_global_state {
#ifndef _WIN32
static void croak(StringData prefix, int savedErr = errno) {
@@ -411,7 +411,7 @@ MONGO_INITIALIZER(RegisterShortCircuitExitHandler)(InitializerContext*) {
uasserted(ErrorCodes::InternalError, "Failed setting short-circuit exit handler.");
}
-bool initializeServerGlobalState(ServiceContext* service, PidFileWrite pidWrite) {
+bool checkSocketPath() {
#ifndef _WIN32
if (!serverGlobalParams.noUnixSocket &&
!boost::filesystem::is_directory(serverGlobalParams.socket)) {
@@ -420,16 +420,14 @@ bool initializeServerGlobalState(ServiceContext* service, PidFileWrite pidWrite)
}
#endif
- if (!serverGlobalParams.pidFile.empty() && pidWrite == PidFileWrite::kWrite) {
- if (!writePidFile(serverGlobalParams.pidFile)) {
- // error message logged in writePidFile
- return false;
- }
- }
-
return true;
}
+bool writePidFile() {
+ return serverGlobalParams.pidFile.empty() ? true
+ : mongo::writePidFile(serverGlobalParams.pidFile);
+}
+
#ifndef _WIN32
namespace {
// Handling for `honorSystemUmask` and `processUmask` setParameters.
@@ -541,4 +539,4 @@ void ProcessUMaskServerParameter::append(OperationContext*,
#endif
}
-} // namespace mongo
+} // namespace mongo::initialize_server_global_state
diff --git a/src/mongo/db/initialize_server_global_state.h b/src/mongo/db/initialize_server_global_state.h
index d9d6db3e17f..cab4a3f86d1 100644
--- a/src/mongo/db/initialize_server_global_state.h
+++ b/src/mongo/db/initialize_server_global_state.h
@@ -29,28 +29,19 @@
#pragma once
-namespace mongo {
+#include "mongo/db/service_context.h"
-class ServiceContext;
+namespace mongo::initialize_server_global_state {
/**
- * Enum which controls whether the pid file is written at startup.
+ * Returns whether the specified socket path is a directory.
*/
-enum class PidFileWrite {
- // Open PID file and write PID to disk
- kWrite,
-
- // Do not open or write PID file
- kNoWrite,
-};
+bool checkSocketPath();
/**
- * Perform initialization activity common across all mongo server types.
- *
- * Set up logging, daemonize the process, configure SSL, etc.
+ * Attempts to write the PID file (if specified) and returns whether it was successful.
*/
-bool initializeServerGlobalState(ServiceContext* service,
- PidFileWrite pidWrite = PidFileWrite::kWrite);
+bool writePidFile();
/**
* Forks and detaches the server, on platforms that support it, if serverGlobalParams.doFork is
@@ -66,4 +57,4 @@ void forkServerOrDie();
*/
void signalForkSuccess();
-} // namespace mongo
+} // namespace mongo::initialize_server_global_state
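
The old initializeServerGlobalState() is split into two smaller helpers, and mongod_main.cpp (later in this diff) calls them at different points: checkSocketPath() early, while startup is still single-threaded, and writePidFile() near the end of _initAndListen(), with either failure leading to quickExit(EXIT_FAILURE). The sketch below only illustrates that ordering; the bodies are invented stand-ins, not the real implementations.

#include <cstdlib>

namespace initialize_server_global_state {
// Stand-ins for the two helpers declared in the header above.
bool checkSocketPath() { return true; }  // would validate serverGlobalParams.socket
bool writePidFile() { return true; }     // would write serverGlobalParams.pidFile, if set
}  // namespace initialize_server_global_state

int main() {
    // Early, single-threaded startup: fail fast if the socket path is unusable.
    if (!initialize_server_global_state::checkSocketPath())
        return EXIT_FAILURE;

    // ... fork/daemonize, initialize storage, start listeners ...

    // Late in startup: record the PID once the process is in its final form.
    if (!initialize_server_global_state::writePidFile())
        return EXIT_FAILURE;

    return EXIT_SUCCESS;
}
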
diff --git a/src/mongo/db/initialize_server_global_state.idl b/src/mongo/db/initialize_server_global_state.idl
index 1e509d9663d..413e81c6bf6 100644
--- a/src/mongo/db/initialize_server_global_state.idl
+++ b/src/mongo/db/initialize_server_global_state.idl
@@ -26,7 +26,7 @@
# it in the license file.
global:
- cpp_namespace: mongo
+ cpp_namespace: mongo::initialize_server_global_state
cpp_includes:
- mongo/logv2/constants.h
diff --git a/src/mongo/db/internal_transactions_feature_flag.idl b/src/mongo/db/internal_transactions_feature_flag.idl
index d0373f56140..bbbb9fa1477 100644
--- a/src/mongo/db/internal_transactions_feature_flag.idl
+++ b/src/mongo/db/internal_transactions_feature_flag.idl
@@ -41,6 +41,11 @@ feature_flags:
default: true
version: 6.0
+ featureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp:
+ description: Feature flag to enable always creating the config.transactions partial index on step up to primary even if the collection is not empty.
+ cpp_varname: gFeatureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp
+ default: false
+
featureFlagUpdateDocumentShardKeyUsingTransactionApi:
description: Feature flag to enable usage of the transaction api for update findAndModify and update commands that change a document's shard key.
cpp_varname: gFeatureFlagUpdateDocumentShardKeyUsingTransactionApi
diff --git a/src/mongo/db/mongod_main.cpp b/src/mongo/db/mongod_main.cpp
index ccb5ce1a404..531b875918c 100644
--- a/src/mongo/db/mongod_main.cpp
+++ b/src/mongo/db/mongod_main.cpp
@@ -89,7 +89,6 @@
#include "mongo/db/index_builds_coordinator_mongod.h"
#include "mongo/db/index_names.h"
#include "mongo/db/initialize_server_global_state.h"
-#include "mongo/db/initialize_snmp.h"
#include "mongo/db/internal_transactions_reap_service.h"
#include "mongo/db/introspect.h"
#include "mongo/db/json.h"
@@ -543,8 +542,6 @@ ExitCode _initAndListen(ServiceContext* serviceContext, int listenPort) {
startMongoDFTDC();
- initializeSNMP();
-
if (mongodGlobalParams.scriptingEnabled) {
ScriptEngine::setup();
}
@@ -835,6 +832,10 @@ ExitCode _initAndListen(ServiceContext* serviceContext, int listenPort) {
}
}
+ if (!initialize_server_global_state::writePidFile()) {
+ quickExit(EXIT_FAILURE);
+ }
+
// Startup options are written to the audit log at the end of startup so that cluster server
// parameters are guaranteed to have been initialized from disk at this point.
audit::logStartupOptions(Client::getCurrent(), serverGlobalParams.parsedOpts);
@@ -842,7 +843,7 @@ ExitCode _initAndListen(ServiceContext* serviceContext, int listenPort) {
serviceContext->notifyStartupComplete();
#ifndef _WIN32
- mongo::signalForkSuccess();
+ initialize_server_global_state::signalForkSuccess();
#else
if (ntservice::shouldStartService()) {
ntservice::reportStatus(SERVICE_RUNNING);
@@ -894,7 +895,7 @@ ExitCode initService() {
MONGO_INITIALIZER_GENERAL(ForkServer, ("EndStartupOptionHandling"), ("default"))
(InitializerContext* context) {
- mongo::forkServerOrDie();
+ initialize_server_global_state::forkServerOrDie();
}
#ifdef __linux__
@@ -1144,10 +1145,7 @@ void setUpObservers(ServiceContext* serviceContext) {
opObserverRegistry->addObserver(
std::make_unique<repl::PrimaryOnlyServiceOpObserver>(serviceContext));
opObserverRegistry->addObserver(std::make_unique<FcvOpObserver>());
-
- if (gFeatureFlagClusterWideConfig.isEnabledAndIgnoreFCV()) {
- opObserverRegistry->addObserver(std::make_unique<ClusterServerParameterOpObserver>());
- }
+ opObserverRegistry->addObserver(std::make_unique<ClusterServerParameterOpObserver>());
setupFreeMonitoringOpObserver(opObserverRegistry.get());
@@ -1542,7 +1540,7 @@ int mongod_main(int argc, char* argv[]) {
startupConfigActions(std::vector<std::string>(argv, argv + argc));
cmdline_utils::censorArgvArray(argc, argv);
- if (!initializeServerGlobalState(service))
+ if (!initialize_server_global_state::checkSocketPath())
quickExit(EXIT_FAILURE);
// There is no single-threaded guarantee beyond this point.
@@ -1550,7 +1548,7 @@ int mongod_main(int argc, char* argv[]) {
LOGV2(5945603, "Multi threading initialized");
// Per SERVER-7434, startSignalProcessingThread must run after any forks (i.e.
- // initializeServerGlobalState) and before the creation of any other threads
+ // initialize_server_global_state::forkServerOrDie) and before the creation of any other threads
startSignalProcessingThread();
ReadWriteConcernDefaults::create(service, readWriteConcernDefaultsCacheLookupMongoD);
diff --git a/src/mongo/db/multitenancy.cpp b/src/mongo/db/multitenancy.cpp
index 27ced8eee24..f12a4f7b55d 100644
--- a/src/mongo/db/multitenancy.cpp
+++ b/src/mongo/db/multitenancy.cpp
@@ -29,61 +29,17 @@
#include "mongo/db/multitenancy.h"
-#include "mongo/db/auth/authorization_session.h"
-#include "mongo/db/auth/security_token.h"
-#include "mongo/db/multitenancy_gen.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/tenant_id.h"
-#include "mongo/logv2/log.h"
-
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
-
namespace mongo {
-// Holds the tenantId for the operation if it was provided in the request on the $tenant field only
-// if the tenantId was not also provided in the security token.
-const auto dollarTenantDecoration =
- OperationContext::declareDecoration<boost::optional<mongo::TenantId>>();
-
-void parseDollarTenantFromRequest(OperationContext* opCtx, const OpMsg& request) {
- // The internal security user is allowed to run commands on behalf of a tenant by passing
- // the tenantId in the "$tenant" field.
- auto tenantElem = request.body["$tenant"];
- if (!tenantElem)
- return;
-
- uassert(ErrorCodes::InvalidOptions,
- "Multitenancy not enabled, cannot set $tenant in command body",
- gMultitenancySupport);
-
- uassert(ErrorCodes::Unauthorized,
- "'$tenant' may only be specified with the useTenant action type",
- AuthorizationSession::get(opCtx->getClient())
- ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(),
- ActionType::useTenant));
-
- auto tenantId = TenantId::parseFromBSON(tenantElem);
-
- uassert(6223901,
- str::stream() << "Cannot pass $tenant id if also passing securityToken, securityToken: "
- << auth::getSecurityToken(opCtx)->getAuthenticatedUser().getTenant()
- << " $tenant: " << tenantId,
- !auth::getSecurityToken(opCtx));
-
-
- dollarTenantDecoration(opCtx) = std::move(tenantId);
- LOGV2_DEBUG(
- 6223900, 4, "Setting tenantId from $tenant request parameter", "tenantId"_attr = tenantId);
-}
-
boost::optional<TenantId> getActiveTenant(OperationContext* opCtx) {
- auto token = auth::getSecurityToken(opCtx);
- if (!token) {
- return dollarTenantDecoration(opCtx);
+ if (auto token = auth::ValidatedTenancyScope::get(opCtx)) {
+ return token->tenantId();
}
- invariant(!dollarTenantDecoration(opCtx));
- return token->getAuthenticatedUser().getTenant();
+ return boost::none;
}
} // namespace mongo
diff --git a/src/mongo/db/multitenancy.h b/src/mongo/db/multitenancy.h
index f354d225a50..b028286659d 100644
--- a/src/mongo/db/multitenancy.h
+++ b/src/mongo/db/multitenancy.h
@@ -37,13 +37,7 @@
namespace mongo {
/**
- * Parses the tenantId from the '$tenant' field in the request if it exists and
- * "multitenancySupport" is enabled. Then, sets the parsed tenantId on the opCtx.
- */
-void parseDollarTenantFromRequest(OperationContext* opCtx, const OpMsg& request);
-
-/**
- * Extract the active TenantId for this operation.
+ * Extract the active TenantId for this OperationContext.
*/
boost::optional<TenantId> getActiveTenant(OperationContext* opCtx);
diff --git a/src/mongo/db/namespace_string.cpp b/src/mongo/db/namespace_string.cpp
index 3d74f7a507a..633ab3ce8ce 100644
--- a/src/mongo/db/namespace_string.cpp
+++ b/src/mongo/db/namespace_string.cpp
@@ -95,8 +95,8 @@ const NamespaceString NamespaceString::kTenantMigrationRecipientsNamespace(
const NamespaceString NamespaceString::kTenantMigrationOplogView(
NamespaceString::kLocalDb, "system.tenantMigration.oplogView");
-const NamespaceString NamespaceString::kTenantSplitDonorsNamespace(NamespaceString::kConfigDb,
- "tenantSplitDonors");
+const NamespaceString NamespaceString::kShardSplitDonorsNamespace(NamespaceString::kConfigDb,
+ "shardSplitDonors");
const NamespaceString NamespaceString::kShardConfigCollectionsNamespace(NamespaceString::kConfigDb,
"cache.collections");
@@ -269,6 +269,7 @@ bool NamespaceString::mustBeAppliedInOwnOplogBatch() const {
return isSystemDotViews() || isServerConfigurationCollection() || isPrivilegeCollection() ||
_ns == kDonorReshardingOperationsNamespace.ns() ||
_ns == kForceOplogBatchBoundaryNamespace.ns() ||
+ _ns == kTenantMigrationDonorsNamespace.ns() ||
_ns == kTenantMigrationRecipientsNamespace.ns() || _ns == kConfigsvrShardsNamespace.ns();
}
@@ -286,6 +287,12 @@ NamespaceString NamespaceString::makeCollectionlessAggregateNSS(const DatabaseNa
return nss;
}
+NamespaceString NamespaceString::makeChangeCollectionNSS(
+ const boost::optional<TenantId>& tenantId) {
+ // TODO: SERVER-65950 create namespace for a particular tenant.
+ return NamespaceString{NamespaceString::kConfigDb, NamespaceString::kChangeCollectionName};
+}
+
std::string NamespaceString::getSisterNS(StringData local) const {
verify(local.size() && local[0] != '.');
return db().toString() + "." + local.toString();
@@ -422,6 +429,10 @@ bool NamespaceString::isFLE2StateCollection() const {
coll().endsWith(fle2EcocSuffix));
}
+bool NamespaceString::isOplogOrChangeCollection() const {
+ return isOplog() || isChangeCollection();
+}
+
NamespaceString NamespaceString::makeTimeseriesBucketsNamespace() const {
return {db(), kTimeseriesBucketsCollectionPrefix.toString() + coll()};
}
diff --git a/src/mongo/db/namespace_string.h b/src/mongo/db/namespace_string.h
index 730a2859b91..91dbcd144b8 100644
--- a/src/mongo/db/namespace_string.h
+++ b/src/mongo/db/namespace_string.h
@@ -156,7 +156,7 @@ public:
static const NamespaceString kTenantMigrationOplogView;
// Namespace for storing the persisted state of tenant split donors.
- static const NamespaceString kTenantSplitDonorsNamespace;
+ static const NamespaceString kShardSplitDonorsNamespace;
// Namespace for replica set configuration settings.
static const NamespaceString kSystemReplSetNamespace;
@@ -233,57 +233,50 @@ public:
/**
* Constructs an empty NamespaceString.
*/
- NamespaceString() : _ns(), _dotIndex(std::string::npos), _dbName() {}
+ NamespaceString() = default;
/**
* Constructs a NamespaceString from the fully qualified namespace named in "ns" and the
* tenantId. "ns" is NOT expected to contain the tenantId.
*/
explicit NamespaceString(boost::optional<TenantId> tenantId, StringData ns) {
- _ns = tenantId ? tenantId->toString() + "_" + ns.toString()
- : ns.toString(); // copy to our buffer
- _dotIndex = _ns.find('.');
+ _dotIndex = ns.find(".");
+
uassert(ErrorCodes::InvalidNamespace,
"namespaces cannot have embedded null characters",
- _ns.find('\0') == std::string::npos);
+ ns.find('\0') == std::string::npos);
- auto db = _dotIndex == std::string::npos ? ns : ns.substr(0, ns.find('.'));
- _dbName = DatabaseName(tenantId, db);
+ StringData db = ns.substr(0, _dotIndex);
+ _dbName = DatabaseName(std::move(tenantId), db);
+ _ns = ns.toString();
}
// TODO SERVER-65920 Remove this constructor once all constructor call sites have been updated
// to pass tenantId explicitly
explicit NamespaceString(StringData ns, boost::optional<TenantId> tenantId = boost::none)
- : NamespaceString(tenantId, ns) {}
+ : NamespaceString(std::move(tenantId), ns) {}
/**
* Constructs a NamespaceString for the given database and collection names.
* "dbName" must not contain a ".", and "collectionName" must not start with one.
*/
NamespaceString(DatabaseName dbName, StringData collectionName)
- : _ns(dbName.toString().size() + collectionName.size() + 1, '\0') {
+ : _dbName(std::move(dbName)), _ns(str::stream() << _dbName.db() << '.' << collectionName) {
+ auto db = _dbName.db();
+
uassert(ErrorCodes::InvalidNamespace,
- "'.' is an invalid character in the database name: " + dbName.db(),
- dbName.db().find('.') == std::string::npos);
+ "'.' is an invalid character in the database name: " + db,
+ db.find('.') == std::string::npos);
uassert(ErrorCodes::InvalidNamespace,
"Collection names cannot start with '.': " + collectionName,
collectionName.empty() || collectionName[0] != '.');
- auto db = dbName.toString();
- std::string::iterator it = std::copy(db.begin(), db.end(), _ns.begin());
- *it = '.';
- ++it;
- it = std::copy(collectionName.begin(), collectionName.end(), it);
_dotIndex = db.size();
-
- dassert(it == _ns.end());
dassert(_ns[_dotIndex] == '.');
uassert(ErrorCodes::InvalidNamespace,
"namespaces cannot have embedded null characters",
_ns.find('\0') == std::string::npos);
-
- _dbName = std::move(dbName);
}
/**
@@ -292,14 +285,14 @@ public:
* NOT expected to contain a tenantId.
*/
NamespaceString(boost::optional<TenantId> tenantId, StringData db, StringData collectionName)
- : NamespaceString(DatabaseName(tenantId, db), collectionName) {}
+ : NamespaceString(DatabaseName(std::move(tenantId), db), collectionName) {}
// TODO SERVER-65920 Remove this constructor once all constructor call sites have been updated
// to pass tenantId explicitly
NamespaceString(StringData db,
StringData collectionName,
boost::optional<TenantId> tenantId = boost::none)
- : NamespaceString(DatabaseName(tenantId, db), collectionName) {}
+ : NamespaceString(DatabaseName(std::move(tenantId), db), collectionName) {}
/**
* Constructs a NamespaceString from the string 'ns'. Should only be used when reading a
@@ -314,6 +307,11 @@ public:
static NamespaceString makeCollectionlessAggregateNSS(const DatabaseName& dbName);
/**
+ * Constructs the change collection namespace for the specified tenant.
+ */
+ static NamespaceString makeChangeCollectionNSS(const boost::optional<TenantId>& tenantId);
+
+ /**
* Constructs a NamespaceString representing a listCollections namespace. The format for this
* namespace is "<dbName>.$cmd.listCollections".
*/
@@ -336,7 +334,7 @@ public:
StringData db() const {
// TODO SERVER-65456 Remove this function.
- return StringData(_dbName.toString());
+ return _dbName.db();
}
const DatabaseName& dbName() const {
@@ -357,6 +355,13 @@ public:
return ns();
}
+ std::string toStringWithTenantId() const {
+ if (auto tenantId = _dbName.tenantId())
+ return str::stream() << *tenantId << '_' << ns();
+
+ return ns();
+ }
+
size_t size() const {
return _ns.size();
}
@@ -482,6 +487,11 @@ public:
bool isFLE2StateCollection() const;
/**
+ * Returns true if the namespace is an oplog or a change collection, false otherwise.
+ */
+ bool isOplogOrChangeCollection() const;
+
+ /**
* Returns the time-series buckets namespace for this view.
*/
NamespaceString makeTimeseriesBucketsNamespace() const;
@@ -600,9 +610,14 @@ public:
* contain a $ should be checked explicitly.
* @return if db is an allowed database name
*/
- static bool validDBName(StringData dbString,
+ static bool validDBName(StringData dbName,
DollarInDbNameBehavior behavior = DollarInDbNameBehavior::Disallow);
+ static bool validDBName(const DatabaseName& dbName,
+ DollarInDbNameBehavior behavior = DollarInDbNameBehavior::Disallow) {
+ return validDBName(dbName.db(), behavior);
+ }
+
/**
* Takes a fully qualified namespace (ie dbname.collectionName), and returns true if
* the collection name component of the namespace is valid.
@@ -632,26 +647,35 @@ public:
// Relops among `NamespaceString`.
friend bool operator==(const NamespaceString& a, const NamespaceString& b) {
- return a.ns() == b.ns();
+ return (a.tenantId() == b.tenantId()) && (a.ns() == b.ns());
}
friend bool operator!=(const NamespaceString& a, const NamespaceString& b) {
- return a.ns() != b.ns();
+ return !(a == b);
}
friend bool operator<(const NamespaceString& a, const NamespaceString& b) {
+ if (a.tenantId() != b.tenantId()) {
+ return a.tenantId() < b.tenantId();
+ }
return a.ns() < b.ns();
}
friend bool operator>(const NamespaceString& a, const NamespaceString& b) {
+ if (a.tenantId() != b.tenantId()) {
+ return a.tenantId() > b.tenantId();
+ }
return a.ns() > b.ns();
}
friend bool operator<=(const NamespaceString& a, const NamespaceString& b) {
- return a.ns() <= b.ns();
+ return !(a > b);
}
friend bool operator>=(const NamespaceString& a, const NamespaceString& b) {
- return a.ns() >= b.ns();
+ return !(a < b);
}
template <typename H>
friend H AbslHashValue(H h, const NamespaceString& nss) {
+ if (nss.tenantId()) {
+ return H::combine(std::move(h), nss._dbName.tenantId().get(), nss._ns);
+ }
return H::combine(std::move(h), nss._ns);
}
@@ -660,9 +684,9 @@ public:
}
private:
- std::string _ns;
- size_t _dotIndex = 0;
DatabaseName _dbName;
+ std::string _ns;
+ size_t _dotIndex = std::string::npos;
};
/**
@@ -696,12 +720,14 @@ public:
/**
* Returns database name if this object was initialized with a UUID.
+ *
+ * TODO SERVER-66887 Remove this function once call sites have been changed to use dbName()
*/
std::string dbname() const {
return _dbname ? _dbname->db() : "";
}
- const boost::optional<DatabaseName>& dbnameWithTenant() const {
+ const boost::optional<DatabaseName>& dbName() const {
return _dbname;
}
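
The net effect of the header changes above, sketched as a test-style snippet (the namespace_string_test.cpp hunks that follow assert the same behaviour): ns() and toString() no longer embed the tenant prefix, toStringWithTenantId() does, and tenantId now participates in equality, ordering and hashing.

TenantId tenantId(OID::gen());
NamespaceString nss(tenantId, "foo.bar");
std::string tenantNsStr = str::stream() << tenantId.toString() << "_foo.bar";
ASSERT_EQ(nss.ns(), "foo.bar");                          // no tenant prefix in ns()/toString()
ASSERT_EQ(nss.toStringWithTenantId(), tenantNsStr);      // prefix only added on request
ASSERT(nss != NamespaceString(boost::none, "foo.bar"));  // tenantId is compared first
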
diff --git a/src/mongo/db/namespace_string_test.cpp b/src/mongo/db/namespace_string_test.cpp
index 9673481b874..4412b0246f6 100644
--- a/src/mongo/db/namespace_string_test.cpp
+++ b/src/mongo/db/namespace_string_test.cpp
@@ -303,22 +303,25 @@ TEST(NamespaceStringTest, NSSWithTenantId) {
TenantId tenantId(OID::gen());
std::string tenantNsStr = str::stream() << tenantId.toString() << "_foo.bar";
- NamespaceString nss("foo.bar", tenantId);
- ASSERT_EQ(nss.ns(), tenantNsStr);
- ASSERT_EQ(nss.toString(), tenantNsStr);
+ NamespaceString nss(tenantId, "foo.bar");
+ ASSERT_EQ(nss.ns(), "foo.bar");
+ ASSERT_EQ(nss.toString(), "foo.bar");
+ ASSERT_EQ(nss.toStringWithTenantId(), tenantNsStr);
ASSERT(nss.tenantId());
ASSERT_EQ(*nss.tenantId(), tenantId);
DatabaseName dbName(tenantId, "foo");
NamespaceString nss2(dbName, "bar");
- ASSERT_EQ(nss2.ns(), tenantNsStr);
- ASSERT_EQ(nss2.toString(), tenantNsStr);
+ ASSERT_EQ(nss2.ns(), "foo.bar");
+ ASSERT_EQ(nss2.toString(), "foo.bar");
+ ASSERT_EQ(nss2.toStringWithTenantId(), tenantNsStr);
ASSERT(nss2.tenantId());
ASSERT_EQ(*nss2.tenantId(), tenantId);
NamespaceString nss3("foo", "bar", tenantId);
- ASSERT_EQ(nss3.ns(), tenantNsStr);
- ASSERT_EQ(nss3.toString(), tenantNsStr);
+ ASSERT_EQ(nss3.ns(), "foo.bar");
+ ASSERT_EQ(nss3.toString(), "foo.bar");
+ ASSERT_EQ(nss3.toStringWithTenantId(), tenantNsStr);
ASSERT(nss3.tenantId());
ASSERT_EQ(*nss3.tenantId(), tenantId);
}
@@ -327,9 +330,10 @@ TEST(NamespaceStringTest, NSSNoCollectionWithTenantId) {
TenantId tenantId(OID::gen());
std::string tenantNsStr = str::stream() << tenantId.toString() << "_foo";
- NamespaceString nss("foo", tenantId);
- ASSERT_EQ(nss.ns(), tenantNsStr);
- ASSERT_EQ(nss.toString(), tenantNsStr);
+ NamespaceString nss(tenantId, "foo");
+ ASSERT_EQ(nss.ns(), "foo");
+ ASSERT_EQ(nss.toString(), "foo");
+ ASSERT_EQ(nss.toStringWithTenantId(), tenantNsStr);
ASSERT(nss.tenantId());
ASSERT_EQ(*nss.tenantId(), tenantId);
@@ -351,7 +355,8 @@ TEST(NamespaceStringTest, ParseNSSWithTenantId) {
NamespaceString nss =
NamespaceString::parseFromStringExpectTenantIdInMultitenancyMode(tenantNsStr);
- ASSERT_EQ(nss.ns(), tenantNsStr);
+ ASSERT_EQ(nss.ns(), "foo.bar");
+ ASSERT_EQ(nss.toStringWithTenantId(), tenantNsStr);
ASSERT(nss.tenantId());
ASSERT_EQ(*nss.tenantId(), tenantId);
}
diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp
index e14695db5de..383f5d47e34 100644
--- a/src/mongo/db/op_observer_impl.cpp
+++ b/src/mongo/db/op_observer_impl.cpp
@@ -2288,14 +2288,6 @@ void OpObserverImpl::_onReplicationRollback(OperationContext* opCtx,
fassertFailedNoTrace(50712);
}
- // Force the config server to update its shard registry on next access. Otherwise it may have
- // the stale data that has been just rolled back.
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- if (auto shardRegistry = Grid::get(opCtx)->shardRegistry()) {
- shardRegistry->clearEntries();
- }
- }
-
// Force the default read/write concern cache to reload on next access in case the defaults
// document was rolled back.
ReadWriteConcernDefaults::get(opCtx).invalidate();
diff --git a/src/mongo/db/op_observer_impl_test.cpp b/src/mongo/db/op_observer_impl_test.cpp
index 0fc2f08ae96..4ad6a6baed0 100644
--- a/src/mongo/db/op_observer_impl_test.cpp
+++ b/src/mongo/db/op_observer_impl_test.cpp
@@ -739,9 +739,13 @@ TEST_F(OpObserverTest, SingleStatementInsertTestIncludesTenantId) {
auto oplogEntryObj = getSingleOplogEntry(opCtx.get());
const repl::OplogEntry& entry = assertGet(repl::OplogEntry::parse(oplogEntryObj));
- ASSERT(nss.tenantId().has_value());
+ // TODO SERVER-67155 Check that (nss == entry.getNss()) and uncomment the
+ // line below once the OplogEntry deserializer passes "tid" to the NamespaceString
+ // constructor
+ ASSERT_EQ(NamespaceString(boost::none, nss.ns()), entry.getNss());
+ // ASSERT(nss.tenantId().has_value());
+
ASSERT_EQ(*nss.tenantId(), *entry.getTid());
- ASSERT_EQ(nss, entry.getNss());
ASSERT_EQ(uuid, *entry.getUuid());
}
@@ -772,7 +776,9 @@ TEST_F(OpObserverTest, SingleStatementUpdateTestIncludesTenantId) {
ASSERT(nss.tenantId().has_value());
ASSERT_EQ(*nss.tenantId(), *entry.getTid());
- ASSERT_EQ(nss, entry.getNss());
+ // TODO SERVER-67155 Check that (nss == entry.getNss()) once the OplogEntry deserializer passes
+ // "tid" to the NamespaceString constructor
+ ASSERT_EQ(NamespaceString(boost::none, nss.ns()), entry.getNss());
ASSERT_EQ(uuid, *entry.getUuid());
}
@@ -798,9 +804,11 @@ TEST_F(OpObserverTest, SingleStatementDeleteTestIncludesTenantId) {
auto oplogEntryObj = getSingleOplogEntry(opCtx.get());
const repl::OplogEntry& entry = assertGet(repl::OplogEntry::parse(oplogEntryObj));
- ASSERT(nss.tenantId().has_value());
+ // TODO SERVER-67155 Check that (nss == entry.getNss()) once the OplogEntry deserializer passes
+ // "tid" to the NamespaceString constructor
+ // ASSERT(nss.tenantId().has_value());
+ ASSERT_EQ(NamespaceString(boost::none, nss.ns()), entry.getNss());
ASSERT_EQ(*nss.tenantId(), *entry.getTid());
- ASSERT_EQ(nss, entry.getNss());
ASSERT_EQ(uuid, *entry.getUuid());
}
@@ -2953,9 +2961,13 @@ TEST_F(BatchedWriteOutputsTest, TestApplyOpsInsertDeleteUpdateIncludesTenantId)
const auto innerEntry = innerEntries[0];
ASSERT(innerEntry.getCommandType() == OplogEntry::CommandType::kNotCommand);
ASSERT(innerEntry.getOpType() == repl::OpTypeEnum::kInsert);
- ASSERT(innerEntry.getNss() == _nssWithTid);
- ASSERT(innerEntry.getNss().tenantId().has_value());
- ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+ // TODO SERVER-67155 Check that (innerEntry.getNss() == _nssWithTid) and uncomment the
+ // 2 lines below once the OplogEntry deserializer passes "tid" to the NamespaceString
+ // constructor
+ ASSERT(innerEntry.getNss() == NamespaceString(boost::none, _nssWithTid.ns()));
+ // ASSERT(innerEntry.getNss().tenantId().has_value());
+ // ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+
ASSERT(innerEntry.getTid().has_value());
ASSERT(*innerEntry.getTid() == *_nssWithTid.tenantId());
ASSERT(0 ==
@@ -2967,9 +2979,13 @@ TEST_F(BatchedWriteOutputsTest, TestApplyOpsInsertDeleteUpdateIncludesTenantId)
const auto innerEntry = innerEntries[1];
ASSERT(innerEntry.getCommandType() == OplogEntry::CommandType::kNotCommand);
ASSERT(innerEntry.getOpType() == repl::OpTypeEnum::kDelete);
- ASSERT(innerEntry.getNss() == _nssWithTid);
- ASSERT(innerEntry.getNss().tenantId().has_value());
- ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+ // TODO SERVER-67155 Check that (innerEntry.getNss() == _nssWithTid) and uncomment the
+ // 2 lines below once the OplogEntry deserializer passes "tid" to the NamespaceString
+ // constructor
+ ASSERT(innerEntry.getNss() == NamespaceString(boost::none, _nssWithTid.ns()));
+ // ASSERT(innerEntry.getNss().tenantId().has_value());
+ // ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+
ASSERT(innerEntry.getTid().has_value());
ASSERT(*innerEntry.getTid() == *_nssWithTid.tenantId());
ASSERT(0 == innerEntry.getObject().woCompare(BSON("_id" << 1)));
@@ -2979,9 +2995,13 @@ TEST_F(BatchedWriteOutputsTest, TestApplyOpsInsertDeleteUpdateIncludesTenantId)
const auto innerEntry = innerEntries[2];
ASSERT(innerEntry.getCommandType() == OplogEntry::CommandType::kNotCommand);
ASSERT(innerEntry.getOpType() == repl::OpTypeEnum::kUpdate);
- ASSERT(innerEntry.getNss() == _nssWithTid);
- ASSERT(innerEntry.getNss().tenantId().has_value());
- ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+ // TODO SERVER-67155 Check that (innerEntry.getNss() == _nssWithTid) and uncomment the
+ // 2 lines below once the OplogEntry deserializer passes "tid" to the NamespaceString
+ // constructor
+ ASSERT(innerEntry.getNss() == NamespaceString(boost::none, _nssWithTid.ns()));
+ // ASSERT(innerEntry.getNss().tenantId().has_value());
+ // ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+
ASSERT(innerEntry.getTid().has_value());
ASSERT(*innerEntry.getTid() == *_nssWithTid.tenantId());
ASSERT(0 ==
diff --git a/src/mongo/db/ops/SConscript b/src/mongo/db/ops/SConscript
index 0b736897acc..983698e5060 100644
--- a/src/mongo/db/ops/SConscript
+++ b/src/mongo/db/ops/SConscript
@@ -34,7 +34,6 @@ env.Library(
env.Library(
target='write_ops_parsers',
source=[
- 'new_write_error_exception_format_feature_flag.idl',
'write_ops.cpp',
'write_ops.idl',
],
diff --git a/src/mongo/db/ops/write_ops.cpp b/src/mongo/db/ops/write_ops.cpp
index 54cef4d3d2a..92d0478a541 100644
--- a/src/mongo/db/ops/write_ops.cpp
+++ b/src/mongo/db/ops/write_ops.cpp
@@ -30,7 +30,6 @@
#include "mongo/db/ops/write_ops.h"
#include "mongo/db/dbmessage.h"
-#include "mongo/db/ops/new_write_error_exception_format_feature_flag_gen.h"
#include "mongo/db/pipeline/aggregation_request_helper.h"
#include "mongo/db/update/update_oplog_entry_serialization.h"
#include "mongo/db/update/update_oplog_entry_version.h"
@@ -295,18 +294,6 @@ WriteError WriteError::parse(const BSONObj& obj) {
auto code = ErrorCodes::Error(obj[WriteError::kCodeFieldName].Int());
auto errmsg = obj[WriteError::kErrmsgFieldName].valueStringDataSafe();
- // At least up to FCV 5.x, the write commands operation used to convert StaleConfig errors
- // into StaleShardVersion and store the extra info of StaleConfig in a sub-field called
- // "errInfo".
- //
- // TODO (SERVER-64449): This special parsing should be removed in the stable version
- // following the resolution of this ticket.
- if (code == ErrorCodes::OBSOLETE_StaleShardVersion) {
- return Status(ErrorCodes::StaleConfig,
- std::move(errmsg),
- obj[WriteError::kErrInfoFieldName].Obj());
- }
-
// All remaining errors have the error stored at the same level as the code and errmsg (in
// the same way that Status is serialised as part of regular command response)
return Status(code, std::move(errmsg), obj);
@@ -319,28 +306,10 @@ BSONObj WriteError::serialize() const {
BSONObjBuilder errBuilder;
errBuilder.append(WriteError::kIndexFieldName, _index);
- // At least up to FCV 5.x, the write commands operation used to convert StaleConfig errors into
- // StaleShardVersion and store the extra info of StaleConfig in a sub-field called "errInfo".
- // This logic preserves this for backwards compatibility.
- //
- // TODO (SERVER-64449): This special serialisation should be removed in the stable version
- // following the resolution of this ticket.
- if (_status == ErrorCodes::StaleConfig &&
- !feature_flags::gFeatureFlagNewWriteErrorExceptionFormat.isEnabled(
- serverGlobalParams.featureCompatibility)) {
- errBuilder.append(WriteError::kCodeFieldName,
- int32_t(ErrorCodes::OBSOLETE_StaleShardVersion));
- errBuilder.append(WriteError::kErrmsgFieldName, _status.reason());
- auto extraInfo = _status.extraInfo();
- invariant(extraInfo);
- BSONObjBuilder extraInfoBuilder(errBuilder.subobjStart(WriteError::kErrInfoFieldName));
- extraInfo->serialize(&extraInfoBuilder);
- } else {
- errBuilder.append(WriteError::kCodeFieldName, int32_t(_status.code()));
- errBuilder.append(WriteError::kErrmsgFieldName, _status.reason());
- if (auto extraInfo = _status.extraInfo()) {
- extraInfo->serialize(&errBuilder);
- }
+ errBuilder.append(WriteError::kCodeFieldName, int32_t(_status.code()));
+ errBuilder.append(WriteError::kErrmsgFieldName, _status.reason());
+ if (auto extraInfo = _status.extraInfo()) {
+ extraInfo->serialize(&errBuilder);
}
return errBuilder.obj();
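
With the OBSOLETE_StaleShardVersion translation removed above, every WriteError now round-trips through one flat shape; a hedged sketch of the BSON that serialize() produces:

// {
//     index: <position of the write within the batch>,
//     code: <numeric ErrorCodes value>,
//     errmsg: <status reason>,
//     ...  // any ErrorExtraInfo fields, appended at the same level as code/errmsg
// }
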
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp
index ee703965b72..8e02cf04ec7 100644
--- a/src/mongo/db/ops/write_ops_exec.cpp
+++ b/src/mongo/db/ops/write_ops_exec.cpp
@@ -452,8 +452,13 @@ bool insertBatchAndHandleErrors(OperationContext* opCtx,
opCtx,
wholeOp.getNamespace(),
fixLockModeForSystemDotViewsChanges(wholeOp.getNamespace(), MODE_IX));
- if (*collection)
+ checkCollectionUUIDMismatch(opCtx,
+ wholeOp.getNamespace(),
+ collection->getCollection(),
+ wholeOp.getCollectionUUID());
+ if (*collection) {
break;
+ }
if (source == OperationSource::kTimeseriesInsert) {
assertTimeseriesBucketsCollectionNotFound(wholeOp.getNamespace());
@@ -499,11 +504,6 @@ bool insertBatchAndHandleErrors(OperationContext* opCtx,
if (shouldProceedWithBatchInsert) {
try {
if (!collection->getCollection()->isCapped() && !inTxn && batch.size() > 1) {
- checkCollectionUUIDMismatch(opCtx,
- wholeOp.getNamespace(),
- collection->getCollection(),
- wholeOp.getCollectionUUID());
-
// First try doing it all together. If all goes well, this is all we need to do.
// See Collection::_insertDocuments for why we do all capped inserts one-at-a-time.
lastOpFixer->startingOp();
@@ -546,10 +546,6 @@ bool insertBatchAndHandleErrors(OperationContext* opCtx,
// Transactions are not allowed to operate on capped collections.
uassertStatusOK(
checkIfTransactionOnCappedColl(opCtx, collection->getCollection()));
- checkCollectionUUIDMismatch(opCtx,
- wholeOp.getNamespace(),
- collection->getCollection(),
- wholeOp.getCollectionUUID());
lastOpFixer->startingOp();
insertDocuments(opCtx,
collection->getCollection(),
@@ -604,11 +600,36 @@ SingleWriteResult makeWriteResultForInsertOrDeleteRetry() {
return res;
}
+
+// Returns the flags that determine the type of document validation to perform. The first item in
+// the tuple determines whether to bypass document validation altogether; the second determines
+// whether the _safeContent_ array may be modified in an encrypted collection.
+std::tuple<bool, bool> getDocumentValidationFlags(OperationContext* opCtx,
+ const write_ops::WriteCommandRequestBase& req) {
+ auto& encryptionInfo = req.getEncryptionInformation();
+ const bool fleCrudProcessed = getFleCrudProcessed(opCtx, encryptionInfo);
+ return std::make_tuple(req.getBypassDocumentValidation(), fleCrudProcessed);
+}
} // namespace
+bool getFleCrudProcessed(OperationContext* opCtx,
+ const boost::optional<EncryptionInformation>& encryptionInfo) {
+ if (encryptionInfo && encryptionInfo->getCrudProcessed().value_or(false)) {
+ uassert(6666201,
+ "External users cannot have crudProcessed enabled",
+ AuthorizationSession::get(opCtx->getClient())
+ ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(),
+ ActionType::internal));
+
+ return true;
+ }
+ return false;
+}
+
WriteResult performInserts(OperationContext* opCtx,
const write_ops::InsertCommandRequest& wholeOp,
OperationSource source) {
+
// Insert performs its own retries, so we should only be within a WriteUnitOfWork when run in a
// transaction.
auto txnParticipant = TransactionParticipant::get(opCtx);
@@ -643,8 +664,15 @@ WriteResult performInserts(OperationContext* opCtx,
uassertStatusOK(userAllowedWriteNS(opCtx, wholeOp.getNamespace()));
}
- DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(
- opCtx, wholeOp.getWriteCommandRequestBase().getBypassDocumentValidation());
+ const auto [disableDocumentValidation, fleCrudProcessed] =
+ getDocumentValidationFlags(opCtx, wholeOp.getWriteCommandRequestBase());
+
+ DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(opCtx,
+ disableDocumentValidation);
+
+ DisableSafeContentValidationIfTrue safeContentValidationDisabler(
+ opCtx, disableDocumentValidation, fleCrudProcessed);
+
LastOpFixer lastOpFixer(opCtx, wholeOp.getNamespace());
WriteResult out;
@@ -766,6 +794,7 @@ static SingleWriteResult performSingleUpdateOp(OperationContext* opCtx,
boost::optional<AutoGetCollection> collection;
while (true) {
collection.emplace(opCtx, ns, fixLockModeForSystemDotViewsChanges(ns, MODE_IX));
+ checkCollectionUUIDMismatch(opCtx, ns, collection->getCollection(), opCollectionUUID);
if (*collection) {
break;
}
@@ -830,8 +859,6 @@ static SingleWriteResult performSingleUpdateOp(OperationContext* opCtx,
uassertStatusOK(checkIfTransactionOnCappedColl(opCtx, coll));
}
- checkCollectionUUIDMismatch(opCtx, ns, collection->getCollection(), opCollectionUUID);
-
const ExtensionsCallbackReal extensionsCallback(opCtx, &updateRequest->getNamespaceString());
ParsedUpdate parsedUpdate(opCtx, updateRequest, extensionsCallback, forgoOpCounterIncrements);
uassertStatusOK(parsedUpdate.parseRequest());
@@ -1003,8 +1030,15 @@ WriteResult performUpdates(OperationContext* opCtx,
(txnParticipant && opCtx->inMultiDocumentTransaction()));
uassertStatusOK(userAllowedWriteNS(opCtx, ns));
- DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(
- opCtx, wholeOp.getWriteCommandRequestBase().getBypassDocumentValidation());
+ const auto [disableDocumentValidation, fleCrudProcessed] =
+ getDocumentValidationFlags(opCtx, wholeOp.getWriteCommandRequestBase());
+
+ DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(opCtx,
+ disableDocumentValidation);
+
+ DisableSafeContentValidationIfTrue safeContentValidationDisabler(
+ opCtx, disableDocumentValidation, fleCrudProcessed);
+
LastOpFixer lastOpFixer(opCtx, ns);
bool containsRetry = false;
@@ -1231,8 +1265,15 @@ WriteResult performDeletes(OperationContext* opCtx,
(txnParticipant && opCtx->inMultiDocumentTransaction()));
uassertStatusOK(userAllowedWriteNS(opCtx, ns));
- DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(
- opCtx, wholeOp.getWriteCommandRequestBase().getBypassDocumentValidation());
+ const auto [disableDocumentValidation, fleCrudProcessed] =
+ getDocumentValidationFlags(opCtx, wholeOp.getWriteCommandRequestBase());
+
+ DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(opCtx,
+ disableDocumentValidation);
+
+ DisableSafeContentValidationIfTrue safeContentValidationDisabler(
+ opCtx, disableDocumentValidation, fleCrudProcessed);
+
LastOpFixer lastOpFixer(opCtx, ns);
bool containsRetry = false;
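
performInserts, performUpdates and performDeletes above now share the same validation setup; the common pattern is summarized here as a sketch (identifiers taken from the hunks above, not new code). Note that crudProcessed is only honoured for clients authorized for ActionType::internal; external requests that set it trip uassert 6666201.

const auto [disableDocumentValidation, fleCrudProcessed] =
    getDocumentValidationFlags(opCtx, wholeOp.getWriteCommandRequestBase());

// Skips schema ("validator") checks when bypassDocumentValidation was requested.
DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(opCtx, disableDocumentValidation);

// Additionally allows the safeContent array to be modified, but only for requests already
// processed by the FLE CRUD layer (internal clients).
DisableSafeContentValidationIfTrue safeContentValidationDisabler(
    opCtx, disableDocumentValidation, fleCrudProcessed);
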
diff --git a/src/mongo/db/ops/write_ops_exec.h b/src/mongo/db/ops/write_ops_exec.h
index 548a3034713..3550a51c1ce 100644
--- a/src/mongo/db/ops/write_ops_exec.h
+++ b/src/mongo/db/ops/write_ops_exec.h
@@ -64,6 +64,9 @@ struct WriteResult {
bool canContinue = true;
};
+bool getFleCrudProcessed(OperationContext* opCtx,
+ const boost::optional<EncryptionInformation>& encryptionInfo);
+
/**
* Performs a batch of inserts, updates, or deletes.
*
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index ff2c639db8b..96c7d59a025 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -99,6 +99,7 @@ env.Library(
'expression_context.cpp',
'expression_function.cpp',
'expression_js_emit.cpp',
+ 'expression_parser.idl',
'expression_test_api_version.cpp',
'expression_trigonometric.cpp',
'javascript_execution.cpp',
@@ -106,6 +107,7 @@ env.Library(
'variables.cpp',
],
LIBDEPS=[
+ '$BUILD_DIR/mongo/crypto/fle_crypto',
'$BUILD_DIR/mongo/db/bson/dotted_path_support',
'$BUILD_DIR/mongo/db/commands/test_commands_enabled',
'$BUILD_DIR/mongo/db/exec/document_value/document_value',
@@ -128,6 +130,7 @@ env.Library(
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/mongohasher',
'$BUILD_DIR/mongo/db/vector_clock',
+ '$BUILD_DIR/mongo/idl/idl_parser',
],
)
diff --git a/src/mongo/db/pipeline/abt/abt_document_source_visitor.cpp b/src/mongo/db/pipeline/abt/abt_document_source_visitor.cpp
index 114f49e1b7e..863769c9f5b 100644
--- a/src/mongo/db/pipeline/abt/abt_document_source_visitor.cpp
+++ b/src/mongo/db/pipeline/abt/abt_document_source_visitor.cpp
@@ -49,6 +49,7 @@
#include "mongo/db/pipeline/document_source_internal_inhibit_optimization.h"
#include "mongo/db/pipeline/document_source_internal_shard_filter.h"
#include "mongo/db/pipeline/document_source_internal_split_pipeline.h"
+#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
#include "mongo/db/pipeline/document_source_limit.h"
#include "mongo/db/pipeline/document_source_list_cached_and_active_users.h"
#include "mongo/db/pipeline/document_source_list_local_sessions.h"
@@ -178,11 +179,19 @@ private:
<< static_cast<int>(transformer->getType()) << ")");
}
+ void assertSupportedPath(const std::string& path) {
+ uassert(ErrorCodes::InternalErrorNotSupported,
+ "Projection contains unsupported numeric path component",
+ !FieldRef(path).hasNumericPathComponents());
+ }
+
void processProjectedPaths(const projection_executor::InclusionNode& node) {
std::set<std::string> preservedPaths;
node.reportProjectedPaths(&preservedPaths);
for (const std::string& preservedPathStr : preservedPaths) {
+ assertSupportedPath(preservedPathStr);
+
_builder.integrateFieldPath(FieldPath(preservedPathStr),
[](const bool isLastElement, FieldMapEntry& entry) {
entry._hasLeadingObj = true;
@@ -232,6 +241,8 @@ private:
// Handle general expression projection.
for (const std::string& computedPathStr : computedPaths) {
+ assertSupportedPath(computedPathStr);
+
const FieldPath computedPath(computedPathStr);
auto entry = _ctx.getNode();
@@ -272,6 +283,7 @@ private:
node.reportProjectedPaths(&preservedPaths);
for (const std::string& preservedPathStr : preservedPaths) {
+ assertSupportedPath(preservedPathStr);
_builder.integrateFieldPath(FieldPath(preservedPathStr),
[](const bool isLastElement, FieldMapEntry& entry) {
if (isLastElement) {
@@ -326,6 +338,10 @@ public:
unsupportedStage(source);
}
+ void visit(const DocumentSourceInternalUnpackBucket* source) override {
+ unsupportedStage(source);
+ }
+
void visit(const DocumentSourceGroup* source) override {
const StringMap<boost::intrusive_ptr<Expression>>& idFields = source->getIdFields();
uassert(6624201, "Empty idFields map", !idFields.empty());
@@ -334,6 +350,9 @@ public:
for (const auto& [fieldName, expr] : idFields) {
groupByFieldNames.push_back(fieldName);
}
+ const bool isSingleIdField =
+ groupByFieldNames.size() == 1 && groupByFieldNames.front() == "_id";
+
// Sort in order to generate consistent plans.
std::sort(groupByFieldNames.begin(), groupByFieldNames.end());
@@ -434,11 +453,21 @@ public:
ABT integrationPath = make<PathIdentity>();
for (size_t i = 0; i < groupByFieldNames.size(); i++) {
+ std::string fieldName = std::move(groupByFieldNames.at(i));
+ if (!isSingleIdField) {
+ // Erase '_id.' prefix.
+ fieldName = fieldName.substr(strlen("_id."));
+ }
+
maybeComposePath(integrationPath,
- make<PathField>(std::move(groupByFieldNames.at(i)),
+ make<PathField>(std::move(fieldName),
make<PathConstant>(make<Variable>(
std::move(groupByProjNames.at(i))))));
}
+ if (!isSingleIdField) {
+ integrationPath = make<PathField>("_id", std::move(integrationPath));
+ }
+
for (size_t i = 0; i < aggProjFieldNames.size(); i++) {
maybeComposePath(
integrationPath,
diff --git a/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp b/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp
index 06b7f7113d0..05b9fff8932 100644
--- a/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp
+++ b/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp
@@ -158,15 +158,16 @@ public:
const Operations op = translateCmpOpFn(expr->getOp());
if (op != Operations::Cmp3w) {
- // If we have EvalPaths coming from the left or on the right, add a PathCompare, and
- // keep propagating the path.
- if (auto leftPtr = left.cast<EvalPath>();
- leftPtr != nullptr && leftPtr->getInput() == _ctx.getRootProjVar()) {
+            // If we have simple EvalPaths on the left or on the right, add a PathCompare and keep
+            // propagating the path.
+ if (auto leftPtr = left.cast<EvalPath>(); leftPtr != nullptr &&
+ isSimplePath(leftPtr->getPath()) && leftPtr->getInput() == _ctx.getRootProjVar()) {
addEvalFilterFn(std::move(leftPtr->getPath()), std::move(right), op);
return;
}
- if (auto rightPtr = right.cast<EvalPath>();
- rightPtr != nullptr && rightPtr->getInput() == _ctx.getRootProjVar()) {
+ if (auto rightPtr = right.cast<EvalPath>(); rightPtr != nullptr &&
+ isSimplePath(rightPtr->getPath()) &&
+ rightPtr->getInput() == _ctx.getRootProjVar()) {
addEvalFilterFn(
std::move(rightPtr->getPath()), std::move(left), reverseComparisonOp(op));
return;
@@ -248,8 +249,10 @@ public:
ABT path = translateFieldPath(
fieldPath,
make<PathIdentity>(),
- [](const std::string& fieldName, const bool /*isLastElement*/, ABT input) {
- // No traverse.
+ [](const std::string& fieldName, const bool isLastElement, ABT input) {
+ if (!isLastElement) {
+ input = make<PathTraverse>(std::move(input));
+ }
return make<PathGet>(fieldName, std::move(input));
},
1ul);
@@ -308,7 +311,7 @@ public:
}
void visit(const ExpressionLn* expr) override final {
- unsupportedExpression(expr->getOpName());
+ pushSingleArgFunctionFromTop("ln");
}
void visit(const ExpressionLog* expr) override final {
@@ -319,6 +322,10 @@ public:
unsupportedExpression(expr->getOpName());
}
+ void visit(const ExpressionInternalFLEEqual* expr) override final {
+ unsupportedExpression(expr->getOpName());
+ }
+
void visit(const ExpressionMap* expr) override final {
unsupportedExpression("$map");
}
@@ -328,7 +335,7 @@ public:
}
void visit(const ExpressionMod* expr) override final {
- unsupportedExpression(expr->getOpName());
+ pushMultiArgFunctionFromTop("mod", 2);
}
void visit(const ExpressionMultiply* expr) override final {
@@ -775,6 +782,7 @@ private:
for (size_t i = 0; i < arity; i++) {
ABT child = _ctx.pop();
if (auto filterPtr = child.cast<EvalFilter>(); allFilters && filterPtr != nullptr &&
+ isSimplePath(filterPtr->getPath()) &&
filterPtr->getInput() == _ctx.getRootProjVar()) {
childPaths.push_back(filterPtr->getPath());
} else {
@@ -784,7 +792,7 @@ private:
}
if (allFilters) {
- // If all children are paths, place a path composition.
+ // If all children are simple paths, place a path composition.
ABT result = make<PathIdentity>();
if (isAnd) {
for (ABT& child : childPaths) {
@@ -812,6 +820,8 @@ private:
for (size_t i = 0; i < argCount; i++) {
children.emplace_back(_ctx.pop());
}
+ std::reverse(children.begin(), children.end());
+
_ctx.push<FunctionCall>(functionName, children);
}
@@ -822,14 +832,10 @@ private:
void pushArithmeticBinaryExpr(const Expression* expr, const Operations op) {
const size_t arity = expr->getChildren().size();
_ctx.ensureArity(arity);
- if (arity < 2) {
- // Nothing to do for arity 0 and 1.
- return;
- }
ABT current = _ctx.pop();
for (size_t i = 0; i < arity - 1; i++) {
- current = make<BinaryOp>(op, std::move(current), _ctx.pop());
+ current = make<BinaryOp>(op, _ctx.pop(), std::move(current));
}
_ctx.push(std::move(current));
}
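
Two behavioural notes on the visitor changes above, sketched against the explain output asserted in pipeline_test.cpp later in this diff: non-terminal field-path components now receive a PathTraverse, and n-ary function arguments are reversed back into source order after being popped from the stack.

// Translating '$a.b':
//   before:  PathGet [a] PathGet [b] PathIdentity []
//   after:   PathGet [a] PathTraverse [] PathGet [b] PathIdentity []
// Function arguments are popped most-recent-first, so the std::reverse above restores the order
// in which they appeared in the source expression.
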
diff --git a/src/mongo/db/pipeline/abt/match_expression_visitor.cpp b/src/mongo/db/pipeline/abt/match_expression_visitor.cpp
index 5eef023db6b..bc0416f658c 100644
--- a/src/mongo/db/pipeline/abt/match_expression_visitor.cpp
+++ b/src/mongo/db/pipeline/abt/match_expression_visitor.cpp
@@ -155,6 +155,8 @@ public:
"$in with regexes is not supported.",
expr->getRegexes().empty());
+ assertSupportedPathExpression(expr);
+
const auto& equalities = expr->getEqualities();
// $in with an empty equalities list matches nothing; replace with constant false.
@@ -406,6 +408,8 @@ private:
template <bool isValueElemMatch>
void generateElemMatch(const ArrayMatchingMatchExpression* expr) {
+ assertSupportedPathExpression(expr);
+
        // Returns true if at least one sub-object matches the condition.
const size_t childCount = expr->numChildren();
@@ -484,7 +488,15 @@ private:
});
}
+ void assertSupportedPathExpression(const PathMatchExpression* expr) {
+ uassert(ErrorCodes::InternalErrorNotSupported,
+ "Expression contains a numeric path component",
+ !FieldRef(expr->path()).hasNumericPathComponents());
+ }
+
void generateSimpleComparison(const ComparisonMatchExpressionBase* expr, const Operations op) {
+ assertSupportedPathExpression(expr);
+
auto [tag, val] = convertFrom(Value(expr->getData()));
const bool isArray = tag == sbe::value::TypeTags::Array;
ABT result = make<PathCompare>(op, make<Constant>(tag, val));
diff --git a/src/mongo/db/pipeline/abt/pipeline_test.cpp b/src/mongo/db/pipeline/abt/pipeline_test.cpp
index e4067947675..694047d6683 100644
--- a/src/mongo/db/pipeline/abt/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/abt/pipeline_test.cpp
@@ -461,7 +461,9 @@ TEST(ABTTranslate, ProjectPaths) {
"| EvalPath []\n"
"| | Variable [scan_0]\n"
"| PathGet [x]\n"
+ "| PathTraverse []\n"
"| PathGet [y]\n"
+ "| PathTraverse []\n"
"| PathGet [z]\n"
"| PathIdentity []\n"
"Scan [collection]\n"
@@ -525,12 +527,13 @@ TEST(ABTTranslate, ProjectInclusion) {
"| BindBlock:\n"
"| [projGetPath_0]\n"
"| BinaryOp [Add]\n"
- "| | EvalPath []\n"
- "| | | Variable [scan_0]\n"
- "| | PathGet [c]\n"
- "| | PathGet [d]\n"
- "| | PathIdentity []\n"
- "| Const [2]\n"
+ "| | Const [2]\n"
+ "| EvalPath []\n"
+ "| | Variable [scan_0]\n"
+ "| PathGet [c]\n"
+ "| PathTraverse []\n"
+ "| PathGet [d]\n"
+ "| PathIdentity []\n"
"Scan [collection]\n"
" BindBlock:\n"
" [scan_0]\n"
@@ -654,9 +657,10 @@ TEST(ABTTranslate, MatchBasic) {
optimized);
}
-TEST(ABTTranslate, MatchPath) {
- ABT translated = translatePipeline("[{$match: {$expr: {$eq: ['$a.b', 1]}}}]");
+TEST(ABTTranslate, MatchPath1) {
+ ABT translated = translatePipeline("[{$match: {$expr: {$eq: ['$a', 1]}}}]");
+ // Demonstrate simple path is converted to EvalFilter.
ASSERT_EXPLAIN_V2(
"Root []\n"
"| | projections: \n"
@@ -667,7 +671,6 @@ TEST(ABTTranslate, MatchPath) {
"| EvalFilter []\n"
"| | Variable [scan_0]\n"
"| PathGet [a]\n"
- "| PathGet [b]\n"
"| PathCompare [Eq]\n"
"| Const [1]\n"
"Scan [collection]\n"
@@ -677,6 +680,34 @@ TEST(ABTTranslate, MatchPath) {
translated);
}
+TEST(ABTTranslate, MatchPath2) {
+ ABT translated = translatePipeline("[{$match: {$expr: {$eq: ['$a.b', 1]}}}]");
+
+ ASSERT_EXPLAIN_V2(
+ "Root []\n"
+ "| | projections: \n"
+ "| | scan_0\n"
+ "| RefBlock: \n"
+ "| Variable [scan_0]\n"
+ "Filter []\n"
+ "| EvalFilter []\n"
+ "| | Variable [scan_0]\n"
+ "| PathConstant []\n"
+ "| BinaryOp [Eq]\n"
+ "| | Const [1]\n"
+ "| EvalPath []\n"
+ "| | Variable [scan_0]\n"
+ "| PathGet [a]\n"
+ "| PathTraverse []\n"
+ "| PathGet [b]\n"
+ "| PathIdentity []\n"
+ "Scan [collection]\n"
+ " BindBlock:\n"
+ " [scan_0]\n"
+ " Source []\n",
+ translated);
+}
+
TEST(ABTTranslate, ElemMatchPath) {
ABT translated = translatePipeline(
"[{$project: {a: {$literal: [1, 2, 3, 4]}}}, {$match: {a: {$elemMatch: {$gte: 2, $lte: "
@@ -776,11 +807,11 @@ TEST(ABTTranslate, MatchProject) {
"| BinaryOp [Add]\n"
"| | EvalPath []\n"
"| | | Variable [scan_0]\n"
- "| | PathGet [a]\n"
+ "| | PathGet [b]\n"
"| | PathIdentity []\n"
"| EvalPath []\n"
"| | Variable [scan_0]\n"
- "| PathGet [b]\n"
+ "| PathGet [a]\n"
"| PathIdentity []\n"
"Scan [collection]\n"
" BindBlock:\n"
@@ -942,11 +973,11 @@ TEST(ABTTranslate, GroupBasic) {
"| BinaryOp [Mult]\n"
"| | EvalPath []\n"
"| | | Variable [scan_0]\n"
- "| | PathGet [b]\n"
+ "| | PathGet [c]\n"
"| | PathIdentity []\n"
"| EvalPath []\n"
"| | Variable [scan_0]\n"
- "| PathGet [c]\n"
+ "| PathGet [b]\n"
"| PathIdentity []\n"
"Evaluation []\n"
"| BindBlock:\n"
@@ -954,6 +985,7 @@ TEST(ABTTranslate, GroupBasic) {
"| EvalPath []\n"
"| | Variable [scan_0]\n"
"| PathGet [a]\n"
+ "| PathTraverse []\n"
"| PathGet [b]\n"
"| PathIdentity []\n"
"Scan [collection]\n"
@@ -1218,6 +1250,7 @@ TEST(ABTTranslate, UnwindAndGroup) {
"| EvalPath []\n"
"| | Variable [embedProj_0]\n"
"| PathGet [a]\n"
+ "| PathTraverse []\n"
"| PathGet [b]\n"
"| PathIdentity []\n"
"Evaluation []\n"
@@ -2039,11 +2072,12 @@ TEST(ABTTranslate, GroupMultiKey) {
"| | PathField [count]\n"
"| | PathConstant []\n"
"| | Variable [count_agg_0]\n"
+ "| PathField [_id]\n"
"| PathComposeM []\n"
- "| | PathField [_id.year]\n"
+ "| | PathField [year]\n"
"| | PathConstant []\n"
"| | Variable [groupByProj_1]\n"
- "| PathField [_id.isin]\n"
+ "| PathField [isin]\n"
"| PathConstant []\n"
"| Variable [groupByProj_0]\n"
"GroupBy []\n"
@@ -2292,13 +2326,15 @@ TEST(ABTTranslate, PartialIndex) {
// The expression matches the pipeline.
// By default the constant is translated as "int32".
- auto conversionResult = convertExprToPartialSchemaReq(make<EvalFilter>(
- make<PathGet>("b",
- make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int32(2)))),
- make<Variable>(scanProjName)));
- ASSERT_TRUE(conversionResult._success);
- ASSERT_FALSE(conversionResult._hasEmptyInterval);
- ASSERT_FALSE(conversionResult._retainPredicate);
+ auto conversionResult = convertExprToPartialSchemaReq(
+ make<EvalFilter>(
+ make<PathGet>(
+ "b", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int32(2)))),
+ make<Variable>(scanProjName)),
+ true /*isFilterContext*/);
+ ASSERT_TRUE(conversionResult.has_value());
+ ASSERT_FALSE(conversionResult->_hasEmptyInterval);
+ ASSERT_FALSE(conversionResult->_retainPredicate);
Metadata metadata = {
{{scanDefName,
@@ -2307,7 +2343,7 @@ TEST(ABTTranslate, PartialIndex) {
IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending}},
true /*multiKey*/,
{DistributionType::Centralized},
- std::move(conversionResult._reqMap)}}}}}}};
+ std::move(conversionResult->_reqMap)}}}}}}};
ABT translated = translatePipeline(
metadata, "[{$match: {'a': 3, 'b': 2}}]", scanProjName, scanDefName, prefixId);
@@ -2360,13 +2396,15 @@ TEST(ABTTranslate, PartialIndexNegative) {
ProjectionName scanProjName = prefixId.getNextId("scan");
// The expression does not match the pipeline.
- auto conversionResult = convertExprToPartialSchemaReq(make<EvalFilter>(
- make<PathGet>("b",
- make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int32(2)))),
- make<Variable>(scanProjName)));
- ASSERT_TRUE(conversionResult._success);
- ASSERT_FALSE(conversionResult._hasEmptyInterval);
- ASSERT_FALSE(conversionResult._retainPredicate);
+ auto conversionResult = convertExprToPartialSchemaReq(
+ make<EvalFilter>(
+ make<PathGet>(
+ "b", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int32(2)))),
+ make<Variable>(scanProjName)),
+ true /*isFilterContext*/);
+ ASSERT_TRUE(conversionResult.has_value());
+ ASSERT_FALSE(conversionResult->_hasEmptyInterval);
+ ASSERT_FALSE(conversionResult->_retainPredicate);
Metadata metadata = {
{{scanDefName,
@@ -2375,7 +2413,7 @@ TEST(ABTTranslate, PartialIndexNegative) {
IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending}},
true /*multiKey*/,
{DistributionType::Centralized},
- std::move(conversionResult._reqMap)}}}}}}};
+ std::move(conversionResult->_reqMap)}}}}}}};
ABT translated = translatePipeline(
metadata, "[{$match: {'a': 3, 'b': 3}}]", scanProjName, scanDefName, prefixId);
@@ -2461,11 +2499,11 @@ TEST(ABTTranslate, CommonExpressionElimination) {
"| BindBlock:\n"
"| [projGetPath_0]\n"
"| BinaryOp [Add]\n"
- "| | EvalPath []\n"
- "| | | Variable [scan_0]\n"
- "| | PathGet [b]\n"
- "| | PathIdentity []\n"
- "| Const [1]\n"
+ "| | Const [1]\n"
+ "| EvalPath []\n"
+ "| | Variable [scan_0]\n"
+ "| PathGet [b]\n"
+ "| PathIdentity []\n"
"Scan [test]\n"
" BindBlock:\n"
" [scan_0]\n"
diff --git a/src/mongo/db/pipeline/aggregation_context_fixture.h b/src/mongo/db/pipeline/aggregation_context_fixture.h
index 76cc01a40c4..e7595382094 100644
--- a/src/mongo/db/pipeline/aggregation_context_fixture.h
+++ b/src/mongo/db/pipeline/aggregation_context_fixture.h
@@ -33,6 +33,7 @@
#include <memory>
#include "mongo/db/concurrency/locker_noop_client_observer.h"
+#include "mongo/db/pipeline/document_source.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
#include "mongo/db/service_context_test_fixture.h"
#include "mongo/unittest/temp_dir.h"
@@ -76,6 +77,14 @@ private:
boost::intrusive_ptr<ExpressionContextForTest> _expCtx;
};
+// A custom deleter which disposes of a DocumentSource before deleting it.
+struct DocumentSourceDeleter {
+ void operator()(DocumentSource* docSource) {
+ docSource->dispose();
+ delete docSource;
+ }
+};
+
class ServerlessAggregationContextFixture : public AggregationContextFixture {
public:
ServerlessAggregationContextFixture()
diff --git a/src/mongo/db/pipeline/change_stream_document_diff_parser.cpp b/src/mongo/db/pipeline/change_stream_document_diff_parser.cpp
index 15b5f8181f9..c847f32872e 100644
--- a/src/mongo/db/pipeline/change_stream_document_diff_parser.cpp
+++ b/src/mongo/db/pipeline/change_stream_document_diff_parser.cpp
@@ -28,6 +28,7 @@
*/
#include "mongo/db/pipeline/change_stream_document_diff_parser.h"
+
#include "mongo/db/field_ref.h"
namespace mongo {
@@ -36,119 +37,203 @@ using doc_diff::Diff;
using doc_diff::DocumentDiffReader;
namespace {
-// If the terminal fieldname in the given FieldRef has an embedded dot, add it into the
-// dottedFieldNames vector.
-void appendIfDottedField(FieldRef* fieldRef, std::vector<Value>* dottedFieldNames) {
- auto fieldName = fieldRef->getPart(fieldRef->numParts() - 1);
- if (fieldName.find('.') != std::string::npos) {
- dottedFieldNames->push_back(Value(fieldName));
+using DeltaUpdateDescription = change_stream_document_diff_parser::DeltaUpdateDescription;
+using FieldNameOrArrayIndex = stdx::variant<StringData, size_t>;
+
+/**
+ * DeltaUpdateDescriptionBuilder is responsible both for tracking the current path as we traverse
+ * the diff, and for populating a DeltaUpdateDescription reflecting the contents of that diff.
+ */
+struct DeltaUpdateDescriptionBuilder {
+    // Adds the specified entry to the 'updatedFields' document in the DeltaUpdateDescription.
+ void addToUpdatedFields(FieldNameOrArrayIndex terminalField, Value updatedValue) {
+ DeltaUpdateDescriptionBuilder::TempAppendToPath tmpAppend(*this, terminalField);
+ _updatedFields.addField(_fieldRef.dottedField(), updatedValue);
+ _addToDisambiguatedPathsIfRequired();
+ }
+
+ // Adds the specified entry to the 'removedFields' vector in the DeltaUpdateDescription.
+ void addToRemovedFields(StringData terminalFieldName) {
+ DeltaUpdateDescriptionBuilder::TempAppendToPath tmpAppend(*this, terminalFieldName);
+ _updateDesc.removedFields.push_back(Value(_fieldRef.dottedField()));
+ _addToDisambiguatedPathsIfRequired();
+ }
+
+ // Adds the current path to the 'truncatedArrays' vector in the DeltaUpdateDescription.
+ void addToTruncatedArrays(int newSize) {
+ _updateDesc.truncatedArrays.push_back(
+ Value(Document{{"field", _fieldRef.dottedField()}, {"newSize", newSize}}));
+ _addToDisambiguatedPathsIfRequired();
+ }
+
+ // Called once the diff traversal is complete. Freezes and returns the DeltaUpdateDescription.
+ // It is an error to use the DeltaUpdateDescriptionBuilder again after this method is called.
+ DeltaUpdateDescription&& freezeDeltaUpdateDescription() {
+ _updateDesc.updatedFields = _updatedFields.freeze();
+ _updateDesc.disambiguatedPaths = _disambiguatedPaths.freeze();
+ return std::move(_updateDesc);
}
-}
+
+ // Returns the last field in the current path.
+ StringData lastPart() const {
+ return _fieldRef.getPart(_fieldRef.numParts() - 1);
+ }
+
+ // Returns the number of fields in the current path.
+ FieldIndex numParts() const {
+ return _fieldRef.numParts();
+ }
+
+ // A structure used to add a scope-guarded field to the current path maintained by the builder.
+ // When this object goes out of scope, it will automatically remove the field from the path.
+ struct TempAppendToPath {
+ TempAppendToPath(DeltaUpdateDescriptionBuilder& builder, FieldNameOrArrayIndex field)
+ : _builder(builder) {
+ // Append the specified field to the builder's path.
+ _builder._appendFieldToPath(std::move(field));
+ }
+
+ ~TempAppendToPath() {
+ // Remove the last field from the path when we go out of scope.
+ _builder._removeLastFieldfromPath();
+ }
+
+ private:
+ DeltaUpdateDescriptionBuilder& _builder;
+ };
+
+private:
+ // A structure for tracking path ambiguity information. Maps 1:1 to fields in the FieldRef via
+ // the _pathAmbiguity list. The 'pathIsAmbiguous' bool indicates whether the path as a whole is
+ // ambiguous as of the corresponding field. Once a path is marked as ambiguous, all subsequent
+ // entries must also be marked as ambiguous.
+ struct AmbiguityInfo {
+ bool pathIsAmbiguous = false;
+ BSONType fieldType = BSONType::String;
+ };
+
+ // Append the given field to the path, and update the path ambiguity information accordingly.
+ void _appendFieldToPath(FieldNameOrArrayIndex field) {
+ // Resolve the FieldNameOrArrayIndex to one or the other, and append it to the path.
+ const bool isArrayIndex = stdx::holds_alternative<size_t>(field);
+ _fieldRef.appendPart(isArrayIndex ? std::to_string(stdx::get<size_t>(field))
+ : stdx::get<StringData>(field));
+
+ // Once a path has become ambiguous, it will remain so as new fields are added. If the final
+ // path component is marked ambiguous, retain that value and add the type of the new field.
+ const auto fieldType = (isArrayIndex ? BSONType::NumberInt : BSONType::String);
+ if (!_pathAmbiguity.empty() && _pathAmbiguity.back().pathIsAmbiguous) {
+ _pathAmbiguity.push_back({true /* pathIsAmbiguous */, fieldType});
+ return;
+ }
+        // If the field is a numeric string or contains an embedded dot, it's ambiguous. We record
+        // array indices so that we can reconstruct the path, but the presence of an array index is
+        // not itself sufficient to make the path ambiguous. A numeric field at the start of the
+        // path is never marked ambiguous, since the root of the document cannot be an array index.
+ const bool isNumeric = (!isArrayIndex && _fieldRef.numParts() > 1 &&
+ FieldRef::isNumericPathComponentStrict(lastPart()));
+ const bool isDotted =
+ (!isArrayIndex && !isNumeric && lastPart().find('.') != std::string::npos);
+
+ // Add to the field list, marking the path as ambiguous if this field is dotted or numeric.
+ _pathAmbiguity.push_back({(isNumeric || isDotted), fieldType});
+ }
+
+ // Remove the last field from the path, along with its entry in the ambiguity list.
+ void _removeLastFieldfromPath() {
+ _fieldRef.removeLastPart();
+ _pathAmbiguity.pop_back();
+ }
+
+ // If this path is marked as ambiguous, add a new entry for it to 'disambiguatedPaths'.
+ void _addToDisambiguatedPathsIfRequired() {
+ // The final entry in _pathAmbiguity will always be marked as ambiguous if any field in the
+ // path is ambiguous. If so, iterate over the list and create a vector of individual fields.
+ if (!_pathAmbiguity.empty() && _pathAmbiguity.back().pathIsAmbiguous) {
+ std::vector<Value> disambiguatedPath;
+ FieldIndex fieldNum = 0;
+ for (const auto& fieldInfo : _pathAmbiguity) {
+ auto fieldVal = _fieldRef.getPart(fieldNum++);
+ disambiguatedPath.push_back(fieldInfo.fieldType == BSONType::NumberInt
+ ? Value(std::stoi(fieldVal.toString()))
+ : Value(fieldVal));
+ }
+ // Add the vector of individual fields into the 'disambiguatedPaths' document. The name
+ // of the field matches the entry in updatedFields, removedFields, or truncatedArrays.
+ _disambiguatedPaths.addField(_fieldRef.dottedField(),
+ Value(std::move(disambiguatedPath)));
+ }
+ }
+
+ friend struct DeltaUpdateDescriptionBuilder::TempAppendToPath;
+
+ // Each element in the _pathAmbiguity list annotates the field at the corresponding index in the
+ // _fieldRef, indicating the type of that field and whether the path is ambiguous at that point.
+ std::list<AmbiguityInfo> _pathAmbiguity;
+ FieldRef _fieldRef;
+
+ DeltaUpdateDescription _updateDesc;
+ MutableDocument _updatedFields;
+ MutableDocument _disambiguatedPaths;
+};
void buildUpdateDescriptionWithDeltaOplog(
stdx::variant<DocumentDiffReader*, ArrayDiffReader*> reader,
- FieldRef* fieldRef,
- MutableDocument* updatedFields,
- std::vector<Value>* removedFields,
- std::vector<Value>* truncatedArrays,
- MutableDocument* arrayIndices,
- MutableDocument* dottedFields) {
+ DeltaUpdateDescriptionBuilder* builder,
+ boost::optional<FieldNameOrArrayIndex> currentSubField) {
+
+ // Append the field name associated with the current level of the diff to the path.
+ boost::optional<DeltaUpdateDescriptionBuilder::TempAppendToPath> tempAppend;
+ if (currentSubField) {
+ tempAppend.emplace(*builder, std::move(*currentSubField));
+ }
stdx::visit(
visit_helper::Overloaded{
[&](DocumentDiffReader* reader) {
- // Used to track dotted fieldnames at the current level of the diff.
- std::vector<Value> currentDottedFieldNames;
-
boost::optional<BSONElement> nextMod;
while ((nextMod = reader->nextUpdate()) || (nextMod = reader->nextInsert())) {
- FieldRef::FieldRefTempAppend tmpAppend(*fieldRef,
- nextMod->fieldNameStringData());
- updatedFields->addField(fieldRef->dottedField(), Value(*nextMod));
- appendIfDottedField(fieldRef, &currentDottedFieldNames);
+ builder->addToUpdatedFields(nextMod->fieldNameStringData(), Value(*nextMod));
}
- boost::optional<StringData> nextDelete;
- while ((nextDelete = reader->nextDelete())) {
- FieldRef::FieldRefTempAppend tmpAppend(*fieldRef, *nextDelete);
- removedFields->push_back(Value(fieldRef->dottedField()));
- appendIfDottedField(fieldRef, &currentDottedFieldNames);
+ while (auto nextDelete = reader->nextDelete()) {
+ builder->addToRemovedFields(*nextDelete);
}
- boost::optional<
- std::pair<StringData, stdx::variant<DocumentDiffReader, ArrayDiffReader>>>
- nextSubDiff;
- while ((nextSubDiff = reader->nextSubDiff())) {
- FieldRef::FieldRefTempAppend tmpAppend(*fieldRef, nextSubDiff->first);
- appendIfDottedField(fieldRef, &currentDottedFieldNames);
-
+ while (auto nextSubDiff = reader->nextSubDiff()) {
stdx::variant<DocumentDiffReader*, ArrayDiffReader*> nextReader;
stdx::visit(visit_helper::Overloaded{[&nextReader](auto& reader) {
nextReader = &reader;
}},
nextSubDiff->second);
- buildUpdateDescriptionWithDeltaOplog(nextReader,
- fieldRef,
- updatedFields,
- removedFields,
- truncatedArrays,
- arrayIndices,
- dottedFields);
- }
-
- // Now that we have iterated through all fields at this level of the diff, add any
- // dotted fieldnames we encountered into the 'dottedFields' output document.
- if (!currentDottedFieldNames.empty()) {
- dottedFields->addField(fieldRef->dottedField(),
- Value(std::move(currentDottedFieldNames)));
+ buildUpdateDescriptionWithDeltaOplog(
+ nextReader, builder, {{nextSubDiff->first}});
}
},
[&](ArrayDiffReader* reader) {
- // ArrayDiffReader can not be the root of the diff object, so 'fieldRef' should not
- // be empty.
- invariant(!fieldRef->empty());
-
- const auto newSize = reader->newSize();
- if (newSize) {
- const int sz = *newSize;
- truncatedArrays->push_back(
- Value(Document{{"field", fieldRef->dottedField()}, {"newSize", sz}}));
+            // An array diff cannot be the root of the diff object, so the builder's path must not
+            // be empty.
+ tassert(6697700, "Invalid diff or parsing error", builder->numParts() > 0);
+
+ // We don't need to add a fieldname, since we already descended into the array diff.
+ if (auto newSize = reader->newSize()) {
+ builder->addToTruncatedArrays(*newSize);
}
- // Used to track the array indices at the current level of the diff.
- std::vector<Value> currentArrayIndices;
for (auto nextMod = reader->next(); nextMod; nextMod = reader->next()) {
- const auto& fieldName = std::to_string(nextMod->first);
- FieldRef::FieldRefTempAppend tmpAppend(*fieldRef, fieldName);
-
- currentArrayIndices.push_back(Value(static_cast<int>(nextMod->first)));
-
stdx::visit(
visit_helper::Overloaded{
[&](BSONElement elem) {
- updatedFields->addField(fieldRef->dottedField(), Value(elem));
+ builder->addToUpdatedFields(nextMod->first, Value(elem));
},
[&](auto& nextReader) {
- buildUpdateDescriptionWithDeltaOplog(&nextReader,
- fieldRef,
- updatedFields,
- removedFields,
- truncatedArrays,
- arrayIndices,
- dottedFields);
+ buildUpdateDescriptionWithDeltaOplog(
+ &nextReader, builder, {{nextMod->first}});
},
},
nextMod->second);
}
-
- // Now that we have iterated through all fields at this level of the diff, add all
- // the array indices we encountered into the 'arrayIndices' output document.
- if (!currentArrayIndices.empty()) {
- arrayIndices->addField(fieldRef->dottedField(),
- Value(std::move(currentArrayIndices)));
- }
},
},
reader);
@@ -160,25 +245,12 @@ void buildUpdateDescriptionWithDeltaOplog(
namespace change_stream_document_diff_parser {
DeltaUpdateDescription parseDiff(const Diff& diff) {
- DeltaUpdateDescription updatedDesc;
- MutableDocument updatedFields;
- MutableDocument dottedFields;
- MutableDocument arrayIndices;
+ DeltaUpdateDescriptionBuilder builder;
DocumentDiffReader docReader(diff);
- stdx::variant<DocumentDiffReader*, ArrayDiffReader*> reader = &docReader;
- FieldRef path;
- buildUpdateDescriptionWithDeltaOplog(reader,
- &path,
- &updatedFields,
- &updatedDesc.removedFields,
- &updatedDesc.truncatedArrays,
- &arrayIndices,
- &dottedFields);
- updatedDesc.updatedFields = updatedFields.freeze();
- updatedDesc.arrayIndices = arrayIndices.freeze();
- updatedDesc.dottedFields = dottedFields.freeze();
-
- return updatedDesc;
+
+ buildUpdateDescriptionWithDeltaOplog(&docReader, &builder, boost::none);
+
+ return builder.freezeDeltaUpdateDescription();
}
} // namespace change_stream_document_diff_parser
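
A sketch of the parser's new output for an ambiguous path, mirroring the DisambiguatesNumericFields test later in this diff; each entry in disambiguatedPaths spells out the individual path components so that consumers can tell fieldnames apart from array indices.

// diff:                {sa: {u: {'0': 1}}}      (update of fieldname "0" inside subdocument "a")
// updatedFields:       {'a.0': 1}
// disambiguatedPaths:  {'a.0': ['a', '0']}      // '0' is a string fieldname here, not an array index
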
diff --git a/src/mongo/db/pipeline/change_stream_document_diff_parser.h b/src/mongo/db/pipeline/change_stream_document_diff_parser.h
index e9a2864fe91..5e871c177e6 100644
--- a/src/mongo/db/pipeline/change_stream_document_diff_parser.h
+++ b/src/mongo/db/pipeline/change_stream_document_diff_parser.h
@@ -36,11 +36,14 @@
namespace mongo {
namespace change_stream_document_diff_parser {
struct DeltaUpdateDescription {
+ DeltaUpdateDescription(const DeltaUpdateDescription& other) = delete;
+ DeltaUpdateDescription(DeltaUpdateDescription&& other) = default;
+ DeltaUpdateDescription() = default;
+
Document updatedFields;
std::vector<Value> removedFields;
std::vector<Value> truncatedArrays;
- Document arrayIndices;
- Document dottedFields;
+ Document disambiguatedPaths;
};
/**
diff --git a/src/mongo/db/pipeline/change_stream_document_diff_parser_test.cpp b/src/mongo/db/pipeline/change_stream_document_diff_parser_test.cpp
index 0f5b3506703..fa2bae02a6f 100644
--- a/src/mongo/db/pipeline/change_stream_document_diff_parser_test.cpp
+++ b/src/mongo/db/pipeline/change_stream_document_diff_parser_test.cpp
@@ -30,7 +30,6 @@
#include "mongo/db/pipeline/change_stream_document_diff_parser.h"
-
#include "mongo/bson/json.h"
#include "mongo/db/exec/document_value/document.h"
#include "mongo/db/exec/document_value/document_value_test_util.h"
@@ -41,110 +40,218 @@
namespace mongo {
namespace {
-
-TEST(ChangeStreamDocumentDiffParserTest, DottedFieldsInsideArrays) {
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesDottedFields) {
BSONObj diff = fromjson(
"{"
- " 'sarr.F.i.eld..': {a: true, l: 10,"
- " u0: 1,"
- " u1: {'a.b.c': {'a.b': 1}},"
- " s6: {u: {"
- " 'a.b.d': {'a.b.c': 3},"
- " 'a.b': {d: {'a.b': 1}}"
- " }}"
+ " u: {'a.b': 1},"
+ " 'sc.d.': {"
+ " u: {'e': 1, 'f.g': 1},"
+ " 'sh': {"
+ " u: {'i.j': 1}"
+ " }"
+ " },"
+ " 'sk': {"
+ " u: {'l.m': 1}"
" }"
"}");
auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
- ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields,
- Document(fromjson("{'arr.F.i.eld...0': 1, 'arr.F.i.eld...1': {'a.b.c': "
- "{'a.b': 1}}, 'arr.F.i.eld...6.a.b.d': {'a.b.c': 3}, "
- "'arr.F.i.eld...6.a.b': {d: {'a.b': 1}}}")));
+ ASSERT_DOCUMENT_EQ(
+ parsedDiff.updatedFields,
+ Document(fromjson("{'a.b': 1, 'c.d..e': 1, 'c.d..f.g': 1, 'c.d..h.i.j': 1, 'k.l.m': 1}")));
ASSERT_DOCUMENT_EQ(
- parsedDiff.dottedFields,
- Document(fromjson("{'arr.F.i.eld...6': ['a.b.d', 'a.b'], '': ['arr.F.i.eld..']}")));
+ parsedDiff.disambiguatedPaths,
+ Document(fromjson("{'a.b': ['a.b'], 'c.d..e': ['c.d.', 'e'], 'c.d..f.g': ['c.d.', 'f.g'], "
+ "'c.d..h.i.j': ['c.d.', 'h', 'i.j'], 'k.l.m': ['k', 'l.m']}")));
ASSERT(parsedDiff.removedFields.empty());
- ASSERT_EQ(parsedDiff.truncatedArrays.size(), 1);
- ASSERT_VALUE_EQ(parsedDiff.truncatedArrays[0],
- Value(fromjson("{field: 'arr.F.i.eld..', newSize: 10}")));
+ ASSERT(parsedDiff.truncatedArrays.empty());
+}
- ASSERT_DOCUMENT_EQ(parsedDiff.arrayIndices, Document(fromjson("{'arr.F.i.eld..': [0, 1, 6]}")));
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesNumericFields) {
+ BSONObj diff = fromjson(
+ "{"
+ " 'sa': {"
+ " u: {'0': 1}"
+ " }"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, Document(fromjson("{'a.0': 1}")));
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.disambiguatedPaths, Document(fromjson("{'a.0': ['a', '0']}")));
+
+ ASSERT(parsedDiff.removedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
}
-TEST(ChangeStreamDocumentDiffParserTest, DottedFieldsInsideObjects) {
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesNumericFieldsFromArrayIndices) {
BSONObj diff = fromjson(
"{"
- " 'sobject.F.i.eld..': {"
- " u: {'0.0.0': 1, '1.1.1': {'0.0': 1}},"
- " s6: {'s7.8': {'s9.10': {"
- " u: {'a.b.d': {'a.b.c': 3}}"
- " }}}"
+ " 'sa': {"
+ " 's0': {a: true, u0: 1}"
" }"
"}");
auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
- ASSERT_DOCUMENT_EQ(
- parsedDiff.updatedFields,
- Document(fromjson("{'object.F.i.eld...0.0.0': 1, 'object.F.i.eld...1.1.1': {'0.0': 1},"
- "'object.F.i.eld...6.7.8.9.10.a.b.d': {'a.b.c': 3} }")));
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, Document(fromjson("{'a.0.0': 1}")));
- ASSERT_DOCUMENT_EQ(parsedDiff.dottedFields,
- Document(fromjson("{'object.F.i.eld...6.7.8.9.10': ['a.b.d'], "
- "'object.F.i.eld...6.7.8': ['9.10'], "
- "'object.F.i.eld...6': ['7.8'], "
- "'object.F.i.eld..': ['0.0.0', '1.1.1'],"
- "'': ['object.F.i.eld..']}")));
+ ASSERT_DOCUMENT_EQ(parsedDiff.disambiguatedPaths,
+ Document(fromjson("{'a.0.0': ['a', '0', 0]}")));
ASSERT(parsedDiff.removedFields.empty());
ASSERT(parsedDiff.truncatedArrays.empty());
- ASSERT(parsedDiff.arrayIndices.empty());
}
-TEST(ChangeStreamDocumentDiffParserTest, PathToArrayFields) {
+TEST(ChangeStreamDocumentDiffParserTest, DoesNotDisambiguateNumericFieldAtRootOfDocument) {
BSONObj diff = fromjson(
"{"
- " 'sarr.F.i.eld..': {a: true, l: 10,"
- " u0: 1,"
- " s6: {a: true, s1: {"
- " 's0.0': {a: true, u0: 1}"
- " }}"
+ " u: {'0': 1}"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, Document(fromjson("{'0': 1}")));
+
+    // A numeric field at the root of the document is unambiguous; it must be a fieldname and
+    // cannot be an array index, since an array index by definition refers to an element of a
+    // parent array.
+ ASSERT(parsedDiff.disambiguatedPaths.empty());
+
+ ASSERT(parsedDiff.removedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
+}
+
+TEST(ChangeStreamDocumentDiffParserTest, DoesNotDisambiguateNumericFieldWithLeadingZeroes) {
+ BSONObj diff = fromjson(
+ "{"
+ " 'sa': {u: {'01': 1}}"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, Document(fromjson("{'a.01': 1}")));
+
+ // A numeric field with leading zeroes is unambiguous; it must be a fieldname and cannot be an
+ // array index, since array indexes are simple integers that do not have leading zeroes.
+ ASSERT(parsedDiff.disambiguatedPaths.empty());
+
+ ASSERT(parsedDiff.removedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
+}
+
+TEST(ChangeStreamDocumentDiffParserTest, DoesNotDisambiguateIfOnlyArrayIndicesPresent) {
+ BSONObj diff = fromjson(
+ "{"
+ " 'sa': {a: true,"
+ " s0: {u: {'b': 1}}"
" }"
"}");
auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
- ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields,
- Document(fromjson("{'arr.F.i.eld...0': 1, 'arr.F.i.eld...6.1.0.0.0': 1}")));
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, Document(fromjson("{'a.0.b': 1}")));
+
+ ASSERT(parsedDiff.disambiguatedPaths.empty());
+ ASSERT(parsedDiff.removedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
+}
+
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesRemovedFields) {
+ BSONObj diff = fromjson(
+ "{"
+ " d: {'a.b': false},"
+ " 'sc': {"
+ " d: {'0': false}"
+ " }"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
+
+ ASSERT(parsedDiff.removedFields.size() == 2);
+ ASSERT_VALUE_EQ(parsedDiff.removedFields[0], Value("a.b"_sd));
+ ASSERT_VALUE_EQ(parsedDiff.removedFields[1], Value("c.0"_sd));
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.disambiguatedPaths,
+ Document(fromjson("{'a.b': ['a.b'], 'c.0': ['c', '0']}")));
+
+ ASSERT(parsedDiff.updatedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
+}
- ASSERT_DOCUMENT_EQ(parsedDiff.dottedFields,
- Document(fromjson("{'arr.F.i.eld...6.1': ['0.0'], '': ['arr.F.i.eld..']}")));
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesTruncatedArrays) {
+ BSONObj diff = fromjson(
+ "{"
+ " 'sa.b': {a: true, l: 5},"
+ " 'sc': {"
+ " 's0': {a: true, l: 5}"
+ " }"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
+
+ ASSERT(parsedDiff.truncatedArrays.size() == 2);
+ ASSERT_VALUE_EQ(parsedDiff.truncatedArrays[0], Value(fromjson("{field: 'a.b', newSize: 5}")));
+ ASSERT_VALUE_EQ(parsedDiff.truncatedArrays[1], Value(fromjson("{field: 'c.0', newSize: 5}")));
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.disambiguatedPaths,
+ Document(fromjson("{'a.b': ['a.b'], 'c.0': ['c', '0']}")));
+ ASSERT(parsedDiff.updatedFields.empty());
ASSERT(parsedDiff.removedFields.empty());
+}
+
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesCombinationOfAmbiguousFields) {
+ // Array and numeric field within dotted parent, dotted and numeric fields within array, dotted
+ // field and array within numeric parent.
+ BSONObj diff = fromjson(
+ "{"
+ " 'sa.b': {a: true,"
+ " 's0': {u: {'1': 1}}"
+ " },"
+ " 'sc': {a: true,"
+ " 's0': {u: {'d.e': 1}},"
+ " 's1': {u: {'2': 1}}"
+ " },"
+ " 'sf': {"
+ " 's1': {"
+ " u: {'g.h': 1},"
+ " 's2': {a: true,"
+ " u3: 1,"
+ " s4: {u: {'5': 1}}"
+ " }"
+ " }"
+ " }"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
- ASSERT_EQ(parsedDiff.truncatedArrays.size(), 1);
- ASSERT_VALUE_EQ(parsedDiff.truncatedArrays[0],
- Value(fromjson("{field: 'arr.F.i.eld..', newSize: 10}")));
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields,
+ Document(fromjson("{'a.b.0.1': 1, 'c.0.d.e': 1, 'c.1.2': 1, 'f.1.g.h': 1, "
+ "'f.1.2.3': 1, 'f.1.2.4.5': 1}")));
ASSERT_DOCUMENT_EQ(
- parsedDiff.arrayIndices,
- Document(fromjson(
- "{'arr.F.i.eld...6.1.0.0': [0], 'arr.F.i.eld...6': [1], 'arr.F.i.eld..': [0, 6]}")));
+ parsedDiff.disambiguatedPaths,
+ Document(fromjson("{'a.b.0.1': ['a.b', 0, '1'], 'c.0.d.e': ['c', 0, 'd.e'], 'c.1.2': ['c', "
+ "1, '2'], 'f.1.g.h': ['f', '1', 'g.h'], 'f.1.2.3': ['f', '1', '2', 3], "
+ "'f.1.2.4.5': ['f', '1', '2', 4, '5']}")));
+
+ ASSERT(parsedDiff.removedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
}
-TEST(ChangeStreamDocumentDiffParserTest, WithDuplicateFieldsInDiff) {
+TEST(ChangeStreamDocumentDiffParserTest, DoesNotFullyDisambiguateWithDuplicateFieldsInDiff) {
BSONObj diff = fromjson("{u: {'a.b' : 2}, sa : {u: {b: 1 }}}");
auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
auto expectedUpdateFields = Document{{"a.b", 2}, {"a.b", 1}};
ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, expectedUpdateFields);
- ASSERT_DOCUMENT_EQ(parsedDiff.dottedFields, Document(fromjson("{'': ['a.b']}")));
+ ASSERT_DOCUMENT_EQ(parsedDiff.disambiguatedPaths, Document(fromjson("{'a.b': ['a.b']}")));
ASSERT(parsedDiff.removedFields.empty());
ASSERT(parsedDiff.truncatedArrays.empty());
- ASSERT(parsedDiff.arrayIndices.empty());
}
} // namespace
} // namespace mongo
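Taken together, the tests above imply a simple rule for when a numeric path component is ambiguous: it must appear below some parent (never at the root of the document) and must be a canonical integer with no leading zeroes; anything else is unambiguously a field name. A standalone sketch of that predicate (hypothetical helper, std types only, mirroring the cases exercised above):

// Could this path component be an array index, and therefore need disambiguation?
#include <cctype>
#include <string>

bool couldBeArrayIndex(const std::string& component, bool isRootLevel) {
    if (isRootLevel || component.empty())
        return false;  // root-level fields are never array indices
    if (component.size() > 1 && component[0] == '0')
        return false;  // "01" has a leading zero, so it must be a field name
    for (char c : component) {
        if (!std::isdigit(static_cast<unsigned char>(c)))
            return false;  // any non-digit means it is a field name
    }
    return true;  // e.g. "0" or "12" under a parent field is ambiguous
}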
diff --git a/src/mongo/db/pipeline/change_stream_event_transform.cpp b/src/mongo/db/pipeline/change_stream_event_transform.cpp
index 7b21e860899..f41ae425423 100644
--- a/src/mongo/db/pipeline/change_stream_event_transform.cpp
+++ b/src/mongo/db/pipeline/change_stream_event_transform.cpp
@@ -213,21 +213,18 @@ Document ChangeStreamDefaultEventTransformation::applyTransformation(const Docum
if (_changeStreamSpec.getShowRawUpdateDescription()) {
updateDescription = input[repl::OplogEntry::kObjectFieldName];
} else {
- const auto populateSpecialFields = _changeStreamSpec.getShowExpandedEvents() &&
+ const auto showDisambiguatedPaths = _changeStreamSpec.getShowExpandedEvents() &&
feature_flags::gFeatureFlagChangeStreamsFurtherEnrichedEvents.isEnabled(
serverGlobalParams.featureCompatibility);
const auto& deltaDesc = change_stream_document_diff_parser::parseDiff(
diffObj.getDocument().toBson());
- updateDescription = Value(
- Document{{"updatedFields", deltaDesc.updatedFields},
- {"removedFields", std::move(deltaDesc.removedFields)},
- {"truncatedArrays", std::move(deltaDesc.truncatedArrays)},
- {"specialFields",
- populateSpecialFields
- ? Value(Document{{"arrayIndices", deltaDesc.arrayIndices},
- {"dottedFields", deltaDesc.dottedFields}})
- : Value()}});
+ updateDescription = Value(Document{
+ {"updatedFields", deltaDesc.updatedFields},
+ {"removedFields", std::move(deltaDesc.removedFields)},
+ {"truncatedArrays", std::move(deltaDesc.truncatedArrays)},
+ {"disambiguatedPaths",
+ showDisambiguatedPaths ? Value(deltaDesc.disambiguatedPaths) : Value()}});
}
} else if (id.missing()) {
operationType = DocumentSourceChangeStream::kUpdateOpType;
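When showDisambiguatedPaths is false, the ternary above stores a default-constructed Value; assuming the usual Document/Value semantics in which missing values are skipped during serialization, the emitted updateDescription then contains no "disambiguatedPaths" field at all, while an enabled flag yields at least an empty subdocument. A small illustrative snippet of that distinction (sketch only, under the stated assumption about missing-value serialization):

// Sketch: a missing Value drops the field from the serialized description entirely.
Document withField{{"updatedFields", Document{}}, {"disambiguatedPaths", Value(Document{})}};
Document withoutField{{"updatedFields", Document{}}, {"disambiguatedPaths", Value()}};
// withField serializes with an empty "disambiguatedPaths" subdocument;
// withoutField serializes with "updatedFields" only.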
diff --git a/src/mongo/db/pipeline/change_stream_event_transform_test.cpp b/src/mongo/db/pipeline/change_stream_event_transform_test.cpp
index 3123c26d160..e0060b7d7c7 100644
--- a/src/mongo/db/pipeline/change_stream_event_transform_test.cpp
+++ b/src/mongo/db/pipeline/change_stream_event_transform_test.cpp
@@ -92,8 +92,7 @@ TEST(ChangeStreamEventTransformTest, TestDefaultUpdateTransform) {
Document{{"updatedFields", Document{{"y", 2}}},
{"removedFields", std::vector<Value>()},
{"truncatedArrays", std::vector<Value>()},
- {"specialFields",
- Document{{"arrayIndices", Document()}, {"dottedFields", Document()}}}},
+ {"disambiguatedPaths", Document{}}},
},
};
@@ -197,25 +196,15 @@ TEST(ChangeStreamEventTransformTest, TestUpdateTransformWithTenantId) {
// in the oplog entry. It should still not be a part of the db name in the change event.
RAIIServerParameterControllerForTest featureFlagController("featureFlagRequireTenantID", true);
- // TODO SERVER-66019 Construct OplogEntry using makeOplogEntry and use the applyTransformation
- // helper defined above. We manually construct the OplogEntry as a BSON object below to avoid
- // including the tenantId as the db prefix in the OplogEntry's "ns" field. Until SERVER-66019 is
- // complete, the tenantId will be included in both the "tid" field and "ns" fields in serialized
- // oplog entries, because serializing NamespaceString currently will include the tenantId.
- auto oplogEntry = BSON("ts" << Timestamp(0, 0) << "t" << 0LL << "op"
- << "u"
- << "ns"
- << "unittests.serverless_change_stream"
- << "tid" << tenantId << "wall" << Date_t() << "ui" << testUuid()
- << "o" << BSON("$v" << 2 << "diff" << BSON("u" << BSON("y" << 2)))
- << "o2" << documentKey.toBson());
-
- DocumentSourceChangeStreamSpec spec;
- spec.setStartAtOperationTime(kDefaultTs);
- ChangeStreamEventTransformer transformer(
- make_intrusive<ExpressionContextForTest>(nssWithTenant), spec);
+ auto oplogEntry = makeOplogEntry(repl::OpTypeEnum::kUpdate, // op type
+ nssWithTenant, // namespace
+ BSON("$v" << 2 << "diff" << BSON("u" << BSON("y" << 2))), // o
+ testUuid(), // uuid
+ boost::none, // fromMigrate
+ documentKey.toBson() // o2
+ );
- changeStreamDoc = transformer.applyTransformation(Document(oplogEntry));
+ changeStreamDoc = applyTransformation(updateField, nssWithTenant);
outputNs = changeStreamDoc[DocumentSourceChangeStream::kNamespaceField].getDocument();
ASSERT_DOCUMENT_EQ(outputNs, expectedNamespace);
@@ -261,30 +250,14 @@ TEST(ChangeStreamEventTransformTest, TestRenameTransformWithTenantId) {
// in the oplog entry. It should still not be a part of the db name in the change event.
RAIIServerParameterControllerForTest featureFlagController("featureFlagRequireTenantID", true);
- // TODO SERVER-66019 Construct OplogEntry using makeOplogEntry and use the applyTransformation
- // helper defined above. We manually construct the OplogEntry as a BSON object below to avoid
- // including the tenantId as the db prefix in the OplogEntry's "ns", "renameCollection", and
- // "to" fields. Until SERVER-66019 is complete, the tenantId will be included in both the "tid"
- // field and these 3 fields in serialized oplog entries, because serializing NamespaceString
- // currently will include the tenantId.
- auto oplogEntry =
- BSON("ts" << Timestamp(0, 0) << "t" << 0LL << "op"
- << "c"
- << "ns"
- << "unittests.$cmd"
- << "tid" << tenantId << "wall" << Date_t() << "ui" << testUuid() << "o"
- << BSON("renameCollection"
- << "unittests.serverless_change_stream"
- << "to"
- << "unittests.rename_coll"));
-
- DocumentSourceChangeStreamSpec spec;
- spec.setStartAtOperationTime(kDefaultTs);
- spec.setShowExpandedEvents(true);
- ChangeStreamEventTransformer transformer(make_intrusive<ExpressionContextForTest>(renameFrom),
- spec);
+ auto oplogEntry = makeOplogEntry(
+ repl::OpTypeEnum::kCommand, // op type
+ renameFrom.getCommandNS(), // namespace
+ BSON("renameCollection" << renameFrom.toString() << "to" << renameTo.toString()), // o
+ testUuid() // uuid
+ );
- changeStreamDoc = transformer.applyTransformation(Document(oplogEntry));
+ changeStreamDoc = applyTransformation(oplogEntry, renameFrom);
renameDoc = Document{
{DocumentSourceChangeStream::kNamespaceField,
changeStreamDoc.getField(DocumentSourceChangeStream::kNamespaceField)},
@@ -322,24 +295,13 @@ TEST(ChangeStreamEventTransformTest, TestDropDatabaseTransformWithTenantId) {
// in the oplog entry. It should still not be a part of the db name in the change event.
RAIIServerParameterControllerForTest featureFlagController("featureFlagRequireTenantID", true);
- // TODO SERVER-66019 Construct OplogEntry using makeOplogEntry and use the applyTransformation
- // helper defined above. We manually construct the OplogEntry as a BSON object below to avoid
- // including the tenantId as the db prefix in the OplogEntry's "ns" field Until SERVER-66019 is
- // complete, the tenantId will be included in both the "tid" and "ns" fields in serialized oplog
- // entries, because serializing NamespaceString currently will include the tenantId.
- auto oplogEntry = BSON("ts" << Timestamp(0, 0) << "t" << 0LL << "op"
- << "c"
- << "ns"
- << "unittests.$cmd"
- << "tid" << tenantId << "wall" << Date_t() << "ui" << testUuid()
- << "o" << BSON("dropDatabase" << 1));
-
- DocumentSourceChangeStreamSpec spec;
- spec.setStartAtOperationTime(kDefaultTs);
- ChangeStreamEventTransformer transformer(make_intrusive<ExpressionContextForTest>(dbToDrop),
- spec);
+ auto oplogEntry = makeOplogEntry(repl::OpTypeEnum::kCommand, // op type
+ dbToDrop.getCommandNS(), // namespace
+ BSON("dropDatabase" << 1), // o
+ testUuid() // uuid
+ );
- changeStreamDoc = transformer.applyTransformation(Document(oplogEntry));
+ changeStreamDoc = applyTransformation(oplogEntry, dbToDrop);
outputNs = changeStreamDoc[DocumentSourceChangeStream::kNamespaceField].getDocument();
ASSERT_DOCUMENT_EQ(outputNs, expectedNamespace);
@@ -372,25 +334,13 @@ TEST(ChangeStreamEventTransformTest, TestCreateTransformWithTenantId) {
// in the oplog entry. It should still not be a part of the db name in the change event.
RAIIServerParameterControllerForTest featureFlagController("featureFlagRequireTenantID", true);
- // TODO SERVER-66019 Construct OplogEntry using makeOplogEntry and use the applyTransformation
- // helper defined above. We manually construct the OplogEntry as a BSON object below to avoid
- // including the tenantId as the db prefix in the OplogEntry's "ns" field Until SERVER-66019 is
- // complete, the tenantId will be included in both the "tid" and "ns" fields in serialized oplog
- // entries, because serializing NamespaceString currently will include the tenantId.
- auto oplogEntry = BSON("ts" << Timestamp(0, 0) << "t" << 0LL << "op"
- << "c"
- << "ns"
- << "unittests.$cmd"
- << "tid" << tenantId << "wall" << Date_t() << "ui" << testUuid()
- << "o" << BSON("create" << nssWithTenant.coll()));
-
- DocumentSourceChangeStreamSpec spec;
- spec.setStartAtOperationTime(kDefaultTs);
- spec.setShowExpandedEvents(true);
- ChangeStreamEventTransformer transformer(
- make_intrusive<ExpressionContextForTest>(nssWithTenant), spec);
+ auto oplogEntry = makeOplogEntry(repl::OpTypeEnum::kCommand, // op type
+ nssWithTenant.getCommandNS(), // namespace
+ BSON("create" << nssWithTenant.coll()), // o
+ testUuid() // uuid
+ );
- changeStreamDoc = transformer.applyTransformation(Document(oplogEntry));
+ changeStreamDoc = applyTransformation(oplogEntry, nssWithTenant);
outputNs = changeStreamDoc[DocumentSourceChangeStream::kNamespaceField].getDocument();
ASSERT_DOCUMENT_EQ(outputNs, expectedNamespace);
@@ -424,37 +374,19 @@ TEST(ChangeStreamEventTransformTest, TestCreateViewTransformWithTenantId) {
ASSERT_DOCUMENT_EQ(outputNs, expectedNamespace);
-
// Now set featureFlagRequireTenantId, so we expect the tenantId to be in a separate "tid" field
// in the oplog entry. It should still not be a part of the db name in the change event.
RAIIServerParameterControllerForTest featureFlagController("featureFlagRequireTenantID", true);
- // TODO SERVER-66019 Construct OplogEntry using makeOplogEntry and use the applyTransformation
- // helper defined above. We manually construct the OplogEntry as a BSON object below to avoid
- // including the tenantId as the db prefix in the OplogEntry's "ns" and "o._id" fields. Until
- // SERVER-66019 is complete, the tenantId will be included in both the "tid" field and these 2
- // fields in serialized oplog entries, because serializing NamespaceString currently will
- // include the tenantId.
- auto oplogEntry = BSON("ts" << Timestamp(0, 0) << "t" << 0LL << "op"
- << "i"
- << "ns"
- << "viewDB.system.views"
- << "tid" << tenantId << "wall" << Date_t() << "ui" << testUuid()
- << "o"
- << BSON("_id"
- << "viewDB.view.name"
- << "viewOn"
- << "baseColl"
- << "pipeline" << viewPipeline));
-
- DocumentSourceChangeStreamSpec spec;
- spec.setStartAtOperationTime(kDefaultTs);
- ChangeStreamEventTransformer transformer(
- make_intrusive<ExpressionContextForTest>(
- NamespaceString::makeCollectionlessAggregateNSS(viewNss.dbName())),
- spec);
+ auto oplogEntry = makeOplogEntry(repl::OpTypeEnum::kInsert, // op type
+ systemViewNss, // namespace
+ BSON("_id" << viewNss.toString() << "viewOn"
+ << "baseColl"
+ << "pipeline" << viewPipeline), // o
+ testUuid());
- changeStreamDoc = transformer.applyTransformation(Document(oplogEntry));
+ changeStreamDoc = applyTransformation(
+ oplogEntry, NamespaceString::makeCollectionlessAggregateNSS(viewNss.dbName()));
outputNs = changeStreamDoc[DocumentSourceChangeStream::kNamespaceField].getDocument();
ASSERT_DOCUMENT_EQ(outputNs, expectedNamespace);
diff --git a/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp b/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp
index f153a30818f..e4cbb6032ae 100644
--- a/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp
+++ b/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp
@@ -32,35 +32,49 @@
#include "mongo/db/pipeline/change_stream_pre_image_helpers.h"
+#include "mongo/base/error_codes.h"
+#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/concurrency/lock_manager_defs.h"
#include "mongo/db/concurrency/locker.h"
-#include "mongo/db/dbhelpers.h"
+#include "mongo/db/curop.h"
#include "mongo/db/namespace_string.h"
+#include "mongo/db/operation_context.h"
#include "mongo/util/assert_util.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
-
namespace mongo {
void writeToChangeStreamPreImagesCollection(OperationContext* opCtx,
const ChangeStreamPreImage& preImage) {
- const auto collectionNamespace = NamespaceString::kChangeStreamPreImagesNamespace;
+ tassert(6646200,
+ "Expected to be executed in a write unit of work",
+ opCtx->lockState()->inAWriteUnitOfWork());
tassert(5869404,
str::stream() << "Invalid pre-image document applyOpsIndex: "
<< preImage.getId().getApplyOpsIndex(),
preImage.getId().getApplyOpsIndex() >= 0);
- // This lock acquisition can block on a stronger lock held by another operation modifying the
- // pre-images collection. There are no known cases where an operation holding an exclusive lock
- // on the pre-images collection also waits for oplog visibility.
+ // This lock acquisition can block on a stronger lock held by another operation modifying
+ // the pre-images collection. There are no known cases where an operation holding an
+ // exclusive lock on the pre-images collection also waits for oplog visibility.
AllowLockAcquisitionOnTimestampedUnitOfWork allowLockAcquisition(opCtx->lockState());
- AutoGetCollection preimagesCollectionRaii(opCtx, collectionNamespace, LockMode::MODE_IX);
- UpdateResult res = Helpers::upsert(opCtx, collectionNamespace.toString(), preImage.toBSON());
+ AutoGetCollection preImagesCollectionRaii(
+ opCtx, NamespaceString::kChangeStreamPreImagesNamespace, LockMode::MODE_IX);
+ auto& changeStreamPreImagesCollection = preImagesCollectionRaii.getCollection();
+ tassert(6646201,
+ "The change stream pre-images collection is not present",
+ changeStreamPreImagesCollection);
+
+ // Inserts into the change stream pre-images collection are not replicated.
+ repl::UnreplicatedWritesBlock unreplicatedWritesBlock{opCtx};
+ const auto insertionStatus = changeStreamPreImagesCollection->insertDocument(
+ opCtx, InsertStatement{preImage.toBSON()}, &CurOp::get(opCtx)->debug());
tassert(5868601,
- str::stream() << "Failed to insert a new document into the pre-images collection: ts: "
- << preImage.getId().getTs().toString()
- << ", applyOpsIndex: " << preImage.getId().getApplyOpsIndex(),
- !res.existing && !res.upsertedId.isEmpty());
+ str::stream() << "Attempted to insert a duplicate document into the pre-images "
+ "collection. Pre-image id: "
+ << preImage.getId().toBSON().toString(),
+ insertionStatus != ErrorCodes::DuplicateKey);
+ uassertStatusOK(insertionStatus);
}
} // namespace mongo
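The rewritten helper no longer opens its own storage transaction: tassert 6646200 requires the caller to already be inside a write unit of work, the insert is explicitly unreplicated, and a DuplicateKey result is treated as an internal invariant failure while any other error is propagated via uassertStatusOK. A rough usage sketch under that reading (the call site shown is illustrative, not taken from this patch):

// Illustrative caller only: the caller now owns the write unit of work.
WriteUnitOfWork wuow(opCtx);
writeToChangeStreamPreImagesCollection(opCtx, preImage);  // unreplicated insert
wuow.commit();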
diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp
index 8b60a31637c..d2a5563c7c7 100644
--- a/src/mongo/db/pipeline/dependencies.cpp
+++ b/src/mongo/db/pipeline/dependencies.cpp
@@ -37,6 +37,13 @@
namespace mongo {
+std::list<std::string> DepsTracker::sortedFields() const {
+ // Use a special comparator to put parent fieldpaths before their children.
+ std::list<std::string> sortedFields(fields.begin(), fields.end());
+ sortedFields.sort(PathPrefixComparator());
+ return sortedFields;
+}
+
BSONObj DepsTracker::toProjectionWithoutMetadata(
TruncateToRootLevel truncationBehavior /*= TruncateToRootLevel::no*/) const {
BSONObjBuilder bb;
@@ -52,17 +59,21 @@ BSONObj DepsTracker::toProjectionWithoutMetadata(
return bb.obj();
}
+ // Go through dependency fieldpaths to find the minimal set of projections that cover the
+ // dependencies. For example, the dependencies ["a.b", "a.b.c.g", "c", "c.d", "f"] would be
+ // minimally covered by the projection {"a.b": 1, "c": 1, "f": 1}. The key operation here is
+ // folding dependencies into ancestor dependencies, wherever possible. This is assisted by a
+ // special sort in DepsTracker::sortedFields that treats '.' as the first char and thus places
+ // parent paths directly before their children.
bool idSpecified = false;
std::string last;
- for (const auto& field : fields) {
+ for (const auto& field : sortedFields()) {
if (str::startsWith(field, "_id") && (field.size() == 3 || field[3] == '.')) {
idSpecified = true;
}
if (!last.empty() && str::startsWith(field, last)) {
- // we are including a parent of *it so we don't need to include this field
- // explicitly. This logic relies on on set iterators going in lexicographic order so
- // that a string is always directly before of all fields it prefixes.
+ // We are including a parent of this field, so we can skip this field.
continue;
}
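The comment above describes the whole algorithm: sort the dependencies so that a parent path lands directly before its children, then drop any field already covered by a kept ancestor. The special-character tests below show that the coverage check must effectively compare against the ancestor plus a trailing '.', otherwise "a-b" would be wrongly folded into "a". A self-contained sketch of that folding using std types (DotFirstLess and minimalCover are illustrative names mirroring PathPrefixComparator and the loop above):

// Standalone sketch (std types only) of the minimal-cover folding described above.
#include <algorithm>
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct DotFirstLess {
    bool operator()(const std::string& lhs, const std::string& rhs) const {
        for (size_t i = 0, n = std::min(lhs.size(), rhs.size()); i < n; ++i) {
            unsigned char l = lhs[i], r = rhs[i];
            if (l == r)
                continue;
            if (l == '.')
                return true;   // '.' is treated as the smallest character
            if (r == '.')
                return false;
            return l < r;
        }
        return lhs.size() < rhs.size();  // a proper prefix sorts before its extensions
    }
};

std::vector<std::string> minimalCover(const std::set<std::string, DotFirstLess>& fields) {
    std::vector<std::string> kept;
    std::string last;  // previously kept field, with a trailing '.'
    for (const auto& field : fields) {
        if (!last.empty() && field.compare(0, last.size(), last) == 0)
            continue;  // covered by an ancestor we already kept
        kept.push_back(field);
        last = field + '.';
    }
    return kept;
}

int main() {
    std::set<std::string, DotFirstLess> deps{"a.b", "a.b.c.g", "c", "c.d", "f"};
    for (const auto& f : minimalCover(deps))
        std::cout << f << '\n';  // prints a.b, c, f
    // With {"a", "a-b", "a.b"} the dot-first order is a, a.b, a-b, so only a.b is folded away.
}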
@@ -96,4 +107,36 @@ void DepsTracker::setNeedsMetadata(DocumentMetadataFields::MetaType type, bool r
invariant(required || !_metadataDeps[type]);
_metadataDeps[type] = required;
}
+
+// Returns true if the lhs value should sort before the rhs, false otherwise.
+bool PathPrefixComparator::operator()(const std::string& lhs, const std::string& rhs) const {
+ constexpr char dot = '.';
+
+ for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) {
+ // Below, we explicitly choose unsigned char because the usual const char& returned by
+ // operator[] is actually signed on x86 and will incorrectly order unicode characters.
+ unsigned char lchar = lhs[pos], rchar = rhs[pos];
+ if (lchar == rchar) {
+ continue;
+ }
+
+ // Consider the path delimiter '.' as being less than all other characters, so that
+ // paths sort directly before any paths they prefix and directly after any paths
+ // which prefix them.
+ if (lchar == dot) {
+ return true;
+ } else if (rchar == dot) {
+ return false;
+ }
+
+ // Otherwise, default to normal character comparison.
+ return lchar < rchar;
+ }
+
+    // If we get here, we have reached the end of lhs and/or rhs, and every character compared
+    // so far has matched. If lhs is shorter than rhs, then lhs is a prefix of rhs and should
+    // sort before it.
+ return lhs.size() < rhs.size();
+}
+
} // namespace mongo
diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h
index bda3bf9b243..3c892de8181 100644
--- a/src/mongo/db/pipeline/dependencies.h
+++ b/src/mongo/db/pipeline/dependencies.h
@@ -184,6 +184,11 @@ struct DepsTracker {
}
}
+ /**
+ * Return fieldpaths ordered such that a parent is immediately before its children.
+ */
+ std::list<std::string> sortedFields() const;
+
std::set<std::string> fields; // Names of needed fields in dotted notation.
std::set<Variables::Id> vars; // IDs of referenced variables.
bool needWholeDocument = false; // If true, ignore 'fields'; the whole document is needed.
@@ -201,4 +206,13 @@ private:
// dependency analysis.
QueryMetadataBitSet _metadataDeps;
};
+
+
+/** Custom comparator that orders fieldpath strings by path prefix first, then by field.
+ * This ensures that a parent field is ordered directly before its children.
+ */
+struct PathPrefixComparator {
+ /* Returns true if the lhs value should sort before the rhs, false otherwise. */
+ bool operator()(const std::string& lhs, const std::string& rhs) const;
+};
} // namespace mongo
diff --git a/src/mongo/db/pipeline/dependencies_test.cpp b/src/mongo/db/pipeline/dependencies_test.cpp
index f366ad3ce1d..938130b91bd 100644
--- a/src/mongo/db/pipeline/dependencies_test.cpp
+++ b/src/mongo/db/pipeline/dependencies_test.cpp
@@ -162,6 +162,13 @@ TEST(DependenciesToProjectionTest, ShouldIncludeFieldEvenIfSuffixOfAnotherFieldW
BSON("a" << 1 << "ab" << 1 << "_id" << 0));
}
+TEST(DependenciesToProjectionTest, ExcludeIndirectDescendants) {
+ const char* array[] = {"a.b", "_id", "a.b.c.d.e"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(), BSON("_id" << 1 << "a.b" << 1));
+}
+
TEST(DependenciesToProjectionTest, ShouldIncludeIdIfNeeded) {
const char* array[] = {"a", "_id"};
DepsTracker deps;
@@ -199,6 +206,27 @@ TEST(DependenciesToProjectionTest, ShouldIncludeFieldPrefixedByIdWhenIdSubfieldI
BSON("_id.a" << 1 << "_id_a" << 1 << "a" << 1));
}
+// SERVER-66418
+TEST(DependenciesToProjectionTest, ChildCoveredByParentWithSpecialChars) {
+ // without "_id"
+ {
+ // This is an important test case because '-' is one of the few chars before '.' in utf-8.
+ const char* array[] = {"a", "a-b", "a.b"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(),
+ BSON("a" << 1 << "a-b" << 1 << "_id" << 0));
+ }
+ // with "_id"
+ {
+ const char* array[] = {"_id", "a", "a-b", "a.b"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(),
+ BSON("_id" << 1 << "a" << 1 << "a-b" << 1));
+ }
+}
+
TEST(DependenciesToProjectionTest, ShouldOutputEmptyObjectIfEntireDocumentNeeded) {
const char* array[] = {"a"}; // fields ignored with needWholeDocument
DepsTracker deps;
@@ -259,5 +287,56 @@ TEST(DependenciesToProjectionTest,
ASSERT_TRUE(deps.metadataDeps()[DocumentMetadataFields::kTextScore]);
}
+TEST(DependenciesToProjectionTest, SortFieldPaths) {
+ const char* array[] = {"",
+ "A",
+ "_id",
+ "a",
+ "a.b",
+ "a.b.c",
+ "a.c",
+ // '-' char in utf-8 comes before '.' but our special fieldpath sort
+ // puts '.' first so that children directly follow their parents.
+ "a-b",
+ "a-b.ear",
+ "a-bear",
+ "a-bear.",
+ "a🌲",
+ "b",
+ "b.a"
+ "b.aa"
+ "b.🌲d"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ // our custom sort will restore the ordering above
+ std::list<std::string> fieldPathSorted = deps.sortedFields();
+ auto itr = fieldPathSorted.begin();
+ for (unsigned long i = 0; i < fieldPathSorted.size(); i++) {
+ ASSERT_EQ(*itr, array[i]);
+ ++itr;
+ }
+}
+
+TEST(DependenciesToProjectionTest, PathLessThan) {
+ auto lessThan = PathPrefixComparator();
+ ASSERT_FALSE(lessThan("a", "a"));
+ ASSERT_TRUE(lessThan("a", "aa"));
+ ASSERT_TRUE(lessThan("a", "b"));
+ ASSERT_TRUE(lessThan("", "a"));
+ ASSERT_TRUE(lessThan("Aa", "aa"));
+ ASSERT_TRUE(lessThan("a.b", "ab"));
+ ASSERT_TRUE(lessThan("a.b", "a-b")); // SERVER-66418
+ ASSERT_TRUE(lessThan("a.b", "a b")); // SERVER-66418
+ // verify the difference from the standard sort
+ ASSERT_TRUE(std::string("a.b") > std::string("a-b"));
+ ASSERT_TRUE(std::string("a.b") > std::string("a b"));
+ // test unicode behavior
+ ASSERT_TRUE(lessThan("a.b", "a🌲"));
+ ASSERT_TRUE(lessThan("a.b", "a🌲b"));
+ ASSERT_TRUE(lessThan("🌲", "🌳")); // U+1F332 < U+1F333
+ ASSERT_TRUE(lessThan("🌲", "🌲.b"));
+ ASSERT_FALSE(lessThan("🌲.b", "🌲"));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp b/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp
index 069a7e2f0b2..effa178fc32 100644
--- a/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp
+++ b/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp
@@ -175,7 +175,7 @@ TEST_F(DispatchShardPipelineTest, DispatchShardPipelineDoesNotRetryOnStaleConfig
OID epoch{OID::gen()};
Timestamp timestamp{1, 0};
return createErrorCursorResponse({StaleConfigInfo(kTestAggregateNss,
- ChunkVersion(1, 0, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 0}),
boost::none,
ShardId{"0"}),
"Mock error: shard version mismatch"});
@@ -218,7 +218,7 @@ TEST_F(DispatchShardPipelineTest, WrappedDispatchDoesRetryOnStaleConfigError) {
// namespace, then mock out a successful response.
onCommand([&](const executor::RemoteCommandRequest& request) {
return createErrorCursorResponse({StaleConfigInfo(kTestAggregateNss,
- ChunkVersion(2, 0, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {2, 0}),
boost::none,
ShardId{"0"}),
"Mock error: shard version mismatch"});
@@ -227,7 +227,7 @@ TEST_F(DispatchShardPipelineTest, WrappedDispatchDoesRetryOnStaleConfigError) {
// Mock the expected config server queries.
const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
- ChunkVersion version(2, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {2, 0});
ChunkType chunk1(
uuid, {shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)}, version, {"0"});
diff --git a/src/mongo/db/pipeline/document_source_change_stream_test.cpp b/src/mongo/db/pipeline/document_source_change_stream_test.cpp
index fcd7c965b37..bc1707620e4 100644
--- a/src/mongo/db/pipeline/document_source_change_stream_test.cpp
+++ b/src/mongo/db/pipeline/document_source_change_stream_test.cpp
@@ -1421,7 +1421,7 @@ TEST_F(ChangeStreamStageTest, TransformReshardBegin) {
TEST_F(ChangeStreamStageTest, TransformReshardDoneCatchUpLegacyFormat) {
auto existingUuid = UUID::gen();
auto reshardingUuid = UUID::gen();
- auto temporaryNs = constructTemporaryReshardingNss(nss.db(), existingUuid);
+ auto temporaryNs = resharding::constructTemporaryReshardingNss(nss.db(), existingUuid);
const auto o2FieldInLegacyFormat = BSON("type"
<< "reshardDoneCatchUp"
@@ -1460,7 +1460,7 @@ TEST_F(ChangeStreamStageTest, TransformReshardDoneCatchUpLegacyFormat) {
TEST_F(ChangeStreamStageTest, TransformReshardDoneCatchUp) {
auto existingUuid = UUID::gen();
auto reshardingUuid = UUID::gen();
- auto temporaryNs = constructTemporaryReshardingNss(nss.db(), existingUuid);
+ auto temporaryNs = resharding::constructTemporaryReshardingNss(nss.db(), existingUuid);
ReshardDoneCatchUpChangeEventO2Field o2Field{temporaryNs, reshardingUuid};
auto reshardDoneCatchUp = makeOplogEntry(OpTypeEnum::kNoop,
diff --git a/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp b/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp
index 701c5b495a7..5d250101fa2 100644
--- a/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp
+++ b/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp
@@ -167,7 +167,7 @@ public:
: DocumentSourceMock({}, expCtx), _collectionPtr(&_collection) {
_filterExpr = BSON("ns" << kTestNs);
_filter = MatchExpressionParser::parseAndNormalize(_filterExpr, pExpCtx);
- _params.assertTsHasNotFallenOffOplog = Timestamp(0);
+ _params.assertTsHasNotFallenOff = Timestamp(0);
_params.shouldTrackLatestOplogTimestamp = true;
_params.minRecord = RecordIdBound(RecordId(0));
_params.tailable = true;
@@ -178,7 +178,7 @@ public:
_filterExpr = BSON("ns" << kTestNs << "ts" << BSON("$gte" << resumeToken.clusterTime));
_filter = MatchExpressionParser::parseAndNormalize(_filterExpr, pExpCtx);
_params.minRecord = RecordIdBound(RecordId(resumeToken.clusterTime.asLL()));
- _params.assertTsHasNotFallenOffOplog = resumeToken.clusterTime;
+ _params.assertTsHasNotFallenOff = resumeToken.clusterTime;
}
void push_back(GetNextResult&& result) {
diff --git a/src/mongo/db/pipeline/document_source_cursor.cpp b/src/mongo/db/pipeline/document_source_cursor.cpp
index b98af917d99..c992288a0e4 100644
--- a/src/mongo/db/pipeline/document_source_cursor.cpp
+++ b/src/mongo/db/pipeline/document_source_cursor.cpp
@@ -33,6 +33,7 @@
#include "mongo/db/pipeline/document_source_cursor.h"
#include "mongo/db/catalog/collection.h"
+#include "mongo/db/db_raii.h"
#include "mongo/db/exec/document_value/document.h"
#include "mongo/db/exec/working_set_common.h"
#include "mongo/db/query/collection_query_info.h"
@@ -225,15 +226,20 @@ Value DocumentSourceCursor::serialize(boost::optional<ExplainOptions::Verbosity>
{
auto opCtx = pExpCtx->opCtx;
- auto lockMode = getLockModeForQuery(opCtx, _exec->nss());
- AutoGetDb dbLock(opCtx, _exec->nss().db(), lockMode);
- Lock::CollectionLock collLock(opCtx, _exec->nss(), lockMode);
- auto collection = dbLock.getDb()
- ? CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, _exec->nss())
- : nullptr;
+ auto secondaryNssList = _exec->getSecondaryNamespaces();
+ AutoGetCollectionForReadMaybeLockFree readLock(opCtx,
+ _exec->nss(),
+ AutoGetCollectionViewMode::kViewsForbidden,
+ Date_t::max(),
+ secondaryNssList);
+ MultipleCollectionAccessor collections(opCtx,
+ &readLock.getCollection(),
+ readLock.getNss(),
+ readLock.isAnySecondaryNamespaceAViewOrSharded(),
+ secondaryNssList);
Explain::explainStages(_exec.get(),
- collection,
+ collections,
verbosity.get(),
_execStatus,
_winningPlanTrialStats,
diff --git a/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup.cpp b/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup.cpp
index e66c1484519..afe4d7cf6e9 100644
--- a/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup.cpp
+++ b/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup.cpp
@@ -80,6 +80,7 @@ boost::optional<repl::OplogEntry> forgeNoopImageOplogEntry(
"Not forging no-op image oplog entry because no image document found with "
"sessionId",
"sessionId"_attr = sessionId);
+ return boost::none;
}
auto image = repl::ImageEntry::parse(IDLParserErrorContext("image entry"), imageDoc->toBson());
diff --git a/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup_test.cpp b/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup_test.cpp
index a80b87586ad..9068a61ff05 100644
--- a/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup_test.cpp
+++ b/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup_test.cpp
@@ -165,7 +165,7 @@ TEST_F(FindAndModifyImageLookupTest, ShouldNotForgeImageEntryWhenImageDocMissing
const auto stmtId = 1;
const auto opTime = repl::OpTime(Timestamp(2, 1), 1);
const auto oplogEntryBson = makeOplogEntry(opTime,
- repl::OpTypeEnum::kNoop,
+ repl::OpTypeEnum::kUpdate,
NamespaceString("test.foo"),
UUID::gen(),
BSON("a" << 1),
@@ -203,7 +203,7 @@ TEST_F(FindAndModifyImageLookupTest, ShouldNotForgeImageEntryWhenImageDocHasDiff
const auto ts = Timestamp(2, 1);
const auto opTime = repl::OpTime(ts, 1);
const auto oplogEntryBson = makeOplogEntry(opTime,
- repl::OpTypeEnum::kNoop,
+ repl::OpTypeEnum::kUpdate,
NamespaceString("test.foo"),
UUID::gen(),
BSON("a" << 1),
@@ -239,7 +239,6 @@ TEST_F(FindAndModifyImageLookupTest, ShouldNotForgeImageEntryWhenImageDocHasDiff
ASSERT_TRUE(imageLookup->getNext().isEOF());
}
-
TEST_F(FindAndModifyImageLookupTest, ShouldForgeImageEntryWhenMatchingImageDocIsFoundCrudOp) {
std::vector<repl::RetryImageEnum> cases{repl::RetryImageEnum::kPreImage,
repl::RetryImageEnum::kPostImage};
diff --git a/src/mongo/db/pipeline/document_source_lookup_test.cpp b/src/mongo/db/pipeline/document_source_lookup_test.cpp
index 82a0b6bbd61..aae4d7beef5 100644
--- a/src/mongo/db/pipeline/document_source_lookup_test.cpp
+++ b/src/mongo/db/pipeline/document_source_lookup_test.cpp
@@ -82,6 +82,13 @@ public:
}
};
+auto makeLookUpFromBson(BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& expCtx) {
+ auto docSource = DocumentSourceLookUp::createFromBson(elem, expCtx);
+ auto lookup = static_cast<DocumentSourceLookUp*>(docSource.detach());
+ return std::unique_ptr<DocumentSourceLookUp, DocumentSourceDeleter>(lookup,
+ DocumentSourceDeleter());
+}
+
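makeLookUpFromBson exists so the tests below can drop their manual dispose() calls: DocumentSourceDeleter runs the teardown before freeing the stage, and the unique_ptr guarantees this happens even if an ASSERT fails partway through a test. The same idea in a generic, self-contained form (Widget and friends are hypothetical):

// Generic sketch of the pattern: the custom deleter performs the required teardown
// hook before freeing the object, so owners simply let the unique_ptr go out of scope.
#include <memory>

struct Widget {
    void dispose() { /* release cursors, buffers, ... */ }
};

struct WidgetDeleter {
    void operator()(Widget* w) const {
        w->dispose();  // teardown that previously had to be called by hand
        delete w;
    }
};

using WidgetHandle = std::unique_ptr<Widget, WidgetDeleter>;

WidgetHandle makeWidget() {
    return WidgetHandle(new Widget(), WidgetDeleter());
}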
// A 'let' variable defined in a $lookup stage is expected to be available to all sub-pipelines. For
// sub-pipelines below the immediate one, they are passed to via ExpressionContext. This test
// confirms that variables defined in the ExpressionContext are captured by the $lookup stage.
@@ -869,9 +876,7 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePauses) {
{"foreignField", "_id"_sd},
{"as", "foreignDocs"_sd}}}}
.toBson();
- auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx);
- auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get());
-
+ auto lookup = makeLookUpFromBson(lookupSpec.firstElement(), expCtx);
lookup->setSource(mockLocalSource.get());
auto next = lookup->getNext();
@@ -890,7 +895,6 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePauses) {
ASSERT_TRUE(lookup->getNext().isEOF());
ASSERT_TRUE(lookup->getNext().isEOF());
- lookup->dispose();
}
TEST_F(DocumentSourceLookUpTest, ShouldPropagatePausesWhileUnwinding) {
@@ -905,6 +909,14 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePausesWhileUnwinding) {
expCtx->mongoProcessInterface =
std::make_shared<MockMongoInterface>(std::move(mockForeignContents));
+ // Mock its input, pausing every other result.
+ auto mockLocalSource =
+ DocumentSourceMock::createForTest({Document{{"foreignId", 0}},
+ DocumentSource::GetNextResult::makePauseExecution(),
+ Document{{"foreignId", 1}},
+ DocumentSource::GetNextResult::makePauseExecution()},
+ expCtx);
+
// Set up the $lookup stage.
auto lookupSpec = Document{{"$lookup",
Document{{"from", fromNs.coll()},
@@ -912,21 +924,13 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePausesWhileUnwinding) {
{"foreignField", "_id"_sd},
{"as", "foreignDoc"_sd}}}}
.toBson();
- auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx);
- auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get());
+ auto lookup = makeLookUpFromBson(lookupSpec.firstElement(), expCtx);
const bool preserveNullAndEmptyArrays = false;
const boost::optional<std::string> includeArrayIndex = boost::none;
lookup->setUnwindStage(DocumentSourceUnwind::create(
expCtx, "foreignDoc", preserveNullAndEmptyArrays, includeArrayIndex));
- // Mock its input, pausing every other result.
- auto mockLocalSource =
- DocumentSourceMock::createForTest({Document{{"foreignId", 0}},
- DocumentSource::GetNextResult::makePauseExecution(),
- Document{{"foreignId", 1}},
- DocumentSource::GetNextResult::makePauseExecution()},
- expCtx);
lookup->setSource(mockLocalSource.get());
auto next = lookup->getNext();
@@ -945,7 +949,6 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePausesWhileUnwinding) {
ASSERT_TRUE(lookup->getNext().isEOF());
ASSERT_TRUE(lookup->getNext().isEOF());
- lookup->dispose();
}
TEST_F(DocumentSourceLookUpTest, LookupReportsAsFieldIsModified) {
@@ -961,14 +964,12 @@ TEST_F(DocumentSourceLookUpTest, LookupReportsAsFieldIsModified) {
{"foreignField", "_id"_sd},
{"as", "foreignDocs"_sd}}}}
.toBson();
- auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx);
- auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get());
+ auto lookup = makeLookUpFromBson(lookupSpec.firstElement(), expCtx);
auto modifiedPaths = lookup->getModifiedPaths();
ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
ASSERT_EQ(1U, modifiedPaths.paths.size());
ASSERT_EQ(1U, modifiedPaths.paths.count("foreignDocs"));
- lookup->dispose();
}
TEST_F(DocumentSourceLookUpTest, LookupReportsFieldsModifiedByAbsorbedUnwind) {
@@ -984,8 +985,7 @@ TEST_F(DocumentSourceLookUpTest, LookupReportsFieldsModifiedByAbsorbedUnwind) {
{"foreignField", "_id"_sd},
{"as", "foreignDoc"_sd}}}}
.toBson();
- auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx);
- auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get());
+ auto lookup = makeLookUpFromBson(lookupSpec.firstElement(), expCtx);
const bool preserveNullAndEmptyArrays = false;
const boost::optional<std::string> includeArrayIndex = std::string("arrIndex");
@@ -997,7 +997,6 @@ TEST_F(DocumentSourceLookUpTest, LookupReportsFieldsModifiedByAbsorbedUnwind) {
ASSERT_EQ(2U, modifiedPaths.paths.size());
ASSERT_EQ(1U, modifiedPaths.paths.count("foreignDoc"));
ASSERT_EQ(1U, modifiedPaths.paths.count("arrIndex"));
- lookup->dispose();
}
BSONObj sequentialCacheStageObj(const StringData status = "kBuilding"_sd,
diff --git a/src/mongo/db/pipeline/document_source_union_with_test.cpp b/src/mongo/db/pipeline/document_source_union_with_test.cpp
index 04f440fa91a..05e0feb7baa 100644
--- a/src/mongo/db/pipeline/document_source_union_with_test.cpp
+++ b/src/mongo/db/pipeline/document_source_union_with_test.cpp
@@ -60,6 +60,19 @@ using MockMongoInterface = StubLookupSingleDocumentProcessInterface;
// This provides access to getExpCtx(), but we'll use a different name for this test suite.
using DocumentSourceUnionWithTest = AggregationContextFixture;
+auto makeUnion(const boost::intrusive_ptr<ExpressionContext>& expCtx,
+ std::unique_ptr<Pipeline, PipelineDeleter> pipeline) {
+ return std::unique_ptr<DocumentSourceUnionWith, DocumentSourceDeleter>(
+ new DocumentSourceUnionWith(expCtx, std::move(pipeline)), DocumentSourceDeleter());
+}
+
+auto makeUnionFromBson(BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& expCtx) {
+ auto docSource = DocumentSourceUnionWith::createFromBson(elem, expCtx);
+ auto unionWith = static_cast<DocumentSourceUnionWith*>(docSource.detach());
+ return std::unique_ptr<DocumentSourceUnionWith, DocumentSourceDeleter>(unionWith,
+ DocumentSourceDeleter());
+}
+
TEST_F(DocumentSourceUnionWithTest, BasicSerialUnions) {
const auto docs = std::array{Document{{"a", 1}}, Document{{"b", 1}}, Document{{"c", 1}}};
const auto mock = DocumentSourceMock::createForTest(docs[0], getExpCtx());
@@ -69,19 +82,19 @@ TEST_F(DocumentSourceUnionWithTest, BasicSerialUnions) {
mockCtxOne->mongoProcessInterface = std::make_unique<MockMongoInterface>(mockDequeOne);
const auto mockCtxTwo = getExpCtx()->copyWith({});
mockCtxTwo->mongoProcessInterface = std::make_unique<MockMongoInterface>(mockDequeTwo);
- auto unionWithOne = DocumentSourceUnionWith(
- mockCtxOne,
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
- auto unionWithTwo = DocumentSourceUnionWith(
- mockCtxTwo,
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
- unionWithOne.setSource(mock.get());
- unionWithTwo.setSource(&unionWithOne);
+ auto unionWithOne =
+ makeUnion(mockCtxOne,
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
+ auto unionWithTwo =
+ makeUnion(mockCtxTwo,
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
+ unionWithOne->setSource(mock.get());
+ unionWithTwo->setSource(unionWithOne.get());
auto comparator = DocumentComparator();
auto results = comparator.makeUnorderedDocumentSet();
for (auto& doc [[maybe_unused]] : docs) {
- auto next = unionWithTwo.getNext();
+ auto next = unionWithTwo->getNext();
ASSERT_TRUE(next.isAdvanced());
const auto [ignored, inserted] = results.insert(next.releaseDocument());
ASSERT_TRUE(inserted);
@@ -89,12 +102,9 @@ TEST_F(DocumentSourceUnionWithTest, BasicSerialUnions) {
for (const auto& doc : docs)
ASSERT_TRUE(results.find(doc) != results.end());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
-
- unionWithOne.dispose();
- unionWithTwo.dispose();
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
}
TEST_F(DocumentSourceUnionWithTest, BasicNestedUnions) {
@@ -109,16 +119,16 @@ TEST_F(DocumentSourceUnionWithTest, BasicNestedUnions) {
auto unionWithOne = make_intrusive<DocumentSourceUnionWith>(
mockCtxOne,
Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
- auto unionWithTwo = DocumentSourceUnionWith(
- mockCtxTwo,
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{unionWithOne},
- getExpCtx()));
- unionWithTwo.setSource(mock.get());
+ auto unionWithTwo =
+ makeUnion(mockCtxTwo,
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{unionWithOne},
+ getExpCtx()));
+ unionWithTwo->setSource(mock.get());
auto comparator = DocumentComparator();
auto results = comparator.makeUnorderedDocumentSet();
for (auto& doc [[maybe_unused]] : docs) {
- auto next = unionWithTwo.getNext();
+ auto next = unionWithTwo->getNext();
ASSERT_TRUE(next.isAdvanced());
const auto [ignored, inserted] = results.insert(next.releaseDocument());
ASSERT_TRUE(inserted);
@@ -126,11 +136,9 @@ TEST_F(DocumentSourceUnionWithTest, BasicNestedUnions) {
for (const auto& doc : docs)
ASSERT_TRUE(results.find(doc) != results.end());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
-
- unionWithTwo.dispose();
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
}
TEST_F(DocumentSourceUnionWithTest, UnionsWithNonEmptySubPipelines) {
@@ -145,19 +153,19 @@ TEST_F(DocumentSourceUnionWithTest, UnionsWithNonEmptySubPipelines) {
mockCtxTwo->mongoProcessInterface = std::make_unique<MockMongoInterface>(mockDequeTwo);
const auto filter = DocumentSourceMatch::create(BSON("d" << 1), mockCtxOne);
const auto proj = DocumentSourceAddFields::create(BSON("d" << 1), mockCtxTwo);
- auto unionWithOne = DocumentSourceUnionWith(
+ auto unionWithOne = makeUnion(
mockCtxOne,
Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{filter}, getExpCtx()));
- auto unionWithTwo = DocumentSourceUnionWith(
+ auto unionWithTwo = makeUnion(
mockCtxTwo,
Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{proj}, getExpCtx()));
- unionWithOne.setSource(mock.get());
- unionWithTwo.setSource(&unionWithOne);
+ unionWithOne->setSource(mock.get());
+ unionWithTwo->setSource(unionWithOne.get());
auto comparator = DocumentComparator();
auto results = comparator.makeUnorderedDocumentSet();
for (auto& doc [[maybe_unused]] : outputDocs) {
- auto next = unionWithTwo.getNext();
+ auto next = unionWithTwo->getNext();
ASSERT_TRUE(next.isAdvanced());
const auto [ignored, inserted] = results.insert(next.releaseDocument());
ASSERT_TRUE(inserted);
@@ -165,12 +173,9 @@ TEST_F(DocumentSourceUnionWithTest, UnionsWithNonEmptySubPipelines) {
for (const auto& doc : outputDocs)
ASSERT_TRUE(results.find(doc) != results.end());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
-
- unionWithOne.dispose();
- unionWithTwo.dispose();
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
}
TEST_F(DocumentSourceUnionWithTest, SerializeAndParseWithPipeline) {
@@ -315,26 +320,23 @@ TEST_F(DocumentSourceUnionWithTest, PropagatePauses) {
mockCtxOne->mongoProcessInterface = std::make_unique<MockMongoInterface>(mockDequeOne);
const auto mockCtxTwo = getExpCtx()->copyWith({});
mockCtxTwo->mongoProcessInterface = std::make_unique<MockMongoInterface>(mockDequeTwo);
- auto unionWithOne = DocumentSourceUnionWith(
- mockCtxOne,
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
- auto unionWithTwo = DocumentSourceUnionWith(
- mockCtxTwo,
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
- unionWithOne.setSource(mock.get());
- unionWithTwo.setSource(&unionWithOne);
-
- ASSERT_TRUE(unionWithTwo.getNext().isAdvanced());
- ASSERT_TRUE(unionWithTwo.getNext().isPaused());
- ASSERT_TRUE(unionWithTwo.getNext().isAdvanced());
- ASSERT_TRUE(unionWithTwo.getNext().isPaused());
-
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
-
- unionWithOne.dispose();
- unionWithTwo.dispose();
+ auto unionWithOne =
+ makeUnion(mockCtxOne,
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
+ auto unionWithTwo =
+ makeUnion(mockCtxTwo,
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
+ unionWithOne->setSource(mock.get());
+ unionWithTwo->setSource(unionWithOne.get());
+
+ ASSERT_TRUE(unionWithTwo->getNext().isAdvanced());
+ ASSERT_TRUE(unionWithTwo->getNext().isPaused());
+ ASSERT_TRUE(unionWithTwo->getNext().isAdvanced());
+ ASSERT_TRUE(unionWithTwo->getNext().isPaused());
+
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
}
TEST_F(DocumentSourceUnionWithTest, ReturnEOFAfterBeingDisposed) {
@@ -406,10 +408,10 @@ TEST_F(DocumentSourceUnionWithTest, RespectsViewDefinition) {
expCtx->mongoProcessInterface =
std::make_shared<MockMongoInterface>(std::move(mockForeignContents));
- auto bson = BSON("$unionWith" << nsToUnionWith.coll());
- auto unionWith = DocumentSourceUnionWith::createFromBson(bson.firstElement(), expCtx);
const auto localMock =
DocumentSourceMock::createForTest({Document{{"_id"_sd, "local"_sd}}}, getExpCtx());
+ auto bson = BSON("$unionWith" << nsToUnionWith.coll());
+ auto unionWith = makeUnionFromBson(bson.firstElement(), expCtx);
unionWith->setSource(localMock.get());
auto result = unionWith->getNext();
@@ -421,8 +423,6 @@ TEST_F(DocumentSourceUnionWithTest, RespectsViewDefinition) {
ASSERT_DOCUMENT_EQ(result.getDocument(), (Document{{"_id"_sd, 2}}));
ASSERT_TRUE(unionWith->getNext().isEOF());
-
- unionWith->dispose();
}
TEST_F(DocumentSourceUnionWithTest, ConcatenatesViewDefinitionToPipeline) {
@@ -445,7 +445,7 @@ TEST_F(DocumentSourceUnionWithTest, ConcatenatesViewDefinitionToPipeline) {
"coll" << viewNsToUnionWith.coll() << "pipeline"
<< BSON_ARRAY(fromjson(
"{$set: {originalId: '$_id', _id: {$add: [1, '$_id']}}}"))));
- auto unionWith = DocumentSourceUnionWith::createFromBson(bson.firstElement(), expCtx);
+ auto unionWith = makeUnionFromBson(bson.firstElement(), expCtx);
unionWith->setSource(localMock.get());
auto result = unionWith->getNext();
@@ -459,8 +459,6 @@ TEST_F(DocumentSourceUnionWithTest, ConcatenatesViewDefinitionToPipeline) {
ASSERT_DOCUMENT_EQ(result.getDocument(), (Document{{"_id"_sd, 3}, {"originalId"_sd, 2}}));
ASSERT_TRUE(unionWith->getNext().isEOF());
-
- unionWith->dispose();
}
TEST_F(DocumentSourceUnionWithTest, RejectUnionWhenDepthLimitIsExceeded) {
@@ -482,9 +480,9 @@ TEST_F(DocumentSourceUnionWithTest, RejectUnionWhenDepthLimitIsExceeded) {
}
TEST_F(DocumentSourceUnionWithTest, ConstraintsWithoutPipelineAreCorrect) {
- auto emptyUnion = DocumentSourceUnionWith(
- getExpCtx(),
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
+ auto emptyUnion =
+ makeUnion(getExpCtx(),
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
StageConstraints defaultConstraints(StageConstraints::StreamType::kStreaming,
StageConstraints::PositionRequirement::kNone,
StageConstraints::HostTypeRequirement::kAnyShard,
@@ -493,9 +491,7 @@ TEST_F(DocumentSourceUnionWithTest, ConstraintsWithoutPipelineAreCorrect) {
StageConstraints::TransactionRequirement::kNotAllowed,
StageConstraints::LookupRequirement::kAllowed,
StageConstraints::UnionRequirement::kAllowed);
- ASSERT_TRUE(emptyUnion.constraints(Pipeline::SplitState::kUnsplit) == defaultConstraints);
-
- emptyUnion.dispose();
+ ASSERT_TRUE(emptyUnion->constraints(Pipeline::SplitState::kUnsplit) == defaultConstraints);
}
TEST_F(DocumentSourceUnionWithTest, ConstraintsWithMixedSubPipelineAreCorrect) {
diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp
index 8237dbdfeae..464d2ad6953 100644
--- a/src/mongo/db/pipeline/expression.cpp
+++ b/src/mongo/db/pipeline/expression.cpp
@@ -39,6 +39,9 @@
#include <utility>
#include <vector>
+#include "mongo/bson/bsonmisc.h"
+#include "mongo/bson/bsontypes.h"
+#include "mongo/crypto/fle_crypto.h"
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/commands/feature_compatibility_version_documentation.h"
#include "mongo/db/exec/document_value/document.h"
@@ -46,6 +49,7 @@
#include "mongo/db/hasher.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/pipeline/expression_context.h"
+#include "mongo/db/pipeline/expression_parser_gen.h"
#include "mongo/db/pipeline/variable_validation.h"
#include "mongo/db/query/datetime/date_time_support.h"
#include "mongo/db/query/sort_pattern.h"
@@ -304,111 +308,173 @@ const char* ExpressionAbs::getOpName() const {
/* ------------------------- ExpressionAdd ----------------------------- */
-StatusWith<Value> ExpressionAdd::apply(Value lhs, Value rhs) {
- BSONType diffType = Value::getWidestNumeric(rhs.getType(), lhs.getType());
+namespace {
- if (diffType == NumberDecimal) {
- Decimal128 left = lhs.coerceToDecimal();
- Decimal128 right = rhs.coerceToDecimal();
- return Value(left.add(right));
- } else if (diffType == NumberDouble) {
- double right = rhs.coerceToDouble();
- double left = lhs.coerceToDouble();
- return Value(left + right);
- } else if (diffType == NumberLong) {
- long long result;
+/**
+ * We'll try to return the narrowest possible result value while avoiding overflow or implicit use
+ * of decimal types. To do that, compute separate sums for long, double and decimal values, and
+ * track the current widest type. The long sum will be converted to double when the first double
+ * value is seen or when long arithmetic would overflow.
+ */
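// Worked example (illustrative, based on the behaviour described above): for
// $add: [NumberInt(1), NumberLong(2), NumberDouble(0.5)] the state accumulates
// longTotal = 1 (widestType NumberInt), then longTotal = 3 (NumberLong); the double
// operand widens the state, so doubleTotal becomes 3.0 and the final result is 3.5 as
// a NumberDouble. If two large NumberLong operands would overflow the 64-bit sum,
// overflow::add() detects it and the state is likewise promoted to NumberDouble.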
+class AddState {
+ long long longTotal = 0;
+ double doubleTotal = 0;
+ Decimal128 decimalTotal;
+ BSONType widestType = NumberInt;
+ bool isDate = false;
- // If there is an overflow, convert the values to doubles.
- if (overflow::add(lhs.coerceToLong(), rhs.coerceToLong(), &result)) {
- return Value(lhs.coerceToDouble() + rhs.coerceToDouble());
+public:
+ /**
+ * Update the internal state with another operand. It is up to the caller to validate that the
+ * operand is of a proper type.
+ */
+ void operator+=(const Value& operand) {
+ auto oldWidestType = widestType;
+ // Dates are represented by the long number of milliseconds since the unix epoch, so we can
+ // treat them as regular numeric values for the purposes of addition after making sure that
+ // only one date is present in the operand list.
+ Value valToAdd;
+ if (operand.getType() == Date) {
+ uassert(16612, "only one date allowed in an $add expression", !isDate);
+ isDate = true;
+ valToAdd = Value(operand.getDate().toMillisSinceEpoch());
+ } else {
+ widestType = Value::getWidestNumeric(widestType, operand.getType());
+ valToAdd = operand;
}
- return Value(result);
- } else if (diffType == NumberInt) {
- long long right = rhs.coerceToLong();
- long long left = lhs.coerceToLong();
- return Value::createIntOrLong(left + right);
- } else if (lhs.nullish() || rhs.nullish()) {
- return Value(BSONNULL);
- } else {
- return Status(ErrorCodes::TypeMismatch,
- str::stream() << "cannot $add a" << typeName(rhs.getType()) << " from a "
- << typeName(lhs.getType()));
- }
-}
-
-Value ExpressionAdd::evaluate(const Document& root, Variables* variables) const {
- // We'll try to return the narrowest possible result value while avoiding overflow, loss
- // of precision due to intermediate rounding or implicit use of decimal types. To do that,
- // compute a compensated sum for non-decimal values and a separate decimal sum for decimal
- // values, and track the current narrowest type.
- DoubleDoubleSummation nonDecimalTotal;
- Decimal128 decimalTotal;
- BSONType totalType = NumberInt;
- bool haveDate = false;
- const size_t n = _children.size();
- for (size_t i = 0; i < n; ++i) {
- Value val = _children[i]->evaluate(root, variables);
+ // If this operation widens the return type, perform any necessary type conversions.
+ if (oldWidestType != widestType) {
+ switch (widestType) {
+ case NumberLong:
+ // Int -> Long is handled by the same sum.
+ break;
+ case NumberDouble:
+ // Int/Long -> Double converts the existing longTotal to a doubleTotal.
+ doubleTotal = longTotal;
+ break;
+ case NumberDecimal:
+                    // Convert the running total to NumberDecimal, based on the old widest type.
+ switch (oldWidestType) {
+ case NumberInt:
+ case NumberLong:
+ decimalTotal = Decimal128(longTotal);
+ break;
+ case NumberDouble:
+ decimalTotal = Decimal128(doubleTotal, Decimal128::kRoundTo34Digits);
+ break;
+ default:
+ MONGO_UNREACHABLE;
+ }
+ break;
+ default:
+ MONGO_UNREACHABLE;
+ }
+ }
- switch (val.getType()) {
- case NumberDecimal:
- decimalTotal = decimalTotal.add(val.getDecimal());
- totalType = NumberDecimal;
- break;
- case NumberDouble:
- nonDecimalTotal.addDouble(val.getDouble());
- if (totalType != NumberDecimal)
- totalType = NumberDouble;
- break;
+ // Perform the add operation.
+ switch (widestType) {
+ case NumberInt:
case NumberLong:
- nonDecimalTotal.addLong(val.getLong());
- if (totalType == NumberInt)
- totalType = NumberLong;
+ // If the long long arithmetic overflows, promote the result to a NumberDouble and
+ // start incrementing the doubleTotal.
+ long long newLongTotal;
+ if (overflow::add(longTotal, valToAdd.coerceToLong(), &newLongTotal)) {
+ widestType = NumberDouble;
+ doubleTotal = longTotal + valToAdd.coerceToDouble();
+ } else {
+ longTotal = newLongTotal;
+ }
break;
- case NumberInt:
- nonDecimalTotal.addDouble(val.getInt());
+ case NumberDouble:
+ doubleTotal += valToAdd.coerceToDouble();
break;
- case Date:
- uassert(16612, "only one date allowed in an $add expression", !haveDate);
- haveDate = true;
- nonDecimalTotal.addLong(val.getDate().toMillisSinceEpoch());
+ case NumberDecimal:
+ decimalTotal = decimalTotal.add(valToAdd.coerceToDecimal());
break;
default:
- uassert(16554,
- str::stream() << "$add only supports numeric or date types, not "
- << typeName(val.getType()),
- val.nullish());
- return Value(BSONNULL);
+ uasserted(ErrorCodes::TypeMismatch,
+ str::stream() << "$add only supports numeric or date types, not "
+ << typeName(valToAdd.getType()));
}
}
- if (haveDate) {
- int64_t longTotal;
- if (totalType == NumberDecimal) {
- longTotal = decimalTotal.add(nonDecimalTotal.getDecimal()).toLong();
+ Value getValue() const {
+ // If one of the operands was a date, then convert the result to a date.
+ if (isDate) {
+ switch (widestType) {
+ case NumberInt:
+ case NumberLong:
+ return Value(Date_t::fromMillisSinceEpoch(longTotal));
+ case NumberDouble:
+ using limits = std::numeric_limits<long long>;
+ uassert(ErrorCodes::Overflow,
+ "date overflow in $add",
+                            // The upper bound is exclusive because limits::max() rounds up when it
+                            // is cast to a double.
+ doubleTotal >= limits::min() &&
+ doubleTotal < static_cast<double>(limits::max()));
+ return Value(Date_t::fromMillisSinceEpoch(llround(doubleTotal)));
+ case NumberDecimal:
+ // Decimal dates are not checked for overflow.
+ return Value(Date_t::fromMillisSinceEpoch(decimalTotal.toLong()));
+ default:
+ MONGO_UNREACHABLE;
+ }
} else {
- uassert(ErrorCodes::Overflow, "date overflow in $add", nonDecimalTotal.fitsLong());
- longTotal = nonDecimalTotal.getLong();
+ switch (widestType) {
+ case NumberInt:
+ return Value::createIntOrLong(longTotal);
+ case NumberLong:
+ return Value(longTotal);
+ case NumberDouble:
+ return Value(doubleTotal);
+ case NumberDecimal:
+ return Value(decimalTotal);
+ default:
+ MONGO_UNREACHABLE;
+ }
}
- return Value(Date_t::fromMillisSinceEpoch(longTotal));
}
- switch (totalType) {
- case NumberDecimal:
- return Value(decimalTotal.add(nonDecimalTotal.getDecimal()));
- case NumberLong:
- dassert(nonDecimalTotal.isInteger());
- if (nonDecimalTotal.fitsLong())
- return Value(nonDecimalTotal.getLong());
- [[fallthrough]];
- case NumberInt:
- if (nonDecimalTotal.fitsLong())
- return Value::createIntOrLong(nonDecimalTotal.getLong());
- [[fallthrough]];
- case NumberDouble:
- return Value(nonDecimalTotal.getDouble());
- default:
- massert(16417, "$add resulted in a non-numeric type", false);
+};
+
+Status checkAddOperandType(Value val) {
+ if (!val.numeric() && val.getType() != Date) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "$add only supports numeric or date types, not "
+ << typeName(val.getType()));
}
+
+ return Status::OK();
+}
+} // namespace
+
+StatusWith<Value> ExpressionAdd::apply(Value lhs, Value rhs) {
+ if (lhs.nullish())
+ return Value(BSONNULL);
+ if (Status s = checkAddOperandType(lhs); !s.isOK())
+ return s;
+ if (rhs.nullish())
+ return Value(BSONNULL);
+ if (Status s = checkAddOperandType(rhs); !s.isOK())
+ return s;
+
+ AddState state;
+ state += lhs;
+ state += rhs;
+ return state.getValue();
+}
+
+Value ExpressionAdd::evaluate(const Document& root, Variables* variables) const {
+ AddState state;
+ for (auto&& child : _children) {
+ Value val = child->evaluate(root, variables);
+ if (val.nullish())
+ return Value(BSONNULL);
+ uassertStatusOK(checkAddOperandType(val));
+ state += val;
+ }
+ return state.getValue();
}
REGISTER_STABLE_EXPRESSION(add, ExpressionAdd::parse);
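
For illustration, a minimal standalone sketch of the overflow-promotion rule that AddState applies above, using plain C++ and the GCC/Clang __builtin_add_overflow builtin in place of overflow::add (assumed toolchain; not the MongoDB Value machinery):

#include <cstdint>
#include <iostream>
#include <limits>

// Mirrors overflow::add: returns true when a + b does not fit in a signed 64-bit integer.
static bool addOverflows(std::int64_t a, std::int64_t b, std::int64_t* out) {
    return __builtin_add_overflow(a, b, out);
}

int main() {
    std::int64_t longTotal = std::numeric_limits<std::int64_t>::max();
    std::int64_t result;
    if (addOverflows(longTotal, 1, &result)) {
        // Overflow: fall back to double arithmetic, as AddState does.
        double doubleTotal = static_cast<double>(longTotal) + 1.0;
        std::cout << "promoted to double: " << doubleTotal << "\n";
    } else {
        std::cout << "still a long: " << result << "\n";
    }
    return 0;
}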
@@ -3253,7 +3319,7 @@ Value ExpressionMultiply::evaluate(const Document& root, Variables* variables) c
if (val.nullish())
return Value(BSONNULL);
uassertStatusOK(checkMultiplyNumeric(val));
- state *= child->evaluate(root, variables);
+ state *= val;
}
return state.getValue();
}
@@ -3742,6 +3808,123 @@ const char* ExpressionLog10::getOpName() const {
return "$log10";
}
+/* ----------------------- ExpressionInternalFLEEqual ---------------------------- */
+constexpr auto kInternalFleEq = "$_internalFleEq"_sd;
+
+ExpressionInternalFLEEqual::ExpressionInternalFLEEqual(ExpressionContext* const expCtx,
+ boost::intrusive_ptr<Expression> field,
+ ConstDataRange serverToken,
+ int64_t contentionFactor,
+ ConstDataRange edcToken)
+ : Expression(expCtx, {std::move(field)}),
+ _serverToken(PrfBlockfromCDR(serverToken)),
+ _edcToken(PrfBlockfromCDR(edcToken)),
+ _contentionFactor(contentionFactor) {
+ expCtx->sbeCompatible = false;
+
+ auto tokens =
+ EDCServerCollection::generateEDCTokens(ConstDataRange(_edcToken), _contentionFactor);
+
+ for (auto& token : tokens) {
+ _cachedEDCTokens.insert(std::move(token.data));
+ }
+}
+
+void ExpressionInternalFLEEqual::_doAddDependencies(DepsTracker* deps) const {
+ for (auto&& operand : _children) {
+ operand->addDependencies(deps);
+ }
+}
+
+REGISTER_EXPRESSION_WITH_MIN_VERSION(_internalFleEq,
+ ExpressionInternalFLEEqual::parse,
+ AllowedWithApiStrict::kAlways,
+ AllowedWithClientType::kAny,
+ multiversion::FeatureCompatibilityVersion::kVersion_6_0);
+
+intrusive_ptr<Expression> ExpressionInternalFLEEqual::parse(ExpressionContext* const expCtx,
+ BSONElement expr,
+ const VariablesParseState& vps) {
+
+ IDLParserErrorContext ctx(kInternalFleEq);
+ auto fleEq = InternalFleEqStruct::parse(ctx, expr.Obj());
+
+ auto fieldExpr = Expression::parseOperand(expCtx, fleEq.getField().getElement(), vps);
+
+ auto serverTokenPair = fromEncryptedConstDataRange(fleEq.getServerEncryptionToken());
+
+ uassert(6672405,
+ "Invalid server token",
+ serverTokenPair.first == EncryptedBinDataType::kFLE2TransientRaw &&
+ serverTokenPair.second.length() == sizeof(PrfBlock));
+
+ auto edcTokenPair = fromEncryptedConstDataRange(fleEq.getEdcDerivedToken());
+
+ uassert(6672406,
+ "Invalid edc token",
+ edcTokenPair.first == EncryptedBinDataType::kFLE2TransientRaw &&
+ edcTokenPair.second.length() == sizeof(PrfBlock));
+
+
+ auto cf = fleEq.getMaxCounter();
+ uassert(6672408, "Contention factor must be between 0 and 10000", cf >= 0 && cf < 10000);
+
+ return new ExpressionInternalFLEEqual(expCtx,
+ std::move(fieldExpr),
+ serverTokenPair.second,
+ fleEq.getMaxCounter(),
+ edcTokenPair.second);
+}
+
+Value toValue(const std::array<std::uint8_t, 32>& buf) {
+ auto vec = toEncryptedVector(EncryptedBinDataType::kFLE2TransientRaw, buf);
+ return Value(BSONBinData(vec.data(), vec.size(), BinDataType::Encrypt));
+}
+
+Value ExpressionInternalFLEEqual::serialize(bool explain) const {
+ return Value(Document{{kInternalFleEq,
+ Document{{"field", _children[0]->serialize(explain)},
+ {"edc", toValue(_edcToken)},
+ {"counter", Value(static_cast<long long>(_contentionFactor))},
+ {"server", toValue(_serverToken)}}}});
+}
+
+Value ExpressionInternalFLEEqual::evaluate(const Document& root, Variables* variables) const {
+    // Inputs:
+    // 1. The value of an FLE2IndexedEqualityEncryptedValue field.
+
+ Value fieldValue = _children[0]->evaluate(root, variables);
+
+ if (fieldValue.nullish()) {
+ return Value(BSONNULL);
+ }
+
+ if (fieldValue.getType() != BinData) {
+ return Value(false);
+ }
+
+ auto fieldValuePair = fromEncryptedBinData(fieldValue);
+
+ uassert(6672407,
+ "Invalid encrypted indexed field",
+ fieldValuePair.first == EncryptedBinDataType::kFLE2EqualityIndexedValue);
+
+    // The value matches if:
+    // 1. Decryption of the field succeeds, and
+    // 2. The EDC_u token is in GenTokens(EDC token, contention factor).
+    //
+ auto swIndexed =
+ EDCServerCollection::decryptAndParse(ConstDataRange(_serverToken), fieldValuePair.second);
+ uassertStatusOK(swIndexed);
+ auto indexed = swIndexed.getValue();
+
+ return Value(_cachedEDCTokens.count(indexed.edc.data) == 1);
+}
+
+const char* ExpressionInternalFLEEqual::getOpName() const {
+ return kInternalFleEq.rawData();
+}
+
/* ------------------------ ExpressionNary ----------------------------- */
/**
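
As a rough sketch of the matching step in ExpressionInternalFLEEqual::evaluate above: once decryption with the server token succeeds, the parsed EDC token only needs to be a member of the precomputed per-contention token set. Here std::set and a stub token stand in for stdx::unordered_set and the real crypto:

#include <array>
#include <cstdint>
#include <set>

using PrfBlock = std::array<std::uint8_t, 32>;

// Equivalent in spirit to `_cachedEDCTokens.count(indexed.edc.data) == 1` above.
bool matchesAnyContentionToken(const std::set<PrfBlock>& cachedEdcTokens,
                               const PrfBlock& decryptedEdcToken) {
    return cachedEdcTokens.count(decryptedEdcToken) == 1;
}

int main() {
    std::set<PrfBlock> cached;
    PrfBlock token{};  // stub token; the real ones come from EDCServerCollection::generateEDCTokens
    cached.insert(token);
    return matchesAnyContentionToken(cached, token) ? 0 : 1;
}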
diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h
index ff53eaedf3e..4b5745bb2b6 100644
--- a/src/mongo/db/pipeline/expression.h
+++ b/src/mongo/db/pipeline/expression.h
@@ -29,6 +29,7 @@
#pragma once
+#include "mongo/base/data_range.h"
#include "mongo/platform/basic.h"
#include <algorithm>
@@ -2197,6 +2198,38 @@ public:
}
};
+class ExpressionInternalFLEEqual final : public Expression {
+public:
+ ExpressionInternalFLEEqual(ExpressionContext* expCtx,
+ boost::intrusive_ptr<Expression> field,
+ ConstDataRange serverToken,
+ int64_t contentionFactor,
+ ConstDataRange edcToken);
+ Value serialize(bool explain) const final;
+
+ Value evaluate(const Document& root, Variables* variables) const final;
+ const char* getOpName() const;
+
+ static boost::intrusive_ptr<Expression> parse(ExpressionContext* expCtx,
+ BSONElement expr,
+ const VariablesParseState& vps);
+ void _doAddDependencies(DepsTracker* deps) const final;
+
+ void acceptVisitor(ExpressionMutableVisitor* visitor) final {
+ return visitor->visit(this);
+ }
+
+ void acceptVisitor(ExpressionConstVisitor* visitor) const final {
+ return visitor->visit(this);
+ }
+
+private:
+ std::array<std::uint8_t, 32> _serverToken;
+ std::array<std::uint8_t, 32> _edcToken;
+ int64_t _contentionFactor;
+ stdx::unordered_set<std::array<std::uint8_t, 32>> _cachedEDCTokens;
+};
+
class ExpressionMap final : public Expression {
public:
ExpressionMap(
diff --git a/src/mongo/s/pm2583_feature_flags.idl b/src/mongo/db/pipeline/expression_parser.idl
index 2fe34ddf7a3..9f1cde70856 100644
--- a/src/mongo/s/pm2583_feature_flags.idl
+++ b/src/mongo/db/pipeline/expression_parser.idl
@@ -24,19 +24,34 @@
# delete this exception statement from your version. If you delete this
# exception statement from all source files in the program, then also delete
# it in the license file.
-#
-
-# Feature flag for PM-2583
global:
- cpp_namespace: "mongo::feature_flags"
+ cpp_namespace: "mongo"
imports:
- "mongo/idl/basic_types.idl"
-feature_flags:
- featureFlagNewPersistedChunkVersionFormat:
- description: Feature flag for enabling the new persisted chunk version format.
- cpp_varname: gFeatureFlagNewPersistedChunkVersionFormat
- default: true
- version: 6.0
+structs:
+
+ InternalFleEqStruct:
+ description: "Struct for $_internalFleEq"
+ strict: true
+ fields:
+ field:
+ description: "Expression"
+ type: IDLAnyType
+ cpp_name: field
+ edc:
+ description: "EDCDerivedFromDataToken"
+ type: bindata_encrypt
+ cpp_name: edcDerivedToken
+ server:
+ description: "ServerDataEncryptionLevel1Token"
+ type: bindata_encrypt
+ cpp_name: serverEncryptionToken
+ counter:
+ description: "Queryable Encryption max counter"
+ type: long
+ cpp_name: maxCounter
+
+
diff --git a/src/mongo/db/pipeline/expression_test.cpp b/src/mongo/db/pipeline/expression_test.cpp
index b33f3bc893c..314062c3f03 100644
--- a/src/mongo/db/pipeline/expression_test.cpp
+++ b/src/mongo/db/pipeline/expression_test.cpp
@@ -30,6 +30,8 @@
#include "mongo/platform/basic.h"
+#include <climits>
+
#include "mongo/bson/bsonmisc.h"
#include "mongo/config.h"
#include "mongo/db/exec/document_value/document.h"
@@ -47,6 +49,8 @@
#include "mongo/idl/server_parameter_test_util.h"
#include "mongo/logv2/log.h"
#include "mongo/unittest/unittest.h"
+#include "mongo/util/summation.h"
+#include "mongo/util/time_support.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
@@ -171,6 +175,7 @@ void parseAndVerifyResults(
ASSERT_VALUE_EQ(expr->evaluate({}, &expCtx.variables), expected);
}
+
/* ------------------------- ExpressionArrayToObject -------------------------- */
TEST(ExpressionArrayToObjectTest, KVFormatSimple) {
@@ -3717,4 +3722,449 @@ TEST(ExpressionCondTest, ConstantCondShouldOptimizeWithNonConstantBranches) {
ASSERT_BSONOBJ_BINARY_EQ(expectedResult, expressionToBson(optimizedExprCond));
}
+TEST(ExpressionAddTest, Integers) {
+ assertExpectedResults("$add",
+ {
+ // Empty case.
+ {{}, 0},
+ // Singleton case.
+ {{1}, 1},
+ // Integer addition.
+ {{1, 2, 3}, 6},
+ // Adding negative numbers
+ {{6, -3, 2}, 5},
+ // Getting a negative result
+ {{-6, -3, 2}, -7},
+ // Min/max ints are not promoted to longs.
+ {{INT_MAX}, INT_MAX},
+ {{INT_MAX, -1}, Value(INT_MAX - 1)},
+ {{INT_MIN}, INT_MIN},
+ {{INT_MIN, 1}, Value(INT_MIN + 1)},
+ // Integer overflow is promoted to a long.
+ {{INT_MAX, 1}, Value((long long)INT_MAX + 1LL)},
+ {{INT_MIN, -1}, Value((long long)INT_MIN - 1LL)},
+ });
+}
+
+
+TEST(ExpressionAddTest, Longs) {
+ assertExpectedResults(
+ "$add",
+ {
+ // Singleton case.
+ {{1LL}, 1LL},
+ // Long addition.
+ {{1LL, 2LL, 3LL}, 6LL},
+ // Adding negative numbers
+ {{6LL, -3LL, 2LL}, 5LL},
+ // Getting a negative result
+ {{-6LL, -3LL, 2LL}, -7LL},
+ // Confirm that NumberLong is wider than NumberInt, and the output
+ // will be a long if any operand is a long.
+ {{1LL, 2, 3LL}, 6LL},
+ {{1LL, 2, 3}, 6LL},
+ {{1, 2, 3LL}, 6LL},
+ {{1, 2LL, 3LL}, 6LL},
+ {{6, -3LL, 2}, 5LL},
+ {{-6LL, -3, 2}, -7LL},
+ // Min/max longs are not promoted to double.
+ {{LLONG_MAX}, LLONG_MAX},
+ {{LLONG_MAX, -1LL}, Value(LLONG_MAX - 1LL)},
+ {{LLONG_MIN}, LLONG_MIN},
+ {{LLONG_MIN, 1LL}, Value(LLONG_MIN + 1LL)},
+ // Long overflow is promoted to a double.
+ {{LLONG_MAX, 1LL}, Value((double)LLONG_MAX + 1.0)},
+ // The result is "incorrect" here due to floating-point rounding errors.
+ {{LLONG_MIN, -1LL}, Value((double)LLONG_MIN)},
+ });
+}
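
To spell out the last two cases above: LLONG_MIN is exactly -2^63, which a double can represent, but -2^63 - 1 is not; the nearest representable double at that magnitude is -2^63 itself, so once the overflow promotes the sum to a double the result rounds back to (double)LLONG_MIN.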
+
+TEST(ExpressionAddTest, Doubles) {
+ assertExpectedResults("$add",
+ {
+ // Singleton case.
+ {{1.0}, 1.0},
+ // Double addition.
+ {{1.0, 2.0, 3.0}, 6.0},
+ // Adding negative numbers
+ {{6.0, -3.0, 2.0}, 5.0},
+ // Getting a negative result
+ {{-6.0, -3.0, 2.0}, -7.0},
+ // Confirm that doubles are wider than ints and longs, and the output
+ // will be a double if any operand is a double.
+ {{1, 2, 3.0}, 6.0},
+ {{1LL, 2LL, 3.0}, 6.0},
+ {{3.0, 2, 1LL}, 6.0},
+ {{3, 2.0, 1LL}, 6.0},
+ {{-3, 2.0, 1LL}, 0.0},
+ {{-6LL, 2LL, 3.0}, -1.0},
+ {{-6.0, 2LL, 3.0}, -1.0},
+ // Confirm floating point arithmetic has rounding errors.
+ {{0.1, 0.2}, 0.30000000000000004},
+ });
+}
+
+TEST(ExpressionAddTest, Decimals) {
+ assertExpectedResults(
+ "$add",
+ {
+ // Singleton case.
+ {{Decimal128(1)}, Decimal128(1)},
+ // Decimal addition.
+ {{Decimal128(1.0), Decimal128(2.0), Decimal128(3.0)}, Decimal128(6.0)},
+ {{Decimal128(-6.0), Decimal128(2.0), Decimal128(3.0)}, Decimal128(-1.0)},
+            // Confirm that decimals are wider than all other types, and the output
+            // will be a decimal if any operand is a decimal.
+ {{Decimal128(1), 2LL, 3}, Decimal128(6.0)},
+ {{Decimal128(3), 2.0, 1LL}, Decimal128(6.0)},
+ {{Decimal128(3), 2, 1.0}, Decimal128(6.0)},
+ {{1, 2, Decimal128(3.0)}, Decimal128(6.0)},
+ {{1LL, Decimal128(2.0), 3.0}, Decimal128(6.0)},
+ {{1.0, 2.0, Decimal128(3.0)}, Decimal128(6.0)},
+ {{1, Decimal128(2.0), 3.0}, Decimal128(6.0)},
+ {{1LL, Decimal128(2.0), 3.0, 2}, Decimal128(8.0)},
+ {{1LL, Decimal128(2.0), 3, 2.0}, Decimal128(8.0)},
+ {{1, Decimal128(2.0), 3LL, 2.0}, Decimal128(8.0)},
+ {{3.0, Decimal128(0.0), 2, 1LL}, Decimal128(6.0)},
+ {{1, 3LL, 2.0, Decimal128(2.0)}, Decimal128(8.0)},
+ {{3.0, 2, 1LL, Decimal128(0.0)}, Decimal128(6.0)},
+ {{Decimal128(-6.0), 2.0, 3LL}, Decimal128(-1.0)},
+ });
+}
+
+TEST(ExpressionAddTest, DatesNonDecimal) {
+ assertExpectedResults(
+ "$add",
+ {
+ {{1, 2, 3, Date_t::fromMillisSinceEpoch(100)}, Date_t::fromMillisSinceEpoch(106)},
+ {{1LL, 2LL, 3LL, Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(106)},
+ {{1.0, 2.0, 3.0, Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(106)},
+ {{1.0, 2.0, Value(Date_t::fromMillisSinceEpoch(100)), 3.0},
+ Date_t::fromMillisSinceEpoch(106)},
+ {{1.0, 2.2, 3.5, Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(107)},
+ {{1, 2.2, 3.5, Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(107)},
+ {{1, Date_t::fromMillisSinceEpoch(100), 2.2, 3.5}, Date_t::fromMillisSinceEpoch(107)},
+ {{Date_t::fromMillisSinceEpoch(100), 1, 2.2, 3.5}, Date_t::fromMillisSinceEpoch(107)},
+ {{-6, Date_t::fromMillisSinceEpoch(100)}, Date_t::fromMillisSinceEpoch(94)},
+ {{-200, Date_t::fromMillisSinceEpoch(100)}, Date_t::fromMillisSinceEpoch(-100)},
+ {{1, 2, 3, Date_t::fromMillisSinceEpoch(-100)}, Date_t::fromMillisSinceEpoch(-94)},
+ });
+}
+
+TEST(ExpressionAddTest, DatesDecimal) {
+ assertExpectedResults(
+ "$add",
+ {
+ {{1, Decimal128(2), 3, Date_t::fromMillisSinceEpoch(100)},
+ Date_t::fromMillisSinceEpoch(106)},
+ {{1LL, 2LL, Decimal128(3LL), Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(106)},
+ {{1, Decimal128(2.2), 3.5, Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(107)},
+ {{1, Decimal128(2.2), Decimal128(3.5), Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(107)},
+ {{1.0, Decimal128(2.2), Decimal128(3.5), Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(107)},
+ {{Decimal128(-6), Date_t::fromMillisSinceEpoch(100)}, Date_t::fromMillisSinceEpoch(94)},
+ {{Decimal128(-200), Date_t::fromMillisSinceEpoch(100)},
+ Date_t::fromMillisSinceEpoch(-100)},
+ {{1, Decimal128(2), 3, Date_t::fromMillisSinceEpoch(-100)},
+ Date_t::fromMillisSinceEpoch(-94)},
+ });
+}
+
+TEST(ExpressionAddTest, Assertions) {
+ // Date addition must fit in a NumberLong from a double.
+ ASSERT_THROWS_CODE(
+ evaluateExpression("$add", {Date_t::fromMillisSinceEpoch(100), (double)LLONG_MAX}),
+ AssertionException,
+ ErrorCodes::Overflow);
+
+ // Only one date allowed in an $add expression.
+ ASSERT_THROWS_CODE(
+ evaluateExpression(
+ "$add", {Date_t::fromMillisSinceEpoch(100), 1, Date_t::fromMillisSinceEpoch(100)}),
+ AssertionException,
+ 16612);
+
+    // Only numeric or date types are allowed in an $add expression.
+ ASSERT_THROWS_CODE(evaluateExpression("$add", {1, 2, "not numeric!"_sd, 3}),
+ AssertionException,
+ ErrorCodes::TypeMismatch);
+}
+
+
+TEST(ExpressionAddTest, VerifyNoDoubleDoubleSummation) {
+    // Confirm that we're not using DoubleDoubleSummation for the $add expression, using a set of
+    // double values taken from mongo/util/summation_test.cpp.
+ std::vector<ImplicitValue> doubleValues = {
+ 1.4831356930199802e-05, -3.121724665346865, 3041897608700.073,
+ 1001318343149.7166, -1714.6229586696593, 1731390114894580.8,
+ 6.256645803154374e-08, -107144114533844.25, -0.08839485091750919,
+ -265119153.02185738, -0.02450615965231944, 0.0002684331017079073,
+ 32079040427.68358, -0.04733295911845742, 0.061381859083076085,
+ -25329.59126796951, -0.0009567520620034965, -1553879364344.9932,
+ -2.1101077525869814e-08, -298421079729.5547, 0.03182394834273594,
+ 22.201944843278916, -33.35667991109125, 11496013.960449915,
+ -40652595.33210472, 3.8496066090328163, 2.5074042398147304e-08,
+ -0.02208724071782122, -134211.37290639878, 0.17640433666616578,
+ 4.463787499171126, 9.959669945399718, 129265976.35224283,
+ 1.5865526187526546e-07, -4746011.710555799, -712048598925.0789,
+ 582214206210.4034, 0.025236204812875362, 530078170.91147506,
+ -14.865307666195053, 1.6727994895185032e-05, -113386276.03121366,
+ -6.135827207137054, 10644945799901.145, -100848907797.1582,
+ 2.2404406961625282e-08, 1.315662618424494e-09, -0.832190208349044,
+ -9.779323414999364, -546522170658.2997};
+ double straightSum = 0.0;
+ DoubleDoubleSummation compensatedSum;
+ for (auto x : doubleValues) {
+ compensatedSum.addDouble(x.getDouble());
+ straightSum += x.getDouble();
+ }
+ ASSERT_NE(straightSum, compensatedSum.getDouble());
+
+ Value result = evaluateExpression("$add", doubleValues);
+ ASSERT_VALUE_EQ(result, Value(straightSum));
+ ASSERT_VALUE_NE(result, Value(compensatedSum.getDouble()));
+}
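
For context on why the straight sum and the compensated sum above can differ, here is a generic Kahan-style compensated summation next to a plain loop (a simplified stand-in for DoubleDoubleSummation, not the class itself):

#include <vector>

double straightSum(const std::vector<double>& xs) {
    double s = 0.0;
    for (double x : xs)
        s += x;  // low-order bits of each addition are silently discarded
    return s;
}

double kahanSum(const std::vector<double>& xs) {
    double s = 0.0, c = 0.0;  // c carries the rounding error of the previous step
    for (double x : xs) {
        double y = x - c;
        double t = s + y;
        c = (t - s) - y;
        s = t;
    }
    return s;
}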
+TEST(ExpressionFLETest, BadInputs) {
+
+ auto expCtx = ExpressionContextForTest();
+ auto vps = expCtx.variablesParseState;
+ {
+ auto expr = fromjson("{$_internalFleEq: 12}");
+ ASSERT_THROWS_CODE(ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps),
+ DBException,
+ 10065);
+ }
+}
+
+// Test that we return true when the encrypted field value matches
+TEST(ExpressionFLETest, TestBinData) {
+ auto expCtx = ExpressionContextForTest();
+ auto vps = expCtx.variablesParseState;
+
+ {
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ0JE8aMUFkPk5sSTVqfdNNfjqUfQQ1Uoj0BBcthrWoe9wyU3cN6zmWaQBPJ97t0ZPbecnMsU736yXre6cBO4Zdt/wThtY+v5+7vFgNnWpgRP0e+vam6QPmLvbBrO0LdsvAPTGW4yqwnzCIXCoEg7QPGfbfAXKPDTNenBfRlawiblmTOhO/6ljKotWsMp22q/rpHrn9IEIeJmecwuuPIJ7EA+XYQ3hOKVccYf2ogoK73+8xD/Vul83Qvr84Q8afc4QUMVs8A==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "3"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_VALUE_EQ(exprFle->evaluate({}, &expCtx.variables), Value(true));
+ }
+
+    // Negative: Use wrong edc token
+ {
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ0JE8aMUFkPk5sSTVqfdNNfjqUfQQ1Uoj0BBcthrWoe9wyU3cN6zmWaQBPJ97t0ZPbecnMsU736yXre6cBO4Zdt/wThtY+v5+7vFgNnWpgRP0e+vam6QPmLvbBrO0LdsvAPTGW4yqwnzCIXCoEg7QPGfbfAXKPDTNenBfRlawiblmTOhO/6ljKotWsMp22q/rpHrn9IEIeJmecwuuPIJ7EA+XYQ3hOKVccYf2ogoK73+8xD/Vul83Qvr84Q8afc4QUMVs8A==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "3"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSMQID7SFWYAUI3ZKSFKATKRYDQFNXXEOGAD5D4RSG",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_VALUE_EQ(exprFle->evaluate({}, &expCtx.variables), Value(false));
+ }
+
+    // Negative: Use wrong server token
+ {
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ0JE8aMUFkPk5sSTVqfdNNfjqUfQQ1Uoj0BBcthrWoe9wyU3cN6zmWaQBPJ97t0ZPbecnMsU736yXre6cBO4Zdt/wThtY+v5+7vFgNnWpgRP0e+vam6QPmLvbBrO0LdsvAPTGW4yqwnzCIXCoEg7QPGfbfAXKPDTNenBfRlawiblmTOhO/6ljKotWsMp22q/rpHrn9IEIeJmecwuuPIJ7EA+XYQ3hOKVccYf2ogoK73+8xD/Vul83Qvr84Q8afc4QUMVs8A==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COUAC/ERLYAKKX6B0VZ1R3QODOQFFJQJD+XLGIPU4/PS",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "3"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_THROWS_CODE(
+ exprFle->evaluate({}, &expCtx.variables), DBException, ErrorCodes::Overflow);
+ }
+}
+
+TEST(ExpressionFLETest, TestBinData_ContentionFactor) {
+ auto expCtx = ExpressionContextForTest();
+ auto vps = expCtx.variablesParseState;
+
+ // Use the wrong contention factor - 0
+ {
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ5+Wa5+SZafJeRUDGdLNx+i2ADDkyV2qA90Xcve7FqltoDm1PllSSgUS4fYtw3XDjzoNZrFFg8LfG2wH0HYbLMswv681KJpmEw7+RXy4CcPVFgoRFt24N13p7jT+pqu2oQAHAoxYTy/TsiAyY4RnAMiXYGg3hWz4AO/WxHNSyq6B6kX5d7x/hrXvppsZDc2Pmhd+c5xmovlv5RPj7wnNld13kYcMluztjNswiCH05hM/kp2/P7kw30iVnbz0SZxn1FjjCug==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "0"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_VALUE_EQ(exprFle->evaluate({}, &expCtx.variables), Value(false));
+ }
+
+ // Use the right contention factor - 50
+ {
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+"BxI0VngSNJh2EjQSNFZ4kBIQ5+Wa5+SZafJeRUDGdLNx+i2ADDkyV2qA90Xcve7FqltoDm1PllSSgUS4fYtw3XDjzoNZrFFg8LfG2wH0HYbLMswv681KJpmEw7+RXy4CcPVFgoRFt24N13p7jT+pqu2oQAHAoxYTy/TsiAyY4RnAMiXYGg3hWz4AO/WxHNSyq6B6kX5d7x/hrXvppsZDc2Pmhd+c5xmovlv5RPj7wnNld13kYcMluztjNswiCH05hM/kp2/P7kw30iVnbz0SZxn1FjjCug==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "50"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_VALUE_EQ(exprFle->evaluate({}, &expCtx.variables), Value(true));
+ }
+}
+
+TEST(ExpressionFLETest, TestBinData_RoundTrip) {
+ auto expCtx = ExpressionContextForTest();
+ auto vps = expCtx.variablesParseState;
+
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ0JE8aMUFkPk5sSTVqfdNNfjqUfQQ1Uoj0BBcthrWoe9wyU3cN6zmWaQBPJ97t0ZPbecnMsU736yXre6cBO4Zdt/wThtY+v5+7vFgNnWpgRP0e+vam6QPmLvbBrO0LdsvAPTGW4yqwnzCIXCoEg7QPGfbfAXKPDTNenBfRlawiblmTOhO/6ljKotWsMp22q/rpHrn9IEIeJmecwuuPIJ7EA+XYQ3hOKVccYf2ogoK73+8xD/Vul83Qvr84Q8afc4QUMVs8A==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "3"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_VALUE_EQ(exprFle->evaluate({}, &expCtx.variables), Value(true));
+
+ // Verify it round trips
+ auto value = exprFle->serialize(false);
+
+ auto roundTripExpr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$const" : { "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ0JE8aMUFkPk5sSTVqfdNNfjqUfQQ1Uoj0BBcthrWoe9wyU3cN6zmWaQBPJ97t0ZPbecnMsU736yXre6cBO4Zdt/wThtY+v5+7vFgNnWpgRP0e+vam6QPmLvbBrO0LdsvAPTGW4yqwnzCIXCoEg7QPGfbfAXKPDTNenBfRlawiblmTOhO/6ljKotWsMp22q/rpHrn9IEIeJmecwuuPIJ7EA+XYQ3hOKVccYf2ogoK73+8xD/Vul83Qvr84Q8afc4QUMVs8A==",
+ "subType": "6"
+ }}
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "3"
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ } })");
+
+
+ ASSERT_BSONOBJ_EQ(value.getDocument().toBson(), roundTripExpr);
+}
+
} // namespace ExpressionTests
diff --git a/src/mongo/db/pipeline/expression_visitor.h b/src/mongo/db/pipeline/expression_visitor.h
index 46ad3ee6295..6b7c4fc4cdd 100644
--- a/src/mongo/db/pipeline/expression_visitor.h
+++ b/src/mongo/db/pipeline/expression_visitor.h
@@ -153,6 +153,7 @@ class ExpressionHyperbolicSine;
class ExpressionInternalFindSlice;
class ExpressionInternalFindPositional;
class ExpressionInternalFindElemMatch;
+class ExpressionInternalFLEEqual;
class ExpressionInternalJsEmit;
class ExpressionFunction;
class ExpressionDegreesToRadians;
@@ -245,6 +246,7 @@ public:
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionLn>) = 0;
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionLog>) = 0;
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionLog10>) = 0;
+ virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionInternalFLEEqual>) = 0;
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionMap>) = 0;
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionMeta>) = 0;
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionMod>) = 0;
@@ -424,6 +426,7 @@ struct SelectiveConstExpressionVisitorBase : public ExpressionConstVisitor {
void visit(const ExpressionLn*) override {}
void visit(const ExpressionLog*) override {}
void visit(const ExpressionLog10*) override {}
+ void visit(const ExpressionInternalFLEEqual*) override {}
void visit(const ExpressionMap*) override {}
void visit(const ExpressionMeta*) override {}
void visit(const ExpressionMod*) override {}
diff --git a/src/mongo/db/pipeline/pipeline.h b/src/mongo/db/pipeline/pipeline.h
index 96cf6426be3..976b344e4f9 100644
--- a/src/mongo/db/pipeline/pipeline.h
+++ b/src/mongo/db/pipeline/pipeline.h
@@ -291,11 +291,6 @@ public:
static std::vector<Value> serializeContainer(
const SourceContainer& container, boost::optional<ExplainOptions::Verbosity> = boost::none);
- /**
- * Serializes the pipeline into BSON for explain/debug logging purposes.
- */
- std::vector<BSONObj> serializeToBSONForDebug() const;
-
// The initial source is special since it varies between mongos and mongod.
void addInitialSource(boost::intrusive_ptr<DocumentSource> source);
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp
index 3169859560f..adbca1bf973 100644
--- a/src/mongo/db/pipeline/pipeline_d.cpp
+++ b/src/mongo/db/pipeline/pipeline_d.cpp
@@ -34,6 +34,7 @@
#include "mongo/db/pipeline/pipeline_d.h"
#include "mongo/base/exact_cast.h"
+#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/simple_bsonobj_comparator.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/database.h"
@@ -84,9 +85,11 @@
#include "mongo/db/query/query_feature_flags_gen.h"
#include "mongo/db/query/query_knobs_gen.h"
#include "mongo/db/query/query_planner.h"
+#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/sort_pattern.h"
#include "mongo/db/query/stage_types.h"
#include "mongo/db/s/collection_sharding_state.h"
+#include "mongo/db/server_options.h"
#include "mongo/db/service_context.h"
#include "mongo/db/stats/top.h"
#include "mongo/db/storage/record_store.h"
@@ -115,13 +118,11 @@ namespace {
* Group stages are extracted from the pipeline when all of the following conditions are met:
* - When the 'internalQueryForceClassicEngine' feature flag is 'false'.
* - When the 'internalQuerySlotBasedExecutionDisableGroupPushdown' query knob is 'false'.
- * - When the 'featureFlagSBEGroupPushdown' feature flag is 'true'.
* - When the DocumentSourceGroup has 'doingMerge=false'.
*
* Lookup stages are extracted from the pipeline when all of the following conditions are met:
* - When the 'internalQueryForceClassicEngine' feature flag is 'false'.
* - When the 'internalQuerySlotBasedExecutionDisableLookupPushdown' query knob is 'false'.
- * - When the 'featureFlagSBELookupPushdown' feature flag is 'true'.
* - The $lookup uses only the 'localField'/'foreignField' syntax (no pipelines).
* - The foreign collection is neither sharded nor a view.
*/
@@ -146,12 +147,6 @@ std::vector<std::unique_ptr<InnerPipelineStageInterface>> extractSbeCompatibleSt
auto&& sources = pipeline->getSources();
- const auto disallowGroupPushdown =
- !(serverGlobalParams.featureCompatibility.isVersionInitialized() &&
- feature_flags::gFeatureFlagSBEGroupPushdown.isEnabled(
- serverGlobalParams.featureCompatibility)) ||
- internalQuerySlotBasedExecutionDisableGroupPushdown.load();
-
bool isMainCollectionSharded = false;
if (const auto& mainColl = collections.getMainCollection()) {
isMainCollectionSharded = mainColl.isSharded();
@@ -165,7 +160,6 @@ std::vector<std::unique_ptr<InnerPipelineStageInterface>> extractSbeCompatibleSt
// sharded and which ones aren't. As such, if any secondary collection is a view or is sharded,
// no $lookup will be eligible for pushdown.
const bool disallowLookupPushdown =
- !feature_flags::gFeatureFlagSBELookupPushdown.isEnabledAndIgnoreFCV() ||
internalQuerySlotBasedExecutionDisableLookupPushdown.load() || isMainCollectionSharded ||
collections.isAnySecondaryNamespaceAViewOrSharded();
@@ -175,7 +169,7 @@ std::vector<std::unique_ptr<InnerPipelineStageInterface>> extractSbeCompatibleSt
// $group pushdown logic.
if (auto groupStage = dynamic_cast<DocumentSourceGroup*>(itr->get())) {
- if (disallowGroupPushdown) {
+ if (internalQuerySlotBasedExecutionDisableGroupPushdown.load()) {
break;
}
@@ -244,7 +238,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExe
SkipThenLimit skipThenLimit,
boost::optional<std::string> groupIdForDistinctScan,
const AggregateCommandRequest* aggRequest,
- const size_t plannerOpts,
+ const QueryPlannerParams& plannerOpts,
const MatchExpressionParser::AllowedFeatureSet& matcherFeatures,
Pipeline* pipeline) {
auto findCommand = std::make_unique<FindCommandRequest>(nss);
@@ -313,7 +307,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExe
// index would produce one result for '1' and another for '2', which would be incorrect.
auto distinctExecutor =
getExecutorDistinct(&collections.getMainCollection(),
- plannerOpts | QueryPlannerParams::STRICT_DISTINCT_ONLY,
+ plannerOpts.options | QueryPlannerParams::STRICT_DISTINCT_ONLY,
&parsedDistinct);
if (!distinctExecutor.isOK()) {
return distinctExecutor.getStatus().withContext(
@@ -1111,6 +1105,41 @@ bool PipelineD::sortAndKeyPatternPartAgreeAndOnMeta(const BucketUnpacker& bucket
return (keyPatternFieldPath.tail() == sortFieldPath.tail());
}
+boost::optional<TraversalPreference> createTimeSeriesTraversalPreference(
+ DocumentSourceInternalUnpackBucket* unpack, DocumentSourceSort* sort) {
+ const auto metaField = unpack->bucketUnpacker().getMetaField();
+ BSONObjBuilder builder;
+ // Reverse the sort pattern so we can look for indexes that match.
+ for (const auto& sortPart : sort->getSortKeyPattern()) {
+ if (!sortPart.fieldPath) {
+ return boost::none;
+ }
+ const int reversedDirection = sortPart.isAscending ? -1 : 1;
+ const auto& path = sortPart.fieldPath->fullPath();
+ if (metaField.has_value() &&
+ (expression::isPathPrefixOf(*metaField, path) || *metaField == path)) {
+ std::string rewrittenField =
+ std::string{timeseries::kBucketMetaFieldName} + path.substr(metaField->size());
+ builder.append(rewrittenField, reversedDirection);
+ } else if (path == unpack->bucketUnpacker().getTimeField()) {
+ if (reversedDirection == 1) {
+ builder.append(unpack->bucketUnpacker().getMinField(path), reversedDirection);
+ } else {
+ builder.append(unpack->bucketUnpacker().getMaxField(path), reversedDirection);
+ }
+ } else {
+ // The field wasn't meta or time, so no direction preference should be made.
+ return boost::none;
+ }
+ }
+
+ TraversalPreference traversalPreference;
+ traversalPreference.sortPattern = builder.obj();
+ traversalPreference.clusterField = unpack->getMinTimeField();
+ traversalPreference.direction = -1;
+ return traversalPreference;
+}
+
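
As a concrete illustration of the rewrite above, assume a hypothetical time-series collection with metaField "tags" and timeField "t": a $sort of {"tags.loc": 1, "t": -1} yields roughly the reversed bucket-level pattern {"meta.loc": -1, "control.min.t": 1}. Meta paths are re-rooted under the bucket's meta field, the time field maps to the bucket's control.min/control.max fields, and every direction is flipped so the planner can prefer walking a matching index backwards.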
std::pair<PipelineD::AttachExecutorCallback, std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>>
PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& collections,
const NamespaceString& nss,
@@ -1166,6 +1195,19 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
? DepsTracker::kDefaultUnavailableMetadata & ~DepsTracker::kOnlyTextScore
: DepsTracker::kDefaultUnavailableMetadata;
+    // If this is a query on a time-series collection, it may be eligible for the bucket-unpack
+    // sort optimization. Check eligibility here and, if it applies, hand the planner a traversal
+    // preference; the sort rewrite itself is performed after planning, below.
+ auto [unpack, sort] = findUnpackThenSort(pipeline->_sources);
+ QueryPlannerParams plannerOpts;
+ if (serverGlobalParams.featureCompatibility.isVersionInitialized() &&
+ serverGlobalParams.featureCompatibility.isGreaterThanOrEqualTo(
+ multiversion::FeatureCompatibilityVersion::kVersion_6_0) &&
+ feature_flags::gFeatureFlagBucketUnpackWithSort.isEnabled(
+ serverGlobalParams.featureCompatibility) &&
+ unpack && sort) {
+ plannerOpts.traversalPreference = createTimeSeriesTraversalPreference(unpack, sort);
+ }
+
// Create the PlanExecutor.
bool shouldProduceEmptyDocs = false;
auto exec = uassertStatusOK(prepareExecutor(expCtx,
@@ -1179,11 +1221,11 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
skipThenLimit,
aggRequest,
Pipeline::kAllowedMatcherFeatures,
- &shouldProduceEmptyDocs));
+ &shouldProduceEmptyDocs,
+ std::move(plannerOpts)));
// If this is a query on a time-series collection then it may be eligible for a post-planning
// sort optimization. We check eligibility and perform the rewrite here.
- auto [unpack, sort] = findUnpackThenSort(pipeline->_sources);
if (serverGlobalParams.featureCompatibility.isVersionInitialized() &&
serverGlobalParams.featureCompatibility.isGreaterThanOrEqualTo(
multiversion::FeatureCompatibilityVersion::kVersion_6_0) &&
@@ -1192,7 +1234,6 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
unpack && sort) {
auto execImpl = dynamic_cast<PlanExecutorImpl*>(exec.get());
if (execImpl) {
-
// Get source stage
PlanStage* rootStage = execImpl->getRootStage();
while (rootStage &&
@@ -1205,28 +1246,27 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
case STAGE_SHARDING_FILTER:
rootStage = rootStage->child().get();
break;
- case STAGE_MULTI_PLAN:
- if (auto mps = static_cast<MultiPlanStage*>(rootStage)) {
- if (mps->bestPlanChosen() && mps->bestPlanIdx()) {
- rootStage = (mps->getChildren())[*(mps->bestPlanIdx())].get();
- } else {
- rootStage = nullptr;
- tasserted(6655801,
- "Expected multiplanner to have selected a bestPlan.");
- }
+ case STAGE_MULTI_PLAN: {
+ auto mps = static_cast<MultiPlanStage*>(rootStage);
+ if (mps->bestPlanChosen() && mps->bestPlanIdx()) {
+ rootStage = (mps->getChildren())[*(mps->bestPlanIdx())].get();
+ } else {
+ rootStage = nullptr;
+ tasserted(6655801,
+ "Expected multiplanner to have selected a bestPlan.");
}
break;
- case STAGE_CACHED_PLAN:
- if (auto cp = static_cast<CachedPlanStage*>(rootStage)) {
- if (cp->bestPlanChosen()) {
- rootStage = rootStage->child().get();
- } else {
- rootStage = nullptr;
- tasserted(6655802,
- "Expected cached plan to have selected a bestPlan.");
- }
+ }
+ case STAGE_CACHED_PLAN: {
+ auto cp = static_cast<CachedPlanStage*>(rootStage);
+ if (cp->bestPlanChosen()) {
+ rootStage = rootStage->child().get();
+ } else {
+ rootStage = nullptr;
+ tasserted(6655802, "Expected cached plan to have selected a bestPlan.");
}
break;
+ }
default:
rootStage = nullptr;
}
@@ -1358,8 +1398,9 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
sort->getSortKeyPattern(),
(indexOrderedByMinTime ? DocumentSourceSort::kMin
: DocumentSourceSort::kMax),
- ((indexOrderedByMinTime) ? unpack->getBucketMaxSpanSeconds()
- : -unpack->getBucketMaxSpanSeconds()) *
+ static_cast<long long>((indexOrderedByMinTime)
+ ? unpack->getBucketMaxSpanSeconds()
+ : -unpack->getBucketMaxSpanSeconds()) *
1000,
sort->getLimit(),
expCtx));
@@ -1399,7 +1440,9 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
// This produces {$const: maxBucketSpanSeconds}
make_intrusive<ExpressionConstant>(
expCtx.get(),
- Value{unpack->getBucketMaxSpanSeconds() * 1000}))),
+ Value{static_cast<long long>(
+ unpack->getBucketMaxSpanSeconds()) *
+ 1000}))),
expCtx);
pipeline->_sources.insert(
unpackIter,
@@ -1513,24 +1556,22 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep
SkipThenLimit skipThenLimit,
const AggregateCommandRequest* aggRequest,
const MatchExpressionParser::AllowedFeatureSet& matcherFeatures,
- bool* hasNoRequirements) {
+ bool* hasNoRequirements,
+ QueryPlannerParams plannerOpts) {
invariant(hasNoRequirements);
- // Any data returned from the inner executor must be owned.
- size_t plannerOpts = QueryPlannerParams::DEFAULT;
-
bool isChangeStream =
pipeline->peekFront() && pipeline->peekFront()->constraints().isChangeStreamStage();
if (isChangeStream) {
invariant(expCtx->tailableMode == TailableModeEnum::kTailableAndAwaitData);
- plannerOpts |= (QueryPlannerParams::TRACK_LATEST_OPLOG_TS |
- QueryPlannerParams::ASSERT_MIN_TS_HAS_NOT_FALLEN_OFF_OPLOG);
+ plannerOpts.options |= (QueryPlannerParams::TRACK_LATEST_OPLOG_TS |
+ QueryPlannerParams::ASSERT_MIN_TS_HAS_NOT_FALLEN_OFF_OPLOG);
}
// The $_requestReshardingResumeToken parameter is only valid for an oplog scan.
if (aggRequest && aggRequest->getRequestReshardingResumeToken()) {
- plannerOpts |= (QueryPlannerParams::TRACK_LATEST_OPLOG_TS |
- QueryPlannerParams::ASSERT_MIN_TS_HAS_NOT_FALLEN_OFF_OPLOG);
+ plannerOpts.options |= (QueryPlannerParams::TRACK_LATEST_OPLOG_TS |
+ QueryPlannerParams::ASSERT_MIN_TS_HAS_NOT_FALLEN_OFF_OPLOG);
}
// If there is a sort stage eligible for pushdown, serialize its SortPattern to a BSONObj. The
@@ -1570,7 +1611,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep
if (*hasNoRequirements) {
// This query might be eligible for count optimizations, since the remaining stages in the
// pipeline don't actually need to read any data produced by the query execution layer.
- plannerOpts |= QueryPlannerParams::IS_COUNT;
+ plannerOpts.options |= QueryPlannerParams::IS_COUNT;
} else {
// Build a BSONObj representing a projection eligible for pushdown. If there is an inclusion
// projection at the front of the pipeline, it will be removed and handled by the PlanStage
@@ -1588,7 +1629,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep
// top-k sort, which both sorts and limits.)
bool allowExpressions = !sortStage && !skipThenLimit.getSkip() && !skipThenLimit.getLimit();
projObj = buildProjectionForPushdown(deps, pipeline, allowExpressions);
- plannerOpts |= QueryPlannerParams::RETURN_OWNED_DATA;
+ plannerOpts.options |= QueryPlannerParams::RETURN_OWNED_DATA;
}
if (rewrittenGroupStage) {
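
The planner-options change above follows a simple pattern: the bare size_t bitmask becomes one member of a small struct so that richer hints (here, traversalPreference) can travel with it. A standalone sketch with hypothetical names:

#include <cstddef>
#include <optional>
#include <string>

struct PlannerHints {  // hypothetical stand-in for QueryPlannerParams
    static constexpr std::size_t kIsCount = 1u << 0;
    static constexpr std::size_t kReturnOwnedData = 1u << 1;
    std::size_t options = 0;                         // the old bitmask, now a member
    std::optional<std::string> traversalPreference;  // extra hint riding alongside the flags
};

int main() {
    PlannerHints hints;
    hints.options |= PlannerHints::kIsCount;          // flags still OR into .options
    hints.traversalPreference = "minTime, backward";  // hypothetical value
    return 0;
}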
diff --git a/src/mongo/db/pipeline/pipeline_d.h b/src/mongo/db/pipeline/pipeline_d.h
index cd40bc33b8b..c109e75b1b8 100644
--- a/src/mongo/db/pipeline/pipeline_d.h
+++ b/src/mongo/db/pipeline/pipeline_d.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/db/exec/bucket_unpacker.h"
+#include "mongo/db/query/query_planner_params.h"
#include <boost/intrusive_ptr.hpp>
#include <memory>
@@ -44,6 +45,7 @@
#include "mongo/db/query/collation/collator_factory_interface.h"
#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/plan_executor.h"
+#include "mongo/db/query/query_planner.h"
namespace mongo {
class Collection;
@@ -202,7 +204,8 @@ private:
SkipThenLimit skipThenLimit,
const AggregateCommandRequest* aggRequest,
const MatchExpressionParser::AllowedFeatureSet& matcherFeatures,
- bool* hasNoRequirements);
+ bool* hasNoRequirements,
+ QueryPlannerParams plannerOpts = QueryPlannerParams{});
/**
* Build a PlanExecutor and prepare a callback to create a special DocumentSourceGeoNearCursor
diff --git a/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp b/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp
index b1d0090aa56..2fecb18bebe 100644
--- a/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp
+++ b/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp
@@ -365,12 +365,6 @@ Status CommonMongodProcessInterface::appendQueryExecStats(OperationContext* opCt
const NamespaceString& nss,
BSONObjBuilder* builder) const {
AutoGetCollectionForReadCommand collection(opCtx, nss);
-
- if (!collection.getDb()) {
- return {ErrorCodes::NamespaceNotFound,
- str::stream() << "Database [" << nss.db().toString() << "] not found."};
- }
-
if (!collection) {
return {ErrorCodes::NamespaceNotFound,
str::stream() << "Collection [" << nss.toString() << "] not found."};
@@ -398,9 +392,6 @@ BSONObj CommonMongodProcessInterface::getCollectionOptionsLocally(OperationConte
const NamespaceString& nss) {
AutoGetCollectionForReadCommand collection(opCtx, nss);
BSONObj collectionOptions = {};
- if (!collection.getDb()) {
- return collectionOptions;
- }
if (!collection) {
return collectionOptions;
}
@@ -436,14 +427,8 @@ CommonMongodProcessInterface::attachCursorSourceToPipelineForLocalRead(Pipeline*
// Reparse 'pipeline' to discover whether there are secondary namespaces that we need to lock
// when constructing our query executor.
- std::vector<NamespaceStringOrUUID> secondaryNamespaces = [&]() {
- if (feature_flags::gFeatureFlagSBELookupPushdown.isEnabledAndIgnoreFCV()) {
- auto lpp = LiteParsedPipeline(expCtx->ns, pipeline->serializeToBson());
- return lpp.getForeignExecutionNamespaces();
- } else {
- return std::vector<NamespaceStringOrUUID>{};
- }
- }();
+ auto lpp = LiteParsedPipeline(expCtx->ns, pipeline->serializeToBson());
+ std::vector<NamespaceStringOrUUID> secondaryNamespaces = lpp.getForeignExecutionNamespaces();
autoColl.emplace(expCtx->opCtx,
nsOrUUID,
@@ -574,7 +559,8 @@ std::vector<BSONObj> CommonMongodProcessInterface::getMatchingPlanCacheEntryStat
collVersion = collQueryInfo.getPlanCacheInvalidatorVersion()](
const sbe::PlanCacheKey& key) {
// Only fetch plan cache entries with keys matching given UUID and collectionVersion.
- return uuid == key.getCollectionUuid() && collVersion == key.getCollectionVersion();
+ return uuid == key.getMainCollectionState().uuid &&
+ collVersion == key.getMainCollectionState().version;
};
auto planCacheEntriesSBE =
@@ -883,8 +869,7 @@ boost::optional<Document> CommonMongodProcessInterface::lookupSingleDocumentLoca
const Document& documentKey) {
AutoGetCollectionForRead autoColl(expCtx->opCtx, nss);
BSONObj document;
- if (!Helpers::findById(
- expCtx->opCtx, autoColl.getDb(), nss.ns(), documentKey.toBson(), document)) {
+ if (!Helpers::findById(expCtx->opCtx, nss.ns(), documentKey.toBson(), document)) {
return boost::none;
}
return Document(document).getOwned();
diff --git a/src/mongo/db/pipeline/process_interface/mongos_process_interface_test.cpp b/src/mongo/db/pipeline/process_interface/mongos_process_interface_test.cpp
index 83b35a0c9fc..30de3fbfb1f 100644
--- a/src/mongo/db/pipeline/process_interface/mongos_process_interface_test.cpp
+++ b/src/mongo/db/pipeline/process_interface/mongos_process_interface_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/pipeline/aggregation_context_fixture.h"
#include "mongo/db/pipeline/process_interface/mongos_process_interface.h"
#include "mongo/unittest/unittest.h"
@@ -63,7 +61,7 @@ public:
TEST_F(MongosProcessInterfaceTest, FailsToEnsureFieldsUniqueIfTargetCollectionVersionIsSpecified) {
auto expCtx = getExpCtx();
auto targetCollectionVersion =
- boost::make_optional(ChunkVersion(0, 0, OID::gen(), Timestamp(1, 1)));
+ boost::make_optional(ChunkVersion({OID::gen(), Timestamp(1, 1)}, {0, 0}));
auto processInterface = makeProcessInterface();
ASSERT_THROWS_CODE(processInterface->ensureFieldsUniqueOrResolveDocumentKey(
diff --git a/src/mongo/db/pipeline/process_interface/standalone_process_interface_test.cpp b/src/mongo/db/pipeline/process_interface/standalone_process_interface_test.cpp
index a0e9bd5e572..a8ca2a48896 100644
--- a/src/mongo/db/pipeline/process_interface/standalone_process_interface_test.cpp
+++ b/src/mongo/db/pipeline/process_interface/standalone_process_interface_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/pipeline/aggregation_context_fixture.h"
#include "mongo/db/pipeline/process_interface/standalone_process_interface.h"
#include "mongo/unittest/unittest.h"
@@ -67,7 +65,7 @@ TEST_F(ProcessInterfaceStandaloneTest,
FailsToEnsureFieldsUniqueIfTargetCollectionVersionIsSpecifiedOnMongos) {
auto expCtx = getExpCtx();
auto targetCollectionVersion =
- boost::make_optional(ChunkVersion(0, 0, OID::gen(), Timestamp(1, 1)));
+ boost::make_optional(ChunkVersion({OID::gen(), Timestamp(1, 1)}, {0, 0}));
auto processInterface = makeProcessInterface();
// Test that 'targetCollectionVersion' is not accepted if not from mongos.
@@ -90,7 +88,7 @@ TEST_F(ProcessInterfaceStandaloneTest,
TEST_F(ProcessInterfaceStandaloneTest, FailsToEnsureFieldsUniqueIfJoinFieldsAreNotSentFromMongos) {
auto expCtx = getExpCtx();
auto targetCollectionVersion =
- boost::make_optional(ChunkVersion(0, 0, OID::gen(), Timestamp(1, 1)));
+ boost::make_optional(ChunkVersion({OID::gen(), Timestamp(1, 1)}, {0, 0}));
auto processInterface = makeProcessInterface();
expCtx->fromMongos = true;
diff --git a/src/mongo/db/pipeline/resharding_initial_split_policy_test.cpp b/src/mongo/db/pipeline/resharding_initial_split_policy_test.cpp
index bc17e9d0133..2df79a991d4 100644
--- a/src/mongo/db/pipeline/resharding_initial_split_policy_test.cpp
+++ b/src/mongo/db/pipeline/resharding_initial_split_policy_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/pipeline/document_source_mock.h"
#include "mongo/db/pipeline/sharded_agg_helpers.h"
@@ -39,7 +36,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
diff --git a/src/mongo/db/pipeline/sharded_union_test.cpp b/src/mongo/db/pipeline/sharded_union_test.cpp
index 79863fc7f14..a8d15b8dbbe 100644
--- a/src/mongo/db/pipeline/sharded_union_test.cpp
+++ b/src/mongo/db/pipeline/sharded_union_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/exec/document_value/document_value_test_util.h"
#include "mongo/db/pipeline/document_source_group.h"
#include "mongo/db/pipeline/document_source_match.h"
@@ -163,10 +161,12 @@ TEST_F(ShardedUnionTest, RetriesSubPipelineOnStaleConfigError) {
onCommand([&](const executor::RemoteCommandRequest& request) {
OID epoch{OID::gen()};
Timestamp timestamp{1, 0};
- return createErrorCursorResponse(Status{
- StaleConfigInfo(
- kTestAggregateNss, ChunkVersion(1, 0, epoch, timestamp), boost::none, ShardId{"0"}),
- "Mock error: shard version mismatch"});
+ return createErrorCursorResponse(
+ Status{StaleConfigInfo(kTestAggregateNss,
+ ChunkVersion({epoch, timestamp}, {1, 0}),
+ boost::none,
+ ShardId{"0"}),
+ "Mock error: shard version mismatch"});
});
// Mock the expected config server queries.
@@ -175,7 +175,7 @@ TEST_F(ShardedUnionTest, RetriesSubPipelineOnStaleConfigError) {
const Timestamp timestamp(1, 1);
const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(cm.getUUID(),
{shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)},
@@ -246,10 +246,12 @@ TEST_F(ShardedUnionTest, CorrectlySplitsSubPipelineIfRefreshedDistributionRequir
OID epoch{OID::gen()};
Timestamp timestamp{1, 0};
- return createErrorCursorResponse(Status{
- StaleConfigInfo(
- kTestAggregateNss, ChunkVersion(1, 0, epoch, timestamp), boost::none, ShardId{"0"}),
- "Mock error: shard version mismatch"});
+ return createErrorCursorResponse(
+ Status{StaleConfigInfo(kTestAggregateNss,
+ ChunkVersion({epoch, timestamp}, {1, 0}),
+ boost::none,
+ ShardId{"0"}),
+ "Mock error: shard version mismatch"});
});
// Mock the expected config server queries. Update the distribution as if a chunk [0, 10] was
@@ -259,7 +261,7 @@ TEST_F(ShardedUnionTest, CorrectlySplitsSubPipelineIfRefreshedDistributionRequir
const Timestamp timestamp(1, 1);
const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(cm.getUUID(),
{shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)},
@@ -337,23 +339,27 @@ TEST_F(ShardedUnionTest, AvoidsSplittingSubPipelineIfRefreshedDistributionDoesNo
Timestamp timestamp{1, 1};
onCommand([&](const executor::RemoteCommandRequest& request) {
- return createErrorCursorResponse(Status{
- StaleConfigInfo(
- kTestAggregateNss, ChunkVersion(1, 0, epoch, timestamp), boost::none, ShardId{"0"}),
- "Mock error: shard version mismatch"});
+ return createErrorCursorResponse(
+ Status{StaleConfigInfo(kTestAggregateNss,
+ ChunkVersion({epoch, timestamp}, {1, 0}),
+ boost::none,
+ ShardId{"0"}),
+ "Mock error: shard version mismatch"});
});
onCommand([&](const executor::RemoteCommandRequest& request) {
- return createErrorCursorResponse(Status{
- StaleConfigInfo(
- kTestAggregateNss, ChunkVersion(1, 0, epoch, timestamp), boost::none, ShardId{"0"}),
- "Mock error: shard version mismatch"});
+ return createErrorCursorResponse(
+ Status{StaleConfigInfo(kTestAggregateNss,
+ ChunkVersion({epoch, timestamp}, {1, 0}),
+ boost::none,
+ ShardId{"0"}),
+ "Mock error: shard version mismatch"});
});
// Mock the expected config server queries. Update the distribution so that all chunks are on
// the same shard.
const UUID uuid = UUID::gen();
const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(
cm.getUUID(),
{shardKeyPattern.getKeyPattern().globalMin(), shardKeyPattern.getKeyPattern().globalMax()},
@@ -412,7 +418,7 @@ TEST_F(ShardedUnionTest, IncorporatesViewDefinitionAndRetriesWhenViewErrorReceiv
const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
const Timestamp timestamp(1, 1);
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(cm.getUUID(),
{shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)},
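
The ChunkVersion changes in this file and in the process-interface tests above all apply the same mechanical rewrite: the constructor now takes the {epoch, timestamp} pair first and the {major, minor} pair second, so ChunkVersion(1, 0, epoch, timestamp) becomes ChunkVersion({epoch, timestamp}, {1, 0}).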
diff --git a/src/mongo/db/pipeline/visitors/document_source_visitor.h b/src/mongo/db/pipeline/visitors/document_source_visitor.h
index a0158147e38..1db827e9dfb 100644
--- a/src/mongo/db/pipeline/visitors/document_source_visitor.h
+++ b/src/mongo/db/pipeline/visitors/document_source_visitor.h
@@ -47,6 +47,7 @@ class DocumentSourceIndexStats;
class DocumentSourceInternalInhibitOptimization;
class DocumentSourceInternalShardFilter;
class DocumentSourceInternalSplitPipeline;
+class DocumentSourceInternalUnpackBucket;
class DocumentSourceLimit;
class DocumentSourceListCachedAndActiveUsers;
class DocumentSourceListLocalSessions;
@@ -98,6 +99,8 @@ public:
tree_walker::MaybeConstPtr<IsConst, DocumentSourceInternalShardFilter> source) = 0;
virtual void visit(
tree_walker::MaybeConstPtr<IsConst, DocumentSourceInternalSplitPipeline> source) = 0;
+ virtual void visit(
+ tree_walker::MaybeConstPtr<IsConst, DocumentSourceInternalUnpackBucket> source) = 0;
virtual void visit(tree_walker::MaybeConstPtr<IsConst, DocumentSourceLimit> source) = 0;
virtual void visit(
tree_walker::MaybeConstPtr<IsConst, DocumentSourceListCachedAndActiveUsers> source) = 0;
diff --git a/src/mongo/db/pipeline/visitors/document_source_walker.cpp b/src/mongo/db/pipeline/visitors/document_source_walker.cpp
index b0ea004cae9..0fb3dba9967 100644
--- a/src/mongo/db/pipeline/visitors/document_source_walker.cpp
+++ b/src/mongo/db/pipeline/visitors/document_source_walker.cpp
@@ -44,6 +44,7 @@
#include "mongo/db/pipeline/document_source_internal_inhibit_optimization.h"
#include "mongo/db/pipeline/document_source_internal_shard_filter.h"
#include "mongo/db/pipeline/document_source_internal_split_pipeline.h"
+#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
#include "mongo/db/pipeline/document_source_limit.h"
#include "mongo/db/pipeline/document_source_list_cached_and_active_users.h"
#include "mongo/db/pipeline/document_source_list_local_sessions.h"
@@ -108,6 +109,7 @@ void DocumentSourceWalker::walk(const Pipeline& pipeline) {
visitHelper<DocumentSourceInternalInhibitOptimization>(ds) ||
visitHelper<DocumentSourceInternalShardFilter>(ds) ||
visitHelper<DocumentSourceInternalSplitPipeline>(ds) ||
+ visitHelper<DocumentSourceInternalUnpackBucket>(ds) ||
visitHelper<DocumentSourceLimit>(ds) ||
visitHelper<DocumentSourceListCachedAndActiveUsers>(ds) ||
visitHelper<DocumentSourceListLocalSessions>(ds) ||
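Supporting a new stage in this visitor framework touches three places, all visible above: a forward declaration of the class in the visitor header, a pure-virtual visit() overload on the interface, and an extra visitHelper<> entry in the walker's dispatch chain. A minimal stand-alone sketch of that pattern (simplified types, not the real DocumentSource hierarchy) looks like this:

    // Simplified stand-ins for the DocumentSource hierarchy -- not the mongo classes.
    #include <iostream>
    #include <memory>
    #include <vector>

    struct Stage { virtual ~Stage() = default; };
    struct LimitStage : Stage {};
    struct UnpackBucketStage : Stage {};  // the newly supported stage kind

    struct Visitor {
        virtual ~Visitor() = default;
        virtual void visit(const LimitStage&) = 0;
        virtual void visit(const UnpackBucketStage&) = 0;  // new pure-virtual overload
    };

    // Mirrors the walker's visitHelper<T>(ds) || ... dispatch chain.
    template <typename T>
    bool visitHelper(Visitor& v, const Stage& s) {
        if (auto* p = dynamic_cast<const T*>(&s)) {
            v.visit(*p);
            return true;
        }
        return false;
    }

    struct PrintingVisitor : Visitor {
        void visit(const LimitStage&) override { std::cout << "$limit\n"; }
        void visit(const UnpackBucketStage&) override { std::cout << "$_internalUnpackBucket\n"; }
    };

    int main() {
        std::vector<std::unique_ptr<Stage>> pipeline;
        pipeline.push_back(std::make_unique<UnpackBucketStage>());
        pipeline.push_back(std::make_unique<LimitStage>());

        PrintingVisitor visitor;
        for (const auto& stage : pipeline) {
            bool handled = visitHelper<UnpackBucketStage>(visitor, *stage) ||
                visitHelper<LimitStage>(visitor, *stage);
            if (!handled) std::cout << "unsupported stage\n";
        }
        return 0;
    }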
diff --git a/src/mongo/db/process_health/config_server_health_observer.cpp b/src/mongo/db/process_health/config_server_health_observer.cpp
index 5e8a9ada390..bf011d28472 100644
--- a/src/mongo/db/process_health/config_server_health_observer.cpp
+++ b/src/mongo/db/process_health/config_server_health_observer.cpp
@@ -87,7 +87,7 @@ public:
* previous one is filled, thus synchronization can be relaxed.
*/
Future<HealthCheckStatus> periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept override;
+ PeriodicHealthCheckContext&& periodicCheckContext) override;
private:
// Collects the results of one check.
@@ -146,7 +146,7 @@ ConfigServerHealthObserver::ConfigServerHealthObserver(ServiceContext* svcCtx)
: HealthObserverBase(svcCtx) {}
Future<HealthCheckStatus> ConfigServerHealthObserver::periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept {
+ PeriodicHealthCheckContext&& periodicCheckContext) {
// The chain does not capture 'this' in case the network call outlives the observer.
return _checkImpl(std::move(periodicCheckContext))
.then([type = getType()](CheckResult result) mutable -> Future<HealthCheckStatus> {
diff --git a/src/mongo/db/process_health/dns_health_observer.cpp b/src/mongo/db/process_health/dns_health_observer.cpp
index 6f41e5e2785..ff6611d10ac 100644
--- a/src/mongo/db/process_health/dns_health_observer.cpp
+++ b/src/mongo/db/process_health/dns_health_observer.cpp
@@ -47,9 +47,10 @@ namespace process_health {
MONGO_FAIL_POINT_DEFINE(dnsHealthObserverFp);
Future<HealthCheckStatus> DnsHealthObserver::periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept {
+ PeriodicHealthCheckContext&& periodicCheckContext) {
LOGV2_DEBUG(5938401, 2, "DNS health observer executing");
+
auto makeFailedHealthCheckFuture = [this](const Status& status) {
return Future<HealthCheckStatus>::makeReady(
makeSimpleFailedStatus(Severity::kFailure, {status}));
@@ -101,13 +102,17 @@ Future<HealthCheckStatus> DnsHealthObserver::periodicCheckImpl(
auto status = periodicCheckContext.taskExecutor->scheduleWork(
[this, servers, promise = std::move(completionPf.promise)](
const executor::TaskExecutor::CallbackArgs& cbArgs) mutable {
- auto statusWith =
- getHostFQDNs(servers.front().host(), HostnameCanonicalizationMode::kForward);
- if (statusWith.isOK() && !statusWith.getValue().empty()) {
- promise.emplaceValue(makeHealthyStatus());
- } else {
- promise.emplaceValue(
- makeSimpleFailedStatus(Severity::kFailure, {statusWith.getStatus()}));
+ try {
+ auto statusWith =
+ getHostFQDNs(servers.front().host(), HostnameCanonicalizationMode::kForward);
+ if (statusWith.isOK() && !statusWith.getValue().empty()) {
+ promise.emplaceValue(makeHealthyStatus());
+ } else {
+ promise.emplaceValue(
+ makeSimpleFailedStatus(Severity::kFailure, {statusWith.getStatus()}));
+ }
+ } catch (const DBException& e) {
+ promise.emplaceValue(makeSimpleFailedStatus(Severity::kFailure, {e.toStatus()}));
}
});
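Because the FQDN lookup now runs inside a try/catch, a throw from getHostFQDNs() is converted into a failed health status rather than escaping the executor callback; either way the promise is fulfilled, so the caller's future always completes. Below is a small sketch of that promise-fulfilling pattern using only standard-library types; resolveFqdns() is a made-up stand-in for getHostFQDNs(), not a real API.

    #include <future>
    #include <iostream>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // Made-up stand-in for getHostFQDNs(); it may throw instead of returning a status.
    std::vector<std::string> resolveFqdns(const std::string& host) {
        if (host.empty()) throw std::runtime_error("DNS resolution threw");
        return {host + ".example.internal"};  // hypothetical resolved name
    }

    int main() {
        std::promise<std::string> promise;
        auto future = promise.get_future();

        // Mirrors the scheduled task: every outcome, including a throw, must fulfil the
        // promise, otherwise the caller's future would only complete via its deadline.
        try {
            auto names = resolveFqdns("");  // empty host simulates the throwing path
            promise.set_value(names.empty() ? "failed: empty result"
                                            : "healthy: " + names.front());
        } catch (const std::exception& e) {
            promise.set_value(std::string("failed: ") + e.what());
        }

        std::cout << future.get() << "\n";
        return 0;
    }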
diff --git a/src/mongo/db/process_health/dns_health_observer.h b/src/mongo/db/process_health/dns_health_observer.h
index 2640c9024f7..11f54ad01bd 100644
--- a/src/mongo/db/process_health/dns_health_observer.h
+++ b/src/mongo/db/process_health/dns_health_observer.h
@@ -56,7 +56,7 @@ protected:
}
Future<HealthCheckStatus> periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept override;
+ PeriodicHealthCheckContext&& periodicCheckContext) override;
private:
mutable PseudoRandom _random;
diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp
index 243a9cf7937..01d90bf3db6 100644
--- a/src/mongo/db/process_health/health_observer_base.cpp
+++ b/src/mongo/db/process_health/health_observer_base.cpp
@@ -56,25 +56,38 @@ SharedSemiFuture<HealthCheckStatus> HealthObserverBase::periodicCheck(
_currentlyRunningHealthCheck = true;
}
+ Future<HealthCheckStatus> healthCheckResult;
+
+ try {
+ healthCheckResult = periodicCheckImpl({token, taskExecutor});
+ } catch (const DBException& e) {
+ LOGV2_DEBUG(6728001,
+ 2,
+ "Health observer failed due to an exception",
+ "observerType"_attr = getType(),
+ "errorCode"_attr = e.code(),
+ "reason"_attr = e.reason());
+
+ healthCheckResult = makeSimpleFailedStatus(Severity::kFailure, {e.toStatus()});
+ }
+
_deadlineFuture = DeadlineFuture<HealthCheckStatus>::create(
taskExecutor,
- periodicCheckImpl({token, taskExecutor})
- .onCompletion([this](StatusWith<HealthCheckStatus> status) {
- const auto now = _svcCtx->getPreciseClockSource()->now();
-
- auto lk = stdx::lock_guard(_mutex);
- ++_completedChecksCount;
- invariant(_currentlyRunningHealthCheck);
- _currentlyRunningHealthCheck = false;
- _lastTimeCheckCompleted = now;
-
- if (!status.isOK() ||
- !HealthCheckStatus::isResolved(status.getValue().getSeverity())) {
- ++_completedChecksWithFaultCount;
- }
-
- return status;
- }),
+ std::move(healthCheckResult).onCompletion([this](StatusWith<HealthCheckStatus> status) {
+ const auto now = _svcCtx->getPreciseClockSource()->now();
+
+ auto lk = stdx::lock_guard(_mutex);
+ ++_completedChecksCount;
+ invariant(_currentlyRunningHealthCheck);
+ _currentlyRunningHealthCheck = false;
+ _lastTimeCheckCompleted = now;
+
+ if (!status.isOK() || !HealthCheckStatus::isResolved(status.getValue().getSeverity())) {
+ ++_completedChecksWithFaultCount;
+ }
+
+ return status;
+ }),
getObserverTimeout());
return _deadlineFuture->get();
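With noexcept removed from periodicCheckImpl(), the base class now has to assume the implementation can throw: it wraps the call, logs, and substitutes a failed status so the usual onCompletion bookkeeping still runs. The sketch below shows the same convert-exception-to-failed-result shape in plain standard C++; CheckResult and runPeriodicCheck are illustrative names, not the server's API, which deals in Future<HealthCheckStatus> and DBException.

    #include <exception>
    #include <functional>
    #include <iostream>
    #include <stdexcept>
    #include <string>

    struct CheckResult {
        bool ok;
        std::string reason;
    };

    CheckResult runPeriodicCheck(const std::function<CheckResult()>& periodicCheckImpl) {
        CheckResult result{};
        try {
            // periodicCheckImpl may now throw, since its noexcept qualifier was dropped.
            result = periodicCheckImpl();
        } catch (const std::exception& e) {
            // Convert the exception into a failed result instead of terminating;
            // the real code then chains .onCompletion(...) to update check counters.
            result = CheckResult{false, e.what()};
        }
        return result;
    }

    int main() {
        auto healthy = runPeriodicCheck([] { return CheckResult{true, "ok"}; });
        auto failed = runPeriodicCheck([]() -> CheckResult {
            throw std::runtime_error("simulated health-check exception");
        });
        std::cout << healthy.ok << " " << failed.ok << " (" << failed.reason << ")\n";
        return 0;
    }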
diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h
index 18f24eb8540..ef7900f640f 100644
--- a/src/mongo/db/process_health/health_observer_base.h
+++ b/src/mongo/db/process_health/health_observer_base.h
@@ -91,7 +91,7 @@ protected:
* @return The result of a complete health check
*/
virtual Future<HealthCheckStatus> periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept = 0;
+ PeriodicHealthCheckContext&& periodicCheckContext) = 0;
HealthObserverLivenessStats getStatsLocked(WithLock) const;
diff --git a/src/mongo/db/process_health/health_observer_mock.h b/src/mongo/db/process_health/health_observer_mock.h
index e7a500bdf8c..b44fd35a368 100644
--- a/src/mongo/db/process_health/health_observer_mock.h
+++ b/src/mongo/db/process_health/health_observer_mock.h
@@ -40,8 +40,11 @@ namespace mongo {
namespace process_health {
/**
- * Mocked health observer is using a test callback to fetch the next
- * fault severity value every time the periodic check is invoked.
+ * Mocked health observer has two modes of operation (depending on which constructor is called):
+ * 1. Passing a callback that runs on an executor and returns a severity
+ * 2. Passing an implementation of periodicCheckImpl
+ *
+ * See unit test HealthCheckThrowingExceptionMakesFailedStatus for an example of the second mode.
*/
class HealthObserverMock : public HealthObserverBase {
public:
@@ -54,6 +57,16 @@ public:
_getSeverityCallback(getSeverityCallback),
_observerTimeout(observerTimeout) {}
+ HealthObserverMock(
+ FaultFacetType mockType,
+ ServiceContext* svcCtx,
+ std::function<Future<HealthCheckStatus>(PeriodicHealthCheckContext&&)> periodicCheckImpl,
+ Milliseconds observerTimeout)
+ : HealthObserverBase(svcCtx),
+ _mockType(mockType),
+ _periodicCheckImpl(periodicCheckImpl),
+ _observerTimeout(observerTimeout) {}
+
virtual ~HealthObserverMock() = default;
bool isConfigured() const override {
@@ -70,7 +83,11 @@ protected:
}
Future<HealthCheckStatus> periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept override {
+ PeriodicHealthCheckContext&& periodicCheckContext) override {
+
+ if (_periodicCheckImpl.has_value()) {
+ return (*_periodicCheckImpl)(std::move(periodicCheckContext));
+ }
auto completionPf = makePromiseFuture<HealthCheckStatus>();
@@ -99,6 +116,8 @@ protected:
private:
const FaultFacetType _mockType;
std::function<Severity()> _getSeverityCallback;
+ boost::optional<std::function<Future<HealthCheckStatus>(PeriodicHealthCheckContext&&)>>
+ _periodicCheckImpl;
const Milliseconds _observerTimeout;
};
diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp
index 66f5c8a6e99..cd79db11ebc 100644
--- a/src/mongo/db/process_health/health_observer_test.cpp
+++ b/src/mongo/db/process_health/health_observer_test.cpp
@@ -46,6 +46,7 @@ namespace process_health {
// Using the common fault manager test suite.
using test::FaultManagerTest;
+using PeriodicHealthCheckContext = HealthObserverBase::PeriodicHealthCheckContext;
namespace {
// Tests that the mock observer is registered properly.
@@ -254,6 +255,49 @@ TEST_F(FaultManagerTest, SchedulingDuplicateHealthChecksRejected) {
LOGV2(6418205, "Total completed checks count", "count"_attr = totalCompletedCount);
}
+TEST_F(FaultManagerTest, HealthCheckThrowingExceptionMakesFailedStatus) {
+ resetManager(std::make_unique<FaultManagerConfig>());
+
+ FaultFacetType facetType = FaultFacetType::kMock1;
+ AtomicWord<bool> shouldThrow{false};
+
+ std::string logMsg = "Failed due to exception";
+
+ auto periodicCheckImpl =
+ [facetType, &shouldThrow, logMsg](
+ PeriodicHealthCheckContext&& periodicHealthCheckCtx) -> Future<HealthCheckStatus> {
+ if (shouldThrow.load()) {
+ uasserted(ErrorCodes::InternalError, logMsg);
+ }
+ auto completionPf = makePromiseFuture<HealthCheckStatus>();
+ completionPf.promise.emplaceValue(HealthCheckStatus(facetType, Severity::kOk, "success"));
+ return std::move(completionPf.future);
+ };
+
+ HealthObserverRegistration::registerObserverFactory(
+ [facetType, periodicCheckImpl](ServiceContext* svcCtx) {
+ return std::make_unique<HealthObserverMock>(
+ facetType, svcCtx, periodicCheckImpl, Milliseconds(Seconds(30)));
+ });
+
+ assertSoon([this] { return (manager().getFaultState() == FaultState::kStartupCheck); });
+
+ auto initialHealthCheckFuture = manager().startPeriodicHealthChecks();
+ assertSoon([this] { return (manager().getFaultState() == FaultState::kOk); });
+
+ auto observer = manager().getHealthObserversTest().front();
+ ASSERT_EQ(observer->getStats().completedChecksWithFaultCount, 0);
+
+ shouldThrow.store(true);
+ assertSoon([this] { return (manager().getFaultState() == FaultState::kTransientFault); });
+
+ ASSERT_EQ(manager().currentFault()->toBSON()["facets"]["mock1"]["description"].String(),
+ "InternalError: Failed due to exception ");
+
+ ASSERT_GTE(observer->getStats().completedChecksWithFaultCount, 1);
+ resetManager();
+}
+
} // namespace
} // namespace process_health
} // namespace mongo
diff --git a/src/mongo/db/process_health/test_health_observer.cpp b/src/mongo/db/process_health/test_health_observer.cpp
index 70572c48851..01224117baa 100644
--- a/src/mongo/db/process_health/test_health_observer.cpp
+++ b/src/mongo/db/process_health/test_health_observer.cpp
@@ -43,7 +43,7 @@ MONGO_FAIL_POINT_DEFINE(testHealthObserver);
MONGO_FAIL_POINT_DEFINE(badConfigTestHealthObserver);
MONGO_FAIL_POINT_DEFINE(statusFailureTestHealthObserver);
Future<HealthCheckStatus> TestHealthObserver::periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept {
+ PeriodicHealthCheckContext&& periodicCheckContext) {
LOGV2_DEBUG(5936801, 2, "Test health observer executing");
hangTestHealthObserver.pauseWhileSet();
diff --git a/src/mongo/db/process_health/test_health_observer.h b/src/mongo/db/process_health/test_health_observer.h
index 428d57f8e9d..0c23df7fb42 100644
--- a/src/mongo/db/process_health/test_health_observer.h
+++ b/src/mongo/db/process_health/test_health_observer.h
@@ -50,7 +50,7 @@ protected:
}
Future<HealthCheckStatus> periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept override;
+ PeriodicHealthCheckContext&& periodicCheckContext) override;
bool isConfigured() const override;
};
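Dropping noexcept from the pure-virtual periodicCheckImpl() is what lets the mock, DNS, config-server, and test observers above drop it from their overrides and simply let exceptions propagate to the new handler in HealthObserverBase. The language rule only constrains one direction: an override may add noexcept to a potentially-throwing virtual, but a potentially-throwing override cannot be declared for a noexcept virtual, which is why the base declaration had to change first. A tiny compilable illustration:

    #include <iostream>

    struct Base {
        virtual ~Base() = default;
        // No longer noexcept, so overrides are allowed to throw.
        virtual int periodicCheckImpl() = 0;
    };

    struct Throwing : Base {
        int periodicCheckImpl() override { throw 42; }  // may now propagate an exception
    };

    struct StillNoexcept : Base {
        // Adding noexcept to an override only strengthens the guarantee; that is allowed.
        int periodicCheckImpl() noexcept override { return 0; }
    };

    int main() {
        StillNoexcept ok;
        std::cout << ok.periodicCheckImpl() << "\n";
        try {
            Throwing bad;
            bad.periodicCheckImpl();
        } catch (int code) {
            std::cout << "caught " << code << "\n";
        }
        return 0;
    }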
diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp
index c4d2de8fcb0..6449c5241fc 100644
--- a/src/mongo/db/query/canonical_query.cpp
+++ b/src/mongo/db/query/canonical_query.cpp
@@ -538,10 +538,11 @@ std::string CanonicalQuery::toStringShort() const {
}
CanonicalQuery::QueryShapeString CanonicalQuery::encodeKey() const {
- // TODO SERVER-61507: remove '_pipeline.empty()' check. Canonical queries with pushed down
- // $group/$lookup stages are not SBE-compatible until SERVER-61507 is complete.
+ // TODO SERVER-61507: remove 'canUseSbePlanCache' check. Canonical queries with pushed
+ // down $group stages are not compatible with the SBE plan cache until SERVER-61507 is complete.
return (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() &&
- !_forceClassicEngine && _sbeCompatible && _pipeline.empty())
+ !_forceClassicEngine && _sbeCompatible &&
+ canonical_query_encoder::canUseSbePlanCache(*this))
? canonical_query_encoder::encodeSBE(*this)
: canonical_query_encoder::encode(*this);
}
diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp
index 2013c8a635e..11b1a99479a 100644
--- a/src/mongo/db/query/canonical_query_encoder.cpp
+++ b/src/mongo/db/query/canonical_query_encoder.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/matcher/expression_text_noop.h"
#include "mongo/db/matcher/expression_where.h"
#include "mongo/db/matcher/expression_where_noop.h"
+#include "mongo/db/pipeline/document_source_lookup.h"
#include "mongo/db/query/analyze_regex.h"
#include "mongo/db/query/projection.h"
#include "mongo/db/query/query_feature_flags_gen.h"
@@ -86,6 +87,7 @@ const char kEncodeProjectionRequirementSeparator = '-';
const char kEncodeRegexFlagsSeparator = '/';
const char kEncodeSortSection = '~';
const char kEncodeEngineSection = '@';
+const char kEncodePipelineSection = '^';
// These special bytes are used in the encoding of auto-parameterized match expressions in the SBE
// plan cache key.
@@ -135,6 +137,7 @@ void encodeUserString(StringData s, BuilderType* builder) {
case kEncodeEngineSection:
case kEncodeParamMarker:
case kEncodeConstantLiteralMarker:
+ case kEncodePipelineSection:
case '\\':
if constexpr (hasAppendChar<BuilderType>) {
builder->appendChar('\\');
@@ -431,6 +434,26 @@ void encodeCollation(const CollatorInterface* collation, StringBuilder* keyBuild
// not be stable between versions.
}
+void encodePipeline(const std::vector<std::unique_ptr<InnerPipelineStageInterface>>& pipeline,
+ BufBuilder* bufBuilder) {
+ bufBuilder->appendChar(kEncodePipelineSection);
+ for (auto& stage : pipeline) {
+ std::vector<Value> serializedArray;
+ if (auto lookupStage = dynamic_cast<DocumentSourceLookUp*>(stage->documentSource())) {
+ lookupStage->serializeToArray(serializedArray, boost::none);
+ tassert(6443201,
+ "$lookup stage isn't serialized to a single bson object",
+ serializedArray.size() == 1 && serializedArray[0].getType() == Object);
+ const auto bson = serializedArray[0].getDocument().toBson();
+ bufBuilder->appendBuf(bson.objdata(), bson.objsize());
+ } else {
+ tasserted(6443200,
+ str::stream() << "Pipeline stage cannot be encoded in plan cache key: "
+ << stage->documentSource()->getSourceName());
+ }
+ }
+}
+
template <class RegexIterator>
void encodeRegexFlagsForMatch(RegexIterator first, RegexIterator last, StringBuilder* keyBuilder) {
// We sort the flags, so that queries with the same regex flags in different orders will have
@@ -1085,6 +1108,8 @@ std::string encodeSBE(const CanonicalQuery& cq) {
encodeFindCommandRequest(cq.getFindCommandRequest(), &bufBuilder);
+ encodePipeline(cq.pipeline(), &bufBuilder);
+
return base64::encode(StringData(bufBuilder.buf(), bufBuilder.len()));
}
@@ -1106,5 +1131,14 @@ CanonicalQuery::IndexFilterKey encodeForIndexFilters(const CanonicalQuery& cq) {
uint32_t computeHash(StringData key) {
return SimpleStringDataComparator::kInstance.hash(key);
}
+
+bool canUseSbePlanCache(const CanonicalQuery& cq) {
+ for (auto& stage : cq.pipeline()) {
+ if (StringData{stage->documentSource()->getSourceName()} != "$lookup") {
+ return false;
+ }
+ }
+ return true;
+}
} // namespace canonical_query_encoder
} // namespace mongo
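Two pieces of this file work together: canUseSbePlanCache() gates SBE plan-cache keys to queries whose pushed-down pipeline stages are all $lookup, and encodePipeline() extends the SBE key with a '^' section marker followed by each $lookup stage serialized to a single BSON object (the whole buffer is then base64-encoded). The following is a rough standard-C++ sketch of those two routines, where Stage stands in for InnerPipelineStageInterface and a string stands in for the BSON bytes; it is an illustration of the encoding shape, not the server implementation.

    #include <iostream>
    #include <string>
    #include <vector>

    struct Stage {
        std::string name;        // e.g. "$lookup"
        std::string serialized;  // placeholder for the stage's single BSON object
    };

    // Mirrors canUseSbePlanCache(): only pipelines made purely of $lookup stages qualify.
    bool canUseSbePlanCache(const std::vector<Stage>& pipeline) {
        for (const auto& stage : pipeline) {
            if (stage.name != "$lookup") {
                return false;
            }
        }
        return true;
    }

    // Mirrors encodePipeline(): one section marker, then each stage's bytes appended.
    void encodePipeline(const std::vector<Stage>& pipeline, std::string* key) {
        key->push_back('^');  // kEncodePipelineSection
        for (const auto& stage : pipeline) {
            key->append(stage.serialized);
        }
    }

    int main() {
        std::vector<Stage> pipeline{{"$lookup", "<lookup-bson-bytes>"}};
        std::string key = "<find-command-encoding>";
        if (canUseSbePlanCache(pipeline)) {
            encodePipeline(pipeline, &key);  // the real key is then base64-encoded
        }
        std::cout << key << "\n";
        return 0;
    }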
diff --git a/src/mongo/db/query/canonical_query_encoder.h b/src/mongo/db/query/canonical_query_encoder.h
index 3164ddbec67..4bfbb68c2f2 100644
--- a/src/mongo/db/query/canonical_query_encoder.h
+++ b/src/mongo/db/query/canonical_query_encoder.h
@@ -68,5 +68,11 @@ CanonicalQuery::IndexFilterKey encodeForIndexFilters(const CanonicalQuery& cq);
* Returns a hash of the given key (produced from either a QueryShapeString or a PlanCacheKey).
*/
uint32_t computeHash(StringData key);
+
+/**
+ * Returns whether a plan generated from this query can be stored in the SBE plan cache.
+ */
+bool canUseSbePlanCache(const CanonicalQuery& cq);
+
} // namespace canonical_query_encoder
} // namespace mongo
diff --git a/src/mongo/db/query/canonical_query_encoder_test.cpp b/src/mongo/db/query/canonical_query_encoder_test.cpp
index 486b4f2d14f..3394e048be8 100644
--- a/src/mongo/db/query/canonical_query_encoder_test.cpp
+++ b/src/mongo/db/query/canonical_query_encoder_test.cpp
@@ -29,10 +29,11 @@
#include "mongo/db/query/canonical_query_encoder.h"
-#include "mongo/db/catalog/collection_mock.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/json.h"
+#include "mongo/db/pipeline/document_source.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
+#include "mongo/db/pipeline/inner_pipeline_stage_impl.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/query/plan_cache_key_factory.h"
#include "mongo/db/query/query_test_service_context.h"
@@ -46,10 +47,17 @@ namespace {
using std::unique_ptr;
static const NamespaceString nss("testdb.testcoll");
+static const NamespaceString foreignNss("testdb.foreigncoll");
-PlanCacheKey makeKey(const CanonicalQuery& cq) {
- CollectionMock coll(nss);
- return plan_cache_key_factory::make<PlanCacheKey>(cq, &coll);
+std::vector<std::unique_ptr<InnerPipelineStageInterface>> parsePipeline(
+ const boost::intrusive_ptr<ExpressionContext> expCtx, const std::vector<BSONObj>& rawPipeline) {
+ auto pipeline = Pipeline::parse(rawPipeline, expCtx);
+
+ std::vector<std::unique_ptr<InnerPipelineStageInterface>> stages;
+ for (auto&& source : pipeline->getSources()) {
+ stages.emplace_back(std::make_unique<InnerPipelineStageImpl>(source));
+ }
+ return stages;
}
/**
@@ -59,7 +67,8 @@ unique_ptr<CanonicalQuery> canonicalize(BSONObj query,
BSONObj sort,
BSONObj proj,
BSONObj collation,
- std::unique_ptr<FindCommandRequest> findCommand = nullptr) {
+ std::unique_ptr<FindCommandRequest> findCommand = nullptr,
+ std::vector<BSONObj> pipelineObj = {}) {
QueryTestServiceContext serviceContext;
auto opCtx = serviceContext.makeOperationContext();
@@ -70,14 +79,26 @@ unique_ptr<CanonicalQuery> canonicalize(BSONObj query,
findCommand->setSort(sort.getOwned());
findCommand->setProjection(proj.getOwned());
findCommand->setCollation(collation.getOwned());
- const boost::intrusive_ptr<ExpressionContext> expCtx;
+
+ const auto expCtx = make_intrusive<ExpressionContextForTest>(opCtx.get(), nss);
+ expCtx->addResolvedNamespaces({foreignNss});
+ if (!findCommand->getCollation().isEmpty()) {
+ auto statusWithCollator = CollatorFactoryInterface::get(opCtx->getServiceContext())
+ ->makeFromBSON(findCommand->getCollation());
+ ASSERT_OK(statusWithCollator.getStatus());
+ expCtx->setCollator(std::move(statusWithCollator.getValue()));
+ }
+ auto pipeline = parsePipeline(expCtx, pipelineObj);
+
auto statusWithCQ =
CanonicalQuery::canonicalize(opCtx.get(),
std::move(findCommand),
false,
expCtx,
ExtensionsCallbackNoop(),
- MatchExpressionParser::kAllowAllSpecialFeatures);
+ MatchExpressionParser::kAllowAllSpecialFeatures,
+ ProjectionPolicies::findProjectionPolicies(),
+ std::move(pipeline));
ASSERT_OK(statusWithCQ.getStatus());
return std::move(statusWithCQ.getValue());
}
@@ -115,13 +136,14 @@ void testComputeSBEKey(BSONObj query,
BSONObj sort,
BSONObj proj,
std::string expectedStr,
- std::unique_ptr<FindCommandRequest> findCommand = nullptr) {
+ std::unique_ptr<FindCommandRequest> findCommand = nullptr,
+ std::vector<BSONObj> pipelineObj = {}) {
BSONObj collation;
unique_ptr<CanonicalQuery> cq(
- canonicalize(query, sort, proj, collation, std::move(findCommand)));
+ canonicalize(query, sort, proj, collation, std::move(findCommand), std::move(pipelineObj)));
cq->setSbeCompatible(true);
- auto key = makeKey(*cq);
- ASSERT_EQUALS(key.toString(), expectedStr);
+ const auto key = canonical_query_encoder::encodeSBE(*cq);
+ ASSERT_EQUALS(key, expectedStr);
}
void testComputeKey(const char* queryStr,
@@ -135,12 +157,14 @@ void testComputeSBEKey(const char* queryStr,
const char* sortStr,
const char* projStr,
std::string expectedStr,
- std::unique_ptr<FindCommandRequest> findCommand = nullptr) {
+ std::unique_ptr<FindCommandRequest> findCommand = nullptr,
+ std::vector<BSONObj> pipelineObj = {}) {
testComputeSBEKey(fromjson(queryStr),
fromjson(sortStr),
fromjson(projStr),
expectedStr,
- std::move(findCommand));
+ std::move(findCommand),
+ std::move(pipelineObj));
}
TEST(CanonicalQueryEncoderTest, ComputeKey) {
@@ -262,8 +286,6 @@ TEST(CanonicalQueryEncoderTest, ComputeKeyEscaped) {
// Cache keys for $geoWithin queries with legacy and GeoJSON coordinates should
// not be the same.
TEST(CanonicalQueryEncoderTest, ComputeKeyGeoWithin) {
- PlanCache planCache(5000);
-
// Legacy coordinates.
unique_ptr<CanonicalQuery> cqLegacy(
canonicalize("{a: {$geoWithin: "
@@ -273,7 +295,8 @@ TEST(CanonicalQueryEncoderTest, ComputeKeyGeoWithin) {
canonicalize("{a: {$geoWithin: "
"{$geometry: {type: 'Polygon', coordinates: "
"[[[0, 0], [0, 90], [90, 0], [0, 0]]]}}}}"));
- ASSERT_NOT_EQUALS(makeKey(*cqLegacy), makeKey(*cqNew));
+ ASSERT_NOT_EQUALS(canonical_query_encoder::encode(*cqLegacy),
+ canonical_query_encoder::encode(*cqNew));
}
// GEO_NEAR cache keys should include information on geometry and CRS in addition
@@ -395,85 +418,87 @@ TEST(CanonicalQueryEncoderTest, ComputeKeySBE) {
// SBE must be enabled in order to generate SBE plan cache keys.
RAIIServerParameterControllerForTest controllerSBE("internalQueryForceClassicEngine", false);
- // TODO SERVER-61314: Remove when featureFlagSbePlanCache is removed.
RAIIServerParameterControllerForTest controllerSBEPlanCache("featureFlagSbePlanCache", true);
- testComputeSBEKey("{}", "{}", "{}", "YW4ABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ testComputeSBEKey("{}", "{}", "{}", "YW4ABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
"{$or: [{a: 1}, {b: 2}]}",
"{}",
"{}",
- "b3IAW2VxAGE/AAAAACxlcQBiPwEAAABdBQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ "b3IAW2VxAGE/AAAAACxlcQBiPwEAAABdBQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
- "{a: 1}", "{}", "{}", "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ "{a: 1}", "{}", "{}", "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
- "{b: 1}", "{}", "{}", "ZXEAYj8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ "{b: 1}", "{}", "{}", "ZXEAYj8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
"{a: 1, b: 1, c: 1}",
"{}",
"{}",
- "YW4AW2VxAGE/AAAAACxlcQBiPwEAAAAsZXEAYz8CAAAAXQUAAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
+ "YW4AW2VxAGE/"
+ "AAAAACxlcQBiPwEAAAAsZXEAYz8CAAAAXQUAAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
// With sort
- testComputeSBEKey("{}", "{a: 1}", "{}", "YW4ABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ testComputeSBEKey("{}", "{a: 1}", "{}", "YW4ABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
- "{}", "{a: -1}", "{}", "YW4ABQAAAAB+ZGEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ "{}", "{a: -1}", "{}", "YW4ABQAAAAB+ZGEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
- "{a: 1}", "{a: 1}", "{}", "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ "{a: 1}", "{a: 1}", "{}", "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
// With projection
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{a: 1}",
- "ZXEAYT8AAAAADAAAABBhAAEAAAAAfmFhAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
- testComputeSBEKey(
- "{}", "{a: 1}", "{a: 1}", "YW4ADAAAABBhAAEAAAAAfmFhAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
+ "ZXEAYT8AAAAADAAAABBhAAEAAAAAfmFhAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
+ testComputeSBEKey("{}",
+ "{a: 1}",
+ "{a: 1}",
+ "YW4ADAAAABBhAAEAAAAAfmFhAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
testComputeSBEKey("{}",
"{a: 1}",
"{a: 1, b: [{$const: 1}]}",
"YW4AKAAAABBhAAEAAAAEYgAZAAAAAzAAEQAAABAkY29uc3QAAQAAAAAAAH5hYQAAAAAAAAAAbm5u"
- "bgUAAAAABQAAAAAFAAAAAA==");
+ "bgUAAAAABQAAAAAFAAAAAF4=");
testComputeSBEKey(
- "{}", "{}", "{a: 1}", "YW4ADAAAABBhAAEAAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
+ "{}", "{}", "{a: 1}", "YW4ADAAAABBhAAEAAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
testComputeSBEKey(
- "{}", "{}", "{a: true}", "YW4ACQAAAAhhAAEAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
+ "{}", "{}", "{a: true}", "YW4ACQAAAAhhAAEAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
testComputeSBEKey(
- "{}", "{}", "{a: false}", "YW4ACQAAAAhhAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
+ "{}", "{}", "{a: false}", "YW4ACQAAAAhhAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
// With FindCommandRequest
auto findCommand = std::make_unique<FindCommandRequest>(nss);
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setAllowDiskUse(true);
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAHRubm4FAAAAAAUAAAAABQAAAAA=",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAHRubm4FAAAAAAUAAAAABQAAAABe",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setAllowDiskUse(false);
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAGZubm4FAAAAAAUAAAAABQAAAAA=",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAGZubm4FAAAAAAUAAAAABQAAAABe",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setReturnKey(true);
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG50bm4FAAAAAAUAAAAABQAAAAA=",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG50bm4FAAAAAAUAAAAABQAAAABe",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setRequestResumeToken(false);
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5uZm4FAAAAAAUAAAAABQAAAAA=",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5uZm4FAAAAAAUAAAAABQAAAABe",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
@@ -481,7 +506,7 @@ TEST(CanonicalQueryEncoderTest, ComputeKeySBE) {
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEKAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA",
+ "ZXEAYT8AAAAABQAAAAB+YWEKAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
@@ -489,7 +514,7 @@ TEST(CanonicalQueryEncoderTest, ComputeKeySBE) {
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAACgAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAACgAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
@@ -497,14 +522,14 @@ TEST(CanonicalQueryEncoderTest, ComputeKeySBE) {
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAwAAAAQYQABAAAAAAUAAAAA",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAwAAAAQYQABAAAAAAUAAAAAXg==",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setMax(mongo::fromjson("{ a : 1 }"));
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAADAAAABBhAAEAAAAA",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAADAAAABBhAAEAAAAAXg==",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setRequestResumeToken(true);
@@ -515,9 +540,74 @@ TEST(CanonicalQueryEncoderTest, ComputeKeySBE) {
"{a: 1}",
"{}",
"{}",
- "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5udG4YAAAAEiRyZWNvcmRJZAABAAAAAAAAAAAFAAAAAAUAAAAA",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5udG4YAAAAEiRyZWNvcmRJZAABAAAAAAAAAAAFAAAAAAUAAAAAXg==",
std::move(findCommand));
}
+TEST(CanonicalQueryEncoderTest, ComputeKeySBEWithPipeline) {
+ // SBE must be enabled in order to generate SBE plan cache keys.
+ RAIIServerParameterControllerForTest controllerSBE("internalQueryForceClassicEngine", false);
+
+ RAIIServerParameterControllerForTest controllerSBEPlanCache("featureFlagSbePlanCache", true);
+
+ auto getLookupBson = [](StringData localField, StringData foreignField, StringData asField) {
+ return BSON("$lookup" << BSON("from" << foreignNss.coll() << "localField" << localField
+ << "foreignField" << foreignField << "as" << asField));
+ };
+
+ // No pipeline stage.
+ testComputeSBEKey("{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe",
+ nullptr,
+ {});
+
+ // Different $lookup stage options.
+ testComputeSBEKey(
+ "{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABeWgAAAAMkbG9va3VwAEwAAAACZnJvbQAMAA"
+ "AAZm9yZWlnbmNvbGwAAmFzAAMAAABhcwACbG9jYWxGaWVsZAACAAAAYQACZm9yZWlnbkZpZWxkAAIAAABiAAAA",
+ nullptr,
+ {getLookupBson("a", "b", "as")});
+ testComputeSBEKey("{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABeWwAAAAMkbG9va3VwAE0A"
+ "AAACZnJvbQAMAAAAZm9yZWlnbmNvbGwAAmFzAAMAAABhcwACbG9jYWxGaWVsZAADAAAAYTEAAmZv"
+ "cmVpZ25GaWVsZAACAAAAYgAAAA==",
+ nullptr,
+ {getLookupBson("a1", "b", "as")});
+ testComputeSBEKey("{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABeWwAAAAMkbG9va3VwAE0A"
+ "AAACZnJvbQAMAAAAZm9yZWlnbmNvbGwAAmFzAAMAAABhcwACbG9jYWxGaWVsZAACAAAAYQACZm9y"
+ "ZWlnbkZpZWxkAAMAAABiMQAAAA==",
+ nullptr,
+ {getLookupBson("a", "b1", "as")});
+ testComputeSBEKey("{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABeWwAAAAMkbG9va3VwAE0A"
+ "AAACZnJvbQAMAAAAZm9yZWlnbmNvbGwAAmFzAAQAAABhczEAAmxvY2FsRmllbGQAAgAAAGEAAmZv"
+ "cmVpZ25GaWVsZAACAAAAYgAAAA==",
+ nullptr,
+ {getLookupBson("a", "b", "as1")});
+
+ // Multiple $lookup stages.
+ testComputeSBEKey("{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABeWgAAAAMkbG9va3VwAEwA"
+ "AAACZnJvbQAMAAAAZm9yZWlnbmNvbGwAAmFzAAMAAABhcwACbG9jYWxGaWVsZAACAAAAYQACZm9y"
+ "ZWlnbkZpZWxkAAIAAABiAAAAXQAAAAMkbG9va3VwAE8AAAACZnJvbQAMAAAAZm9yZWlnbmNvbGwA"
+ "AmFzAAQAAABhczEAAmxvY2FsRmllbGQAAwAAAGExAAJmb3JlaWduRmllbGQAAwAAAGIxAAAA",
+ nullptr,
+ {getLookupBson("a", "b", "as"), getLookupBson("a1", "b1", "as1")});
+}
+
} // namespace
} // namespace mongo
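The churn in the expected key strings above is consistent with the new pipeline section: every SBE key now ends with at least the marker byte '^' (0x5E), so the base64 tails change; for example, a key whose buffer previously ended on a 3-byte boundary now gains a trailing "Xg==" group. The single-byte case can be checked by hand with a short snippet:

    #include <cstdio>

    int main() {
        // The pipeline section starts with the marker byte '^' (0x5E). Base64-encoding a
        // buffer whose final, lone byte is 0x5E yields the padded group "Xg==":
        //   01011110 -> 010111 10(0000) -> indices 23 ('X') and 32 ('g').
        const unsigned char marker = '^';
        static const char tbl[] =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        const char out[5] = {tbl[marker >> 2], tbl[(marker & 0x3) << 4], '=', '=', '\0'};
        std::printf("%s\n", out);  // prints "Xg=="
        return 0;
    }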
diff --git a/src/mongo/db/query/classic_stage_builder.cpp b/src/mongo/db/query/classic_stage_builder.cpp
index cc1915510c1..4404e2ab6da 100644
--- a/src/mongo/db/query/classic_stage_builder.cpp
+++ b/src/mongo/db/query/classic_stage_builder.cpp
@@ -79,7 +79,7 @@ std::unique_ptr<PlanStage> ClassicStageBuilder::build(const QuerySolutionNode* r
CollectionScanParams params;
params.tailable = csn->tailable;
params.shouldTrackLatestOplogTimestamp = csn->shouldTrackLatestOplogTimestamp;
- params.assertTsHasNotFallenOffOplog = csn->assertTsHasNotFallenOffOplog;
+ params.assertTsHasNotFallenOff = csn->assertTsHasNotFallenOff;
params.direction = (csn->direction == 1) ? CollectionScanParams::FORWARD
: CollectionScanParams::BACKWARD;
params.shouldWaitForOplogVisibility = csn->shouldWaitForOplogVisibility;
diff --git a/src/mongo/db/query/datetime/date_time_support.cpp b/src/mongo/db/query/datetime/date_time_support.cpp
index 09badabd4a0..439c1f028d2 100644
--- a/src/mongo/db/query/datetime/date_time_support.cpp
+++ b/src/mongo/db/query/datetime/date_time_support.cpp
@@ -76,6 +76,8 @@ long long seconds(Date_t date) {
// Format specifier map when parsing a date from a string with a required format.
//
const std::vector<timelib_format_specifier> kDateFromStringFormatMap = {
+ {'b', TIMELIB_FORMAT_TEXTUAL_MONTH_3_LETTER},
+ {'B', TIMELIB_FORMAT_TEXTUAL_MONTH_FULL},
{'d', TIMELIB_FORMAT_DAY_TWO_DIGIT},
{'G', TIMELIB_FORMAT_YEAR_ISO},
{'H', TIMELIB_FORMAT_HOUR_TWO_DIGIT_24_MAX},
@@ -775,6 +777,7 @@ static const StringMap<DayOfWeek> dayOfWeekNameToDayOfWeekMap{
{"sunday", DayOfWeek::sunday},
{"sun", DayOfWeek::sunday},
};
+
} // namespace
long long dateDiff(Date_t startDate,
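The two new entries map %b to TIMELIB_FORMAT_TEXTUAL_MONTH_3_LETTER and %B to TIMELIB_FORMAT_TEXTUAL_MONTH_FULL, so date strings containing textual month names can be parsed when a required format is supplied; the actual parsing is delegated to timelib through this specifier map. As a rough standard-library analogue (not the server's code path), std::get_time accepts the same specifiers, so the behaviour can be previewed like this:

    #include <ctime>
    #include <iomanip>
    #include <iostream>
    #include <sstream>

    int main() {
        // Standard-library analogue of the new specifiers: std::get_time's %b accepts
        // abbreviated ("Jun") and full ("June") month names in the default "C" locale.
        std::tm tm = {};
        std::istringstream in("15 Jun 2022");
        in >> std::get_time(&tm, "%d %b %Y");
        if (!in.fail()) {
            std::cout << "day " << tm.tm_mday << ", month index " << tm.tm_mon
                      << ", year " << tm.tm_year + 1900 << "\n";  // day 15, month index 5, year 2022
        }
        return 0;
    }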
diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp
index 414badb8332..568c3da9fe0 100644
--- a/src/mongo/db/query/explain.cpp
+++ b/src/mongo/db/query/explain.cpp
@@ -48,6 +48,7 @@
#include "mongo/db/query/collection_query_info.h"
#include "mongo/db/query/explain_common.h"
#include "mongo/db/query/get_executor.h"
+#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/plan_cache_key_factory.h"
#include "mongo/db/query/plan_executor.h"
#include "mongo/db/query/plan_executor_impl.h"
@@ -79,7 +80,7 @@ namespace {
* - 'out' is a builder for the explain output.
*/
void generatePlannerInfo(PlanExecutor* exec,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
BSONObj extraInfo,
BSONObjBuilder* out) {
BSONObjBuilder plannerBob(out->subobjStart("queryPlanner"));
@@ -91,22 +92,23 @@ void generatePlannerInfo(PlanExecutor* exec,
bool indexFilterSet = false;
boost::optional<uint32_t> queryHash;
boost::optional<uint32_t> planCacheKeyHash;
- if (collection && exec->getCanonicalQuery()) {
+ const auto& mainCollection = collections.getMainCollection();
+ if (mainCollection && exec->getCanonicalQuery()) {
const QuerySettings* querySettings =
- QuerySettingsDecoration::get(collection->getSharedDecorations());
+ QuerySettingsDecoration::get(mainCollection->getSharedDecorations());
if (exec->getCanonicalQuery()->isSbeCompatible() &&
feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() &&
!exec->getCanonicalQuery()->getForceClassicEngine() &&
- // TODO(SERVER-61507): Remove pipeline check once lowered pipelines are integrated with
- // SBE plan cache.
- exec->getCanonicalQuery()->pipeline().empty()) {
- const auto planCacheKeyInfo = plan_cache_key_factory::make<sbe::PlanCacheKey>(
- *exec->getCanonicalQuery(), collection);
+ // TODO SERVER-61507: remove canUseSbePlanCache check when $group pushdown is
+ // integrated with the SBE plan cache.
+ canonical_query_encoder::canUseSbePlanCache(*exec->getCanonicalQuery())) {
+ const auto planCacheKeyInfo =
+ plan_cache_key_factory::make(*exec->getCanonicalQuery(), collections);
planCacheKeyHash = planCacheKeyInfo.planCacheKeyHash();
queryHash = planCacheKeyInfo.queryHash();
} else {
- const auto planCacheKeyInfo =
- plan_cache_key_factory::make<PlanCacheKey>(*exec->getCanonicalQuery(), collection);
+ const auto planCacheKeyInfo = plan_cache_key_factory::make<PlanCacheKey>(
+ *exec->getCanonicalQuery(), mainCollection);
planCacheKeyHash = planCacheKeyInfo.planCacheKeyHash();
queryHash = planCacheKeyInfo.queryHash();
}
@@ -310,7 +312,7 @@ void appendBasicPlanCacheEntryInfoToBSON(const EntryType& entry, BSONObjBuilder*
} // namespace
void Explain::explainStages(PlanExecutor* exec,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
ExplainOptions::Verbosity verbosity,
Status executePlanStatus,
boost::optional<PlanExplainer::PlanStatsDetails> winningPlanTrialStats,
@@ -325,7 +327,7 @@ void Explain::explainStages(PlanExecutor* exec,
out->appendElements(explainVersionToBson(explainer.getVersion()));
if (verbosity >= ExplainOptions::Verbosity::kQueryPlanner) {
- generatePlannerInfo(exec, collection, extraInfo, out);
+ generatePlannerInfo(exec, collections, extraInfo, out);
}
if (verbosity >= ExplainOptions::Verbosity::kExecStats) {
@@ -364,7 +366,7 @@ void Explain::explainPipeline(PlanExecutor* exec,
}
void Explain::explainStages(PlanExecutor* exec,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
ExplainOptions::Verbosity verbosity,
BSONObj extraInfo,
const BSONObj& command,
@@ -372,9 +374,10 @@ void Explain::explainStages(PlanExecutor* exec,
auto&& explainer = exec->getPlanExplainer();
auto winningPlanTrialStats = explainer.getWinningPlanTrialStats();
Status executePlanStatus = Status::OK();
- const CollectionPtr* collectionPtr = &collection;
+ const MultipleCollectionAccessor* collectionsPtr = &collections;
// If we need execution stats, then run the plan in order to gather the stats.
+ const MultipleCollectionAccessor emptyCollections;
if (verbosity >= ExplainOptions::Verbosity::kExecStats) {
try {
executePlan(exec);
@@ -386,12 +389,12 @@ void Explain::explainStages(PlanExecutor* exec,
// then the collection may no longer be valid. We conservatively set our collection pointer
// to null in case it is invalid.
if (!executePlanStatus.isOK() && executePlanStatus != ErrorCodes::NoQueryExecutionPlans) {
- collectionPtr = &CollectionPtr::null;
+ collectionsPtr = &emptyCollections;
}
}
explainStages(exec,
- *collectionPtr,
+ *collectionsPtr,
verbosity,
executePlanStatus,
winningPlanTrialStats,
@@ -403,6 +406,15 @@ void Explain::explainStages(PlanExecutor* exec,
explain_common::generateServerParameters(out);
}
+void Explain::explainStages(PlanExecutor* exec,
+ const CollectionPtr& collection,
+ ExplainOptions::Verbosity verbosity,
+ BSONObj extraInfo,
+ const BSONObj& command,
+ BSONObjBuilder* out) {
+ explainStages(exec, MultipleCollectionAccessor(collection), verbosity, extraInfo, command, out);
+}
+
void Explain::planCacheEntryToBSON(const PlanCacheEntry& entry, BSONObjBuilder* out) {
out->append("version", "1");
diff --git a/src/mongo/db/query/explain.h b/src/mongo/db/query/explain.h
index d41dd3a1725..1dcabdeb7e3 100644
--- a/src/mongo/db/query/explain.h
+++ b/src/mongo/db/query/explain.h
@@ -39,6 +39,7 @@ namespace mongo {
class Collection;
class CollectionPtr;
+class MultipleCollectionAccessor;
class OperationContext;
class PlanExecutorPipeline;
struct PlanSummaryStats;
@@ -77,15 +78,26 @@ public:
BSONObj extraInfo,
const BSONObj& command,
BSONObjBuilder* out);
+
+ /**
+ * Similar to the overload above, but accepts multiple collections in order to support
+ * aggregations that involve more than one collection (e.g. $lookup).
+ */
+ static void explainStages(PlanExecutor* exec,
+ const MultipleCollectionAccessor& collections,
+ ExplainOptions::Verbosity verbosity,
+ BSONObj extraInfo,
+ const BSONObj& command,
+ BSONObjBuilder* out);
+
/**
* Adds "queryPlanner" and "executionStats" (if requested in verbosity) fields to 'out'. Unlike
* the other overload of explainStages() above, this one does not add the "serverInfo" section.
*
* - 'exec' is the stage tree for the operation being explained.
- * - 'collection' is the relevant collection. During this call it may be required to execute the
- * plan to collect statistics. If the PlanExecutor uses 'kLockExternally' lock policy, the
- * caller should hold at least an IS lock on the collection the that the query runs on, even if
- * 'collection' parameter is nullptr.
+ * - 'collections' are the relevant main and secondary collections (e.g. for $lookup). If the
+ * PlanExecutor uses the 'kLockExternally' lock policy, the caller should hold the necessary
+ * db_raii objects on the involved collections.
* - 'verbosity' is the verbosity level of the explain.
* - 'extraInfo' specifies additional information to include into the output.
* - 'executePlanStatus' is the status returned after executing the query (Status::OK if the
@@ -97,7 +109,7 @@ public:
*/
static void explainStages(
PlanExecutor* exec,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
ExplainOptions::Verbosity verbosity,
Status executePlanStatus,
boost::optional<PlanExplainer::PlanStatsDetails> winningPlanTrialStats,
diff --git a/src/mongo/db/query/fle/server_rewrite.cpp b/src/mongo/db/query/fle/server_rewrite.cpp
index f4f02bcb383..2aeb99a4061 100644
--- a/src/mongo/db/query/fle/server_rewrite.cpp
+++ b/src/mongo/db/query/fle/server_rewrite.cpp
@@ -32,6 +32,7 @@
#include <memory>
+#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/bsontypes.h"
@@ -48,9 +49,14 @@
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/query/collation/collator_factory_interface.h"
#include "mongo/db/service_context.h"
+#include "mongo/logv2/log.h"
#include "mongo/s/grid.h"
#include "mongo/s/transaction_router_resource_yielder.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/intrusive_counter.h"
+
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
namespace mongo::fle {
@@ -68,6 +74,56 @@ std::unique_ptr<CollatorInterface> collatorFromBSON(OperationContext* opCtx,
}
namespace {
+template <typename PayloadT>
+boost::intrusive_ptr<ExpressionInternalFLEEqual> generateFleEqualMatch(StringData path,
+ const PayloadT& ffp,
+ ExpressionContext* expCtx) {
+ // Generate { $_internalFleEq: { field: "$field_name", server: f_3, counter: cm, edc: k_EDC } }
+ auto tokens = ParsedFindPayload(ffp);
+
+ uassert(6672401,
+ "Missing required field server encryption token in find payload",
+ tokens.serverToken.has_value());
+
+ return make_intrusive<ExpressionInternalFLEEqual>(
+ expCtx,
+ ExpressionFieldPath::createPathFromString(
+ expCtx, path.toString(), expCtx->variablesParseState),
+ tokens.serverToken.get().data,
+ tokens.maxCounter.value_or(0LL),
+ tokens.edcToken.data);
+}
+
+
+template <typename PayloadT>
+std::unique_ptr<ExpressionInternalFLEEqual> generateFleEqualMatchUnique(StringData path,
+ const PayloadT& ffp,
+ ExpressionContext* expCtx) {
+ // Generate { $_internalFleEq: { field: "$field_name", server: f_3, counter: cm, edc: k_EDC } }
+ auto tokens = ParsedFindPayload(ffp);
+
+ uassert(6672419,
+ "Missing required field server encryption token in find payload",
+ tokens.serverToken.has_value());
+
+ return std::make_unique<ExpressionInternalFLEEqual>(
+ expCtx,
+ ExpressionFieldPath::createPathFromString(
+ expCtx, path.toString(), expCtx->variablesParseState),
+ tokens.serverToken.get().data,
+ tokens.maxCounter.value_or(0LL),
+ tokens.edcToken.data);
+}
+
+std::unique_ptr<MatchExpression> generateFleEqualMatchAndExpr(StringData path,
+ const BSONElement ffp,
+ ExpressionContext* expCtx) {
+ auto fleEqualMatch = generateFleEqualMatch(path, ffp, expCtx);
+
+ return std::make_unique<ExprMatchExpression>(fleEqualMatch, expCtx);
+}
+
+
/**
* This section defines a mapping from DocumentSources to the dispatch function to appropriately
* handle FLE rewriting for that stage. This should be kept in line with code on the client-side
@@ -128,7 +184,8 @@ public:
* The final output will look like
* {$or: [{$in: [tag0, "$__safeContent__"]}, {$in: [tag1, "$__safeContent__"]}, ...]}.
*/
- std::unique_ptr<Expression> rewriteComparisonsToEncryptedField(
+ std::unique_ptr<Expression> rewriteInToEncryptedField(
+ const Expression* leftExpr,
const std::vector<boost::intrusive_ptr<Expression>>& equalitiesList) {
size_t numFFPs = 0;
std::vector<boost::intrusive_ptr<Expression>> orListElems;
@@ -140,11 +197,122 @@ public:
continue;
}
- // ... rewrite the payload to a list of tags...
numFFPs++;
+ }
+ }
+
+ // Finally, construct an $or of all of the $ins.
+ if (numFFPs == 0) {
+ return nullptr;
+ }
+
+ uassert(
+ 6334102,
+ "If any elements in an comparison expression are encrypted, then all elements should "
+ "be encrypted.",
+ numFFPs == equalitiesList.size());
+
+ auto leftFieldPath = dynamic_cast<const ExpressionFieldPath*>(leftExpr);
+ uassert(6672417,
+ "$in is only supported with Queryable Encryption when the first argument is a "
+ "field path",
+ leftFieldPath != nullptr);
+
+ if (!queryRewriter->isForceHighCardinality()) {
+ try {
+ for (auto& equality : equalitiesList) {
+ // For each expression representing a FleFindPayload...
+ if (auto constChild = dynamic_cast<ExpressionConstant*>(equality.get())) {
+ // ... rewrite the payload to a list of tags...
+ auto tags = queryRewriter->rewritePayloadAsTags(constChild->getValue());
+ for (auto&& tagElt : tags) {
+ // ... and for each tag, construct expression {$in: [tag,
+ // "$__safeContent__"]}.
+ std::vector<boost::intrusive_ptr<Expression>> inVec{
+ ExpressionConstant::create(queryRewriter->expCtx(), tagElt),
+ ExpressionFieldPath::createPathFromString(
+ queryRewriter->expCtx(),
+ kSafeContent,
+ queryRewriter->expCtx()->variablesParseState)};
+ orListElems.push_back(make_intrusive<ExpressionIn>(
+ queryRewriter->expCtx(), std::move(inVec)));
+ }
+ }
+ }
+
+ didRewrite = true;
+
+ return std::make_unique<ExpressionOr>(queryRewriter->expCtx(),
+ std::move(orListElems));
+ } catch (const ExceptionFor<ErrorCodes::FLEMaxTagLimitExceeded>& ex) {
+ LOGV2_DEBUG(6672403,
+ 2,
+ "FLE Max tag limit hit during aggregation $in rewrite",
+ "__error__"_attr = ex.what());
+
+ if (queryRewriter->getHighCardinalityMode() !=
+ FLEQueryRewriter::HighCardinalityMode::kUseIfNeeded) {
+ throw;
+ }
+
+ // fall through
+ }
+ }
+
+ for (auto& equality : equalitiesList) {
+ if (auto constChild = dynamic_cast<ExpressionConstant*>(equality.get())) {
+ auto fleEqExpr = generateFleEqualMatch(
+ leftFieldPath->getFieldPathWithoutCurrentPrefix().fullPath(),
+ constChild->getValue(),
+ queryRewriter->expCtx());
+ orListElems.push_back(fleEqExpr);
+ }
+ }
+
+ didRewrite = true;
+ return std::make_unique<ExpressionOr>(queryRewriter->expCtx(), std::move(orListElems));
+ }
+
+ // Rewrite a [$eq : [$fieldpath, constant]] or [$eq: [constant, $fieldpath]]
+ // to _internalFleEq: {field: $fieldpath, edc: edcToken, counter: N, server: serverToken}
+ std::unique_ptr<Expression> rewriteComparisonsToEncryptedField(
+ const std::vector<boost::intrusive_ptr<Expression>>& equalitiesList) {
+
+ auto leftConstant = dynamic_cast<ExpressionConstant*>(equalitiesList[0].get());
+ auto rightConstant = dynamic_cast<ExpressionConstant*>(equalitiesList[1].get());
+
+ bool isLeftFFP = leftConstant && queryRewriter->isFleFindPayload(leftConstant->getValue());
+ bool isRightFFP =
+ rightConstant && queryRewriter->isFleFindPayload(rightConstant->getValue());
+
+ uassert(6334100,
+ "Cannot compare two encrypted constants to each other",
+ !(isLeftFFP && isRightFFP));
+
+ // No FLE Find Payload
+ if (!isLeftFFP && !isRightFFP) {
+ return nullptr;
+ }
+
+ auto leftFieldPath = dynamic_cast<ExpressionFieldPath*>(equalitiesList[0].get());
+ auto rightFieldPath = dynamic_cast<ExpressionFieldPath*>(equalitiesList[1].get());
+
+ uassert(
+ 6672413,
+ "Queryable Encryption only supports comparisons between a field path and a constant",
+ leftFieldPath || rightFieldPath);
+
+ auto fieldPath = leftFieldPath ? leftFieldPath : rightFieldPath;
+ auto constChild = isLeftFFP ? leftConstant : rightConstant;
+
+ if (!queryRewriter->isForceHighCardinality()) {
+ try {
+ std::vector<boost::intrusive_ptr<Expression>> orListElems;
+
auto tags = queryRewriter->rewritePayloadAsTags(constChild->getValue());
for (auto&& tagElt : tags) {
- // ... and for each tag, construct expression {$in: [tag, "$__safeContent__"]}.
+ // ... and for each tag, construct expression {$in: [tag,
+ // "$__safeContent__"]}.
std::vector<boost::intrusive_ptr<Expression>> inVec{
ExpressionConstant::create(queryRewriter->expCtx(), tagElt),
ExpressionFieldPath::createPathFromString(
@@ -154,21 +322,33 @@ public:
orListElems.push_back(
make_intrusive<ExpressionIn>(queryRewriter->expCtx(), std::move(inVec)));
}
+
+ didRewrite = true;
+ return std::make_unique<ExpressionOr>(queryRewriter->expCtx(),
+ std::move(orListElems));
+
+ } catch (const ExceptionFor<ErrorCodes::FLEMaxTagLimitExceeded>& ex) {
+ LOGV2_DEBUG(6672409,
+ 2,
+ "FLE Max tag limit hit during query $in rewrite",
+ "__error__"_attr = ex.what());
+
+ if (queryRewriter->getHighCardinalityMode() !=
+ FLEQueryRewriter::HighCardinalityMode::kUseIfNeeded) {
+ throw;
+ }
+
+ // fall through
}
}
- // Finally, construct an $or of all of the $ins.
- if (numFFPs == 0) {
- return nullptr;
- }
- uassert(
- 6334102,
- "If any elements in an comparison expression are encrypted, then all elements should "
- "be encrypted.",
- numFFPs == equalitiesList.size());
+ auto fleEqExpr =
+ generateFleEqualMatchUnique(fieldPath->getFieldPathWithoutCurrentPrefix().fullPath(),
+ constChild->getValue(),
+ queryRewriter->expCtx());
didRewrite = true;
- return std::make_unique<ExpressionOr>(queryRewriter->expCtx(), std::move(orListElems));
+ return fleEqExpr;
}
std::unique_ptr<Expression> postVisit(Expression* exp) {
@@ -177,30 +357,28 @@ public:
// ignored when rewrites are done; there is no extra information in that child that
// doesn't exist in the FFPs in the $in list.
if (auto inList = dynamic_cast<ExpressionArray*>(inExpr->getOperandList()[1].get())) {
- return rewriteComparisonsToEncryptedField(inList->getChildren());
+ return rewriteInToEncryptedField(inExpr->getOperandList()[0].get(),
+ inList->getChildren());
}
} else if (auto eqExpr = dynamic_cast<ExpressionCompare*>(exp); eqExpr &&
(eqExpr->getOp() == ExpressionCompare::EQ ||
eqExpr->getOp() == ExpressionCompare::NE)) {
// Rewrite an $eq comparing an encrypted field and an encrypted constant to an $or.
- // Either child may be the constant, so try rewriting both.
- auto or0 = rewriteComparisonsToEncryptedField({eqExpr->getChildren()[0]});
- auto or1 = rewriteComparisonsToEncryptedField({eqExpr->getChildren()[1]});
- uassert(6334100, "Cannot compare two encrypted constants to each other", !or0 || !or1);
+ auto newExpr = rewriteComparisonsToEncryptedField(eqExpr->getChildren());
// Neither child is an encrypted constant, and no rewriting needs to be done.
- if (!or0 && !or1) {
+ if (!newExpr) {
return nullptr;
}
// Exactly one child was an encrypted constant. The other child can be ignored; there is
// no extra information in that child that doesn't exist in the FFP.
if (eqExpr->getOp() == ExpressionCompare::NE) {
- std::vector<boost::intrusive_ptr<Expression>> notChild{(or0 ? or0 : or1).release()};
+ std::vector<boost::intrusive_ptr<Expression>> notChild{newExpr.release()};
return std::make_unique<ExpressionNot>(queryRewriter->expCtx(),
std::move(notChild));
}
- return std::move(or0 ? or0 : or1);
+ return newExpr;
}
return nullptr;
@@ -213,11 +391,14 @@ public:
BSONObj rewriteEncryptedFilter(const FLEStateCollectionReader& escReader,
const FLEStateCollectionReader& eccReader,
boost::intrusive_ptr<ExpressionContext> expCtx,
- BSONObj filter) {
+ BSONObj filter,
+ HighCardinalityModeAllowed mode) {
+
if (auto rewritten =
- FLEQueryRewriter(expCtx, escReader, eccReader).rewriteMatchExpression(filter)) {
+ FLEQueryRewriter(expCtx, escReader, eccReader, mode).rewriteMatchExpression(filter)) {
return rewritten.get();
}
+
return filter;
}
@@ -273,16 +454,18 @@ public:
FilterRewrite(boost::intrusive_ptr<ExpressionContext> expCtx,
const NamespaceString& nss,
const EncryptionInformation& encryptInfo,
- const BSONObj toRewrite)
- : RewriteBase(expCtx, nss, encryptInfo), userFilter(toRewrite) {}
+ const BSONObj toRewrite,
+ HighCardinalityModeAllowed mode)
+ : RewriteBase(expCtx, nss, encryptInfo), userFilter(toRewrite), _mode(mode) {}
~FilterRewrite(){};
void doRewrite(FLEStateCollectionReader& escReader, FLEStateCollectionReader& eccReader) final {
- rewrittenFilter = rewriteEncryptedFilter(escReader, eccReader, expCtx, userFilter);
+ rewrittenFilter = rewriteEncryptedFilter(escReader, eccReader, expCtx, userFilter, _mode);
}
const BSONObj userFilter;
BSONObj rewrittenFilter;
+ HighCardinalityModeAllowed _mode;
};
// This helper executes the rewrite(s) inside a transaction. The transaction runs in a separate
@@ -324,7 +507,8 @@ BSONObj rewriteEncryptedFilterInsideTxn(FLEQueryInterface* queryImpl,
StringData db,
const EncryptedFieldConfig& efc,
boost::intrusive_ptr<ExpressionContext> expCtx,
- BSONObj filter) {
+ BSONObj filter,
+ HighCardinalityModeAllowed mode) {
auto makeCollectionReader = [&](FLEQueryInterface* queryImpl, const StringData& coll) {
NamespaceString nss(db, coll);
auto docCount = queryImpl->countDocuments(nss);
@@ -332,7 +516,8 @@ BSONObj rewriteEncryptedFilterInsideTxn(FLEQueryInterface* queryImpl,
};
auto escReader = makeCollectionReader(queryImpl, efc.getEscCollection().get());
auto eccReader = makeCollectionReader(queryImpl, efc.getEccCollection().get());
- return rewriteEncryptedFilter(escReader, eccReader, expCtx, filter);
+
+ return rewriteEncryptedFilter(escReader, eccReader, expCtx, filter, mode);
}
BSONObj rewriteQuery(OperationContext* opCtx,
@@ -340,8 +525,9 @@ BSONObj rewriteQuery(OperationContext* opCtx,
const NamespaceString& nss,
const EncryptionInformation& info,
BSONObj filter,
- GetTxnCallback getTransaction) {
- auto sharedBlock = std::make_shared<FilterRewrite>(expCtx, nss, info, filter);
+ GetTxnCallback getTransaction,
+ HighCardinalityModeAllowed mode) {
+ auto sharedBlock = std::make_shared<FilterRewrite>(expCtx, nss, info, filter, mode);
doFLERewriteInTxn(opCtx, sharedBlock, getTransaction);
return sharedBlock->rewrittenFilter.getOwned();
}
@@ -365,7 +551,8 @@ void processFindCommand(OperationContext* opCtx,
nss,
findCommand->getEncryptionInformation().get(),
findCommand->getFilter().getOwned(),
- getTransaction));
+ getTransaction,
+ HighCardinalityModeAllowed::kAllow));
// The presence of encryptionInformation is a signal that this is a FLE request that requires
// special processing. Once we've rewritten the query, it's no longer a "special" FLE query, but
// a normal query that can be executed by the query system like any other, so remove
@@ -389,7 +576,8 @@ void processCountCommand(OperationContext* opCtx,
nss,
countCommand->getEncryptionInformation().get(),
countCommand->getQuery().getOwned(),
- getTxn));
+ getTxn,
+ HighCardinalityModeAllowed::kAllow));
// The presence of encryptionInformation is a signal that this is a FLE request that requires
// special processing. Once we've rewritten the query, it's no longer a "special" FLE query, but
// a normal query that can be executed by the query system like any other, so remove
@@ -504,59 +692,112 @@ std::vector<Value> FLEQueryRewriter::rewritePayloadAsTags(Value fleFindPayload)
return tagVec;
}
-std::unique_ptr<InMatchExpression> FLEQueryRewriter::rewriteEq(
- const EqualityMatchExpression* expr) {
+
+std::unique_ptr<MatchExpression> FLEQueryRewriter::rewriteEq(const EqualityMatchExpression* expr) {
auto ffp = expr->getData();
if (!isFleFindPayload(ffp)) {
return nullptr;
}
- auto obj = rewritePayloadAsTags(ffp);
-
- auto tags = std::vector<BSONElement>();
- obj.elems(tags);
+ if (_mode != HighCardinalityMode::kForceAlways) {
+ try {
+ auto obj = rewritePayloadAsTags(ffp);
+
+ auto tags = std::vector<BSONElement>();
+ obj.elems(tags);
+
+ auto inExpr = std::make_unique<InMatchExpression>(kSafeContent);
+ inExpr->setBackingBSON(std::move(obj));
+ auto status = inExpr->setEqualities(std::move(tags));
+ uassertStatusOK(status);
+ _rewroteLastExpression = true;
+ return inExpr;
+ } catch (const ExceptionFor<ErrorCodes::FLEMaxTagLimitExceeded>& ex) {
+ LOGV2_DEBUG(6672410,
+ 2,
+ "FLE Max tag limit hit during query $eq rewrite",
+ "__error__"_attr = ex.what());
+
+ if (_mode != HighCardinalityMode::kUseIfNeeded) {
+ throw;
+ }
- auto inExpr = std::make_unique<InMatchExpression>(kSafeContent);
- inExpr->setBackingBSON(std::move(obj));
- auto status = inExpr->setEqualities(std::move(tags));
- uassertStatusOK(status);
+ // fall through
+ }
+ }
+ auto exprMatch = generateFleEqualMatchAndExpr(expr->path(), ffp, _expCtx.get());
_rewroteLastExpression = true;
- return inExpr;
+ return exprMatch;
}
-std::unique_ptr<InMatchExpression> FLEQueryRewriter::rewriteIn(const InMatchExpression* expr) {
- auto backingBSONBuilder = BSONArrayBuilder();
+std::unique_ptr<MatchExpression> FLEQueryRewriter::rewriteIn(const InMatchExpression* expr) {
size_t numFFPs = 0;
for (auto& eq : expr->getEqualities()) {
if (isFleFindPayload(eq)) {
- auto obj = rewritePayloadAsTags(eq);
++numFFPs;
- for (auto&& elt : obj) {
- backingBSONBuilder.append(elt);
- }
}
}
+
if (numFFPs == 0) {
return nullptr;
}
+
// All elements in an encrypted $in expression should be FFPs.
uassert(
6329400,
"If any elements in a $in expression are encrypted, then all elements should be encrypted.",
numFFPs == expr->getEqualities().size());
- auto backingBSON = backingBSONBuilder.arr();
- auto allTags = std::vector<BSONElement>();
- backingBSON.elems(allTags);
+ if (_mode != HighCardinalityMode::kForceAlways) {
+
+ try {
+ auto backingBSONBuilder = BSONArrayBuilder();
+
+ for (auto& eq : expr->getEqualities()) {
+ auto obj = rewritePayloadAsTags(eq);
+ for (auto&& elt : obj) {
+ backingBSONBuilder.append(elt);
+ }
+ }
- auto inExpr = std::make_unique<InMatchExpression>(kSafeContent);
- inExpr->setBackingBSON(std::move(backingBSON));
- auto status = inExpr->setEqualities(std::move(allTags));
- uassertStatusOK(status);
+ auto backingBSON = backingBSONBuilder.arr();
+ auto allTags = std::vector<BSONElement>();
+ backingBSON.elems(allTags);
+
+ auto inExpr = std::make_unique<InMatchExpression>(kSafeContent);
+ inExpr->setBackingBSON(std::move(backingBSON));
+ auto status = inExpr->setEqualities(std::move(allTags));
+ uassertStatusOK(status);
+
+ _rewroteLastExpression = true;
+ return inExpr;
+
+ } catch (const ExceptionFor<ErrorCodes::FLEMaxTagLimitExceeded>& ex) {
+ LOGV2_DEBUG(6672411,
+ 2,
+ "FLE Max tag limit hit during query $in rewrite",
+ "__error__"_attr = ex.what());
+
+ if (_mode != HighCardinalityMode::kUseIfNeeded) {
+ throw;
+ }
+
+ // fall through
+ }
+ }
+
+ std::vector<std::unique_ptr<MatchExpression>> matches;
+ matches.reserve(numFFPs);
+
+ for (auto& eq : expr->getEqualities()) {
+ auto exprMatch = generateFleEqualMatchAndExpr(expr->path(), eq, _expCtx.get());
+ matches.push_back(std::move(exprMatch));
+ }
+ auto orExpr = std::make_unique<OrMatchExpression>(std::move(matches));
_rewroteLastExpression = true;
- return inExpr;
+ return orExpr;
}
} // namespace mongo::fle
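The server_rewrite.cpp changes above share one control-flow shape: try the tag-based $in rewrite first, and fall back to the $expr-based $_internalFleEq rewrite only when the tag limit is exceeded and the mode permits it. A minimal sketch of that pattern; buildTagInFilter and buildExprEqFilter are hypothetical stand-ins for the two rewrites shown in the diff, not real functions.

    // Sketch only; the helper names are illustrative.
    std::unique_ptr<MatchExpression> rewriteWithFallback(const BSONElement& ffp,
                                                         HighCardinalityMode mode) {
        if (mode != HighCardinalityMode::kForceAlways) {
            try {
                return buildTagInFilter(ffp);  // may throw FLEMaxTagLimitExceeded
            } catch (const ExceptionFor<ErrorCodes::FLEMaxTagLimitExceeded>&) {
                if (mode != HighCardinalityMode::kUseIfNeeded) {
                    throw;  // kDisallow: surface the tag-limit error to the caller
                }
                // kUseIfNeeded: fall through to the $expr rewrite
            }
        }
        return buildExprEqFilter(ffp);  // $expr + $_internalFleEq form
    }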
diff --git a/src/mongo/db/query/fle/server_rewrite.h b/src/mongo/db/query/fle/server_rewrite.h
index ed84ea283c5..bf02eeebd4e 100644
--- a/src/mongo/db/query/fle/server_rewrite.h
+++ b/src/mongo/db/query/fle/server_rewrite.h
@@ -31,7 +31,7 @@
#include <memory>
-#include "boost/smart_ptr/intrusive_ptr.hpp"
+#include <boost/smart_ptr/intrusive_ptr.hpp>
#include "mongo/bson/bsonobj.h"
#include "mongo/crypto/fle_crypto.h"
@@ -47,6 +47,14 @@ class FLEQueryInterface;
namespace fle {
/**
+ * Low-selectivity rewrites use $expr, which is not supported in all commands (for example, upserts).
+ */
+enum class HighCardinalityModeAllowed {
+ kAllow,
+ kDisallow,
+};
+
+/**
* Make a collator object from its BSON representation. Useful when creating ExpressionContext
* objects for parsing MatchExpressions as part of the server-side rewrite.
*/
@@ -62,7 +70,8 @@ BSONObj rewriteQuery(OperationContext* opCtx,
const NamespaceString& nss,
const EncryptionInformation& info,
BSONObj filter,
- GetTxnCallback getTransaction);
+ GetTxnCallback getTransaction,
+ HighCardinalityModeAllowed mode);
/**
* Process a find command with encryptionInformation in-place, rewriting the filter condition so
@@ -100,11 +109,13 @@ std::unique_ptr<Pipeline, PipelineDeleter> processPipeline(
* from inside an existing transaction using a FLEQueryInterface constructed from a
* transaction client.
*/
-BSONObj rewriteEncryptedFilterInsideTxn(FLEQueryInterface* queryImpl,
- StringData db,
- const EncryptedFieldConfig& efc,
- boost::intrusive_ptr<ExpressionContext> expCtx,
- BSONObj filter);
+BSONObj rewriteEncryptedFilterInsideTxn(
+ FLEQueryInterface* queryImpl,
+ StringData db,
+ const EncryptedFieldConfig& efc,
+ boost::intrusive_ptr<ExpressionContext> expCtx,
+ BSONObj filter,
+ HighCardinalityModeAllowed mode = HighCardinalityModeAllowed::kDisallow);
/**
* Class which handles rewriting filter MatchExpressions for FLE2. The functionality is encapsulated
@@ -116,14 +127,37 @@ BSONObj rewriteEncryptedFilterInsideTxn(FLEQueryInterface* queryImpl,
*/
class FLEQueryRewriter {
public:
+ enum class HighCardinalityMode {
+ // Always use high cardinality filters; used by tests.
+ kForceAlways,
+
+ // Use high cardinality filters only if the $in rewrite does not fit within the
+ // internalQueryFLERewriteMemoryLimit limit.
+ kUseIfNeeded,
+
+ // Do not rewrite into a high cardinality filter; throw an exception instead.
+ // Some contexts, such as upsert, do not support $expr.
+ kDisallow,
+ };
+
/**
* Takes in references to collection readers for the ESC and ECC that are used during tag
* computation.
*/
FLEQueryRewriter(boost::intrusive_ptr<ExpressionContext> expCtx,
const FLEStateCollectionReader& escReader,
- const FLEStateCollectionReader& eccReader)
+ const FLEStateCollectionReader& eccReader,
+ HighCardinalityModeAllowed mode = HighCardinalityModeAllowed::kAllow)
: _expCtx(expCtx), _escReader(&escReader), _eccReader(&eccReader) {
+
+ if (internalQueryFLEAlwaysUseHighCardinalityMode.load()) {
+ _mode = HighCardinalityMode::kForceAlways;
+ }
+
+ if (mode == HighCardinalityModeAllowed::kDisallow) {
+ _mode = HighCardinalityMode::kDisallow;
+ }
+
// This isn't the "real" query so we don't want to increment Expression
// counters here.
_expCtx->stopExpressionCounters();
@@ -184,6 +218,18 @@ public:
return _expCtx.get();
}
+ bool isForceHighCardinality() const {
+ return _mode == HighCardinalityMode::kForceAlways;
+ }
+
+ void setForceHighCardinalityForTest() {
+ _mode = HighCardinalityMode::kForceAlways;
+ }
+
+ HighCardinalityMode getHighCardinalityMode() const {
+ return _mode;
+ }
+
protected:
// This constructor should only be used for mocks in testing.
FLEQueryRewriter(boost::intrusive_ptr<ExpressionContext> expCtx)
@@ -196,8 +242,8 @@ private:
std::unique_ptr<MatchExpression> _rewrite(MatchExpression* me);
virtual BSONObj rewritePayloadAsTags(BSONElement fleFindPayload) const;
- std::unique_ptr<InMatchExpression> rewriteEq(const EqualityMatchExpression* expr);
- std::unique_ptr<InMatchExpression> rewriteIn(const InMatchExpression* expr);
+ std::unique_ptr<MatchExpression> rewriteEq(const EqualityMatchExpression* expr);
+ std::unique_ptr<MatchExpression> rewriteIn(const InMatchExpression* expr);
boost::intrusive_ptr<ExpressionContext> _expCtx;
@@ -208,6 +254,9 @@ private:
// True if the last Expression or MatchExpression processed by this rewriter was rewritten.
bool _rewroteLastExpression = false;
+
+ // Controls how the query rewriter rewrites the query.
+ HighCardinalityMode _mode{HighCardinalityMode::kUseIfNeeded};
};
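Taken together, the header changes split the decision in two: callers state whether the $expr fallback is permitted at all via HighCardinalityModeAllowed, and the rewriter combines that with the internalQueryFLEAlwaysUseHighCardinalityMode knob to pick its internal HighCardinalityMode. A hedged sketch of the intended call sites; the expCtx and reader objects are placeholders.

    // find/count-style commands: the $expr fallback is allowed if the tag limit is hit.
    fle::FLEQueryRewriter findRewriter(
        expCtx, escReader, eccReader, fle::HighCardinalityModeAllowed::kAllow);

    // upsert-style contexts cannot use $expr, so the fallback is disallowed and the
    // tag-limit error propagates instead (matching the kDisallow default on
    // rewriteEncryptedFilterInsideTxn above).
    fle::FLEQueryRewriter upsertRewriter(
        expCtx, escReader, eccReader, fle::HighCardinalityModeAllowed::kDisallow);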
diff --git a/src/mongo/db/query/fle/server_rewrite_test.cpp b/src/mongo/db/query/fle/server_rewrite_test.cpp
index cb81656dcb6..034de8f0aa9 100644
--- a/src/mongo/db/query/fle/server_rewrite_test.cpp
+++ b/src/mongo/db/query/fle/server_rewrite_test.cpp
@@ -31,7 +31,9 @@
#include <memory>
#include "mongo/bson/bsonelement.h"
+#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/bson/bsontypes.h"
#include "mongo/db/matcher/expression_leaf.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
#include "mongo/db/query/fle/server_rewrite.h"
@@ -42,9 +44,19 @@
namespace mongo {
namespace {
-class MockFLEQueryRewriter : public fle::FLEQueryRewriter {
+class BasicMockFLEQueryRewriter : public fle::FLEQueryRewriter {
public:
- MockFLEQueryRewriter() : fle::FLEQueryRewriter(new ExpressionContextForTest()), _tags() {}
+ BasicMockFLEQueryRewriter() : fle::FLEQueryRewriter(new ExpressionContextForTest()) {}
+
+ BSONObj rewriteMatchExpressionForTest(const BSONObj& obj) {
+ auto res = rewriteMatchExpression(obj);
+ return res ? res.get() : obj;
+ }
+};
+
+class MockFLEQueryRewriter : public BasicMockFLEQueryRewriter {
+public:
+ MockFLEQueryRewriter() : _tags() {}
bool isFleFindPayload(const BSONElement& fleFindPayload) const override {
return _encryptedFields.find(fleFindPayload.fieldNameStringData()) !=
@@ -56,11 +68,6 @@ public:
_tags[fieldvalue] = tags;
}
- BSONObj rewriteMatchExpressionForTest(const BSONObj& obj) {
- auto res = rewriteMatchExpression(obj);
- return res ? res.get() : obj;
- }
-
private:
BSONObj rewritePayloadAsTags(BSONElement fleFindPayload) const override {
ASSERT(fleFindPayload.isNumber()); // Only accept numbers as mock FFPs.
@@ -72,6 +79,7 @@ private:
std::map<std::pair<StringData, int>, BSONObj> _tags;
std::set<StringData> _encryptedFields;
};
+
class FLEServerRewriteTest : public unittest::Test {
public:
FLEServerRewriteTest() {}
@@ -361,5 +369,290 @@ TEST_F(FLEServerRewriteTest, ComparisonToObjectIgnored) {
}
}
+template <typename T>
+std::vector<uint8_t> toEncryptedVector(EncryptedBinDataType dt, T t) {
+ BSONObj obj = t.toBSON();
+
+ std::vector<uint8_t> buf(obj.objsize() + 1);
+ buf[0] = static_cast<uint8_t>(dt);
+
+ std::copy(obj.objdata(), obj.objdata() + obj.objsize(), buf.data() + 1);
+
+ return buf;
+}
+
+template <typename T>
+void toEncryptedBinData(StringData field, EncryptedBinDataType dt, T t, BSONObjBuilder* builder) {
+ auto buf = toEncryptedVector(dt, t);
+
+ builder->appendBinData(field, buf.size(), BinDataType::Encrypt, buf.data());
+}
+
+constexpr auto kIndexKeyId = "12345678-1234-9876-1234-123456789012"_sd;
+constexpr auto kUserKeyId = "ABCDEFAB-1234-9876-1234-123456789012"_sd;
+static UUID indexKeyId = uassertStatusOK(UUID::parse(kIndexKeyId.toString()));
+static UUID userKeyId = uassertStatusOK(UUID::parse(kUserKeyId.toString()));
+
+std::vector<char> testValue = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19};
+std::vector<char> testValue2 = {0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29};
+
+const FLEIndexKey& getIndexKey() {
+ static std::string indexVec = hexblob::decode(
+ "7dbfebc619aa68a659f64b8e23ccd21644ac326cb74a26840c3d2420176c40ae088294d00ad6cae9684237b21b754cf503f085c25cd320bf035c3417416e1e6fe3d9219f79586582112740b2add88e1030d91926ae8afc13ee575cfb8bb965b7"_sd);
+ static FLEIndexKey indexKey(KeyMaterial(indexVec.begin(), indexVec.end()));
+ return indexKey;
+}
+
+const FLEUserKey& getUserKey() {
+ static std::string userVec = hexblob::decode(
+ "a7ddbc4c8be00d51f68d9d8e485f351c8edc8d2206b24d8e0e1816d005fbe520e489125047d647b0d8684bfbdbf09c304085ed086aba6c2b2b1677ccc91ced8847a733bf5e5682c84b3ee7969e4a5fe0e0c21e5e3ee190595a55f83147d8de2a"_sd);
+ static FLEUserKey userKey(KeyMaterial(userVec.begin(), userVec.end()));
+ return userKey;
+}
+
+
+BSONObj generateFFP(StringData path, int value) {
+ auto indexKey = getIndexKey();
+ FLEIndexKeyAndId indexKeyAndId(indexKey.data, indexKeyId);
+ auto userKey = getUserKey();
+ FLEUserKeyAndId userKeyAndId(userKey.data, indexKeyId);
+
+ BSONObj doc = BSON("value" << value);
+ auto element = doc.firstElement();
+ auto fpp = FLEClientCrypto::serializeFindPayload(indexKeyAndId, userKeyAndId, element, 0);
+
+ BSONObjBuilder builder;
+ toEncryptedBinData(path, EncryptedBinDataType::kFLE2FindEqualityPayload, fpp, &builder);
+ return builder.obj();
+}
+
+class FLEServerHighCardRewriteTest : public unittest::Test {
+public:
+ FLEServerHighCardRewriteTest() {}
+
+ void setUp() override {}
+
+ void tearDown() override {}
+
+protected:
+ BasicMockFLEQueryRewriter _mock;
+};
+
+
+TEST_F(FLEServerHighCardRewriteTest, HighCard_TopLevel_Equality) {
+ _mock.setForceHighCardinalityForTest();
+
+ auto match = generateFFP("ssn", 1);
+ auto expected = fromjson(R"({
+ "$expr": {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }
+ }
+})");
+
+ auto actual = _mock.rewriteMatchExpressionForTest(match);
+ ASSERT_BSONOBJ_EQ(actual, expected);
+}
+
+
+TEST_F(FLEServerHighCardRewriteTest, HighCard_TopLevel_In) {
+ _mock.setForceHighCardinalityForTest();
+
+ auto ffp1 = generateFFP("ssn", 1);
+ auto ffp2 = generateFFP("ssn", 2);
+ auto ffp3 = generateFFP("ssn", 3);
+ auto expected = fromjson(R"({
+ "$or": [
+ {
+ "$expr": {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }
+ }
+ },
+ {
+ "$expr": {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CLpCo6rNuYMVT+6n1HCX15MNrVYDNqf6udO46ayo43Sw",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }
+ }
+ },
+ {
+ "$expr": {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CPi44oCQHnNDeRqHsNLzbdCeHt2DK/wCly0g2dxU5fqN",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }
+ }
+ }
+ ]
+})");
+
+ auto match =
+ BSON("ssn" << BSON("$in" << BSON_ARRAY(ffp1.firstElement()
+ << ffp2.firstElement() << ffp3.firstElement())));
+
+ auto actual = _mock.rewriteMatchExpressionForTest(match);
+ ASSERT_BSONOBJ_EQ(actual, expected);
+}
+
+
+TEST_F(FLEServerHighCardRewriteTest, HighCard_TopLevel_Expr) {
+
+ _mock.setForceHighCardinalityForTest();
+
+ auto ffp = generateFFP("$ssn", 1);
+ int len;
+ auto v = ffp.firstElement().binDataClean(len);
+ auto match = BSON("$expr" << BSON("$eq" << BSON_ARRAY(ffp.firstElement().fieldName()
+ << BSONBinData(v, len, Encrypt))));
+
+ auto expected = fromjson(R"({ "$expr": {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }
+ }
+ })");
+
+ auto actual = _mock.rewriteMatchExpressionForTest(match);
+ ASSERT_BSONOBJ_EQ(actual, expected);
+}
+
+TEST_F(FLEServerHighCardRewriteTest, HighCard_TopLevel_Expr_In) {
+
+ _mock.setForceHighCardinalityForTest();
+
+ auto ffp = generateFFP("$ssn", 1);
+ int len;
+ auto v = ffp.firstElement().binDataClean(len);
+
+ auto ffp2 = generateFFP("$ssn", 1);
+ int len2;
+ auto v2 = ffp2.firstElement().binDataClean(len2);
+
+ auto match = BSON(
+ "$expr" << BSON("$in" << BSON_ARRAY(ffp.firstElement().fieldName()
+ << BSON_ARRAY(BSONBinData(v, len, Encrypt)
+ << BSONBinData(v2, len2, Encrypt)))));
+
+ auto expected = fromjson(R"({ "$expr": { "$or" : [ {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }},
+ {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }}
+ ]}})");
+
+ auto actual = _mock.rewriteMatchExpressionForTest(match);
+ ASSERT_BSONOBJ_EQ(actual, expected);
+}
+
} // namespace
} // namespace mongo
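The new test fixtures build find payloads by hand: toEncryptedVector() writes a one-byte EncryptedBinDataType tag followed by the payload's BSON, and the buffer is appended as BinData subtype 6 (Encrypt). A sketch of reading that layout back, included only to document the format; the parse helper is hypothetical.

    std::pair<EncryptedBinDataType, BSONObj> parseEncryptedBinData(
        const std::vector<uint8_t>& buf) {
        // Byte 0 is the subtype tag; the remaining bytes are the serialized payload BSON.
        auto type = static_cast<EncryptedBinDataType>(buf[0]);
        BSONObj payload(reinterpret_cast<const char*>(buf.data() + 1));
        return {type, payload.getOwned()};  // copy so the result outlives 'buf'
    }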
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 5c22beab210..6c77f43ae1a 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -647,7 +647,7 @@ public:
_cq->setCollator(mainColl->getDefaultCollator()->clone());
}
- auto planCacheKey = plan_cache_key_factory::make<KeyType>(*_cq, mainColl);
+ auto planCacheKey = buildPlanCacheKey();
// Fill in some opDebug information, unless it has already been filled by an outer pipeline.
OpDebug& opDebug = CurOp::get(_opCtx)->debug();
if (!opDebug.queryHash) {
@@ -743,6 +743,11 @@ protected:
virtual PlanStageType buildExecutableTree(const QuerySolution& solution) const = 0;
/**
+ * Constructs the plan cache key.
+ */
+ virtual KeyType buildPlanCacheKey() const = 0;
+
+ /**
* Either constructs a PlanStage tree from a cached plan (if exists in the plan cache), or
* constructs a "id hack" PlanStage tree. Returns nullptr if no cached plan or id hack plan can
* be constructed.
@@ -879,6 +884,10 @@ protected:
return result;
}
+ PlanCacheKey buildPlanCacheKey() const {
+ return plan_cache_key_factory::make<PlanCacheKey>(*_cq, _collection);
+ }
+
std::unique_ptr<ClassicPrepareExecutionResult> buildCachedPlan(
const PlanCacheKey& planCacheKey) final {
initializePlannerParamsIfNeeded();
@@ -1083,13 +1092,17 @@ protected:
return result;
}
+ sbe::PlanCacheKey buildPlanCacheKey() const {
+ return plan_cache_key_factory::make(*_cq, _collections);
+ }
+
std::unique_ptr<SlotBasedPrepareExecutionResult> buildCachedPlan(
const sbe::PlanCacheKey& planCacheKey) final {
if (shouldCacheQuery(*_cq)) {
- // TODO SERVER-61507: remove _cq->pipeline().empty() check when $group pushdown is
+ // TODO SERVER-61507: remove canUseSbePlanCache check when $group pushdown is
// integrated with SBE plan cache.
if (!feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() ||
- !_cq->pipeline().empty()) {
+ !canonical_query_encoder::canUseSbePlanCache(*_cq)) {
// If the feature flag is off, we first try to build an "id hack" plan because the
// id hack plans are not cached in the classic cache. We then fall back to use the
// classic plan cache.
@@ -1346,18 +1359,19 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getSlotBasedExe
// No need for runtime planning, just use the constructed plan stage tree.
invariant(solutions.size() == 1);
invariant(roots.size() == 1);
- if (!cq->pipeline().empty()) {
- // Need to extend the solution with the agg pipeline and rebuild the execution tree.
- solutions[0] = QueryPlanner::extendWithAggPipeline(
- *cq,
- std::move(solutions[0]),
- fillOutSecondaryCollectionsInformation(opCtx, collections, cq.get()));
- roots[0] = helper.buildExecutableTree(*(solutions[0]));
- }
auto&& [root, data] = roots[0];
+
if (!planningResult->recoveredPinnedCacheEntry()) {
- plan_cache_util::updatePlanCache(
- opCtx, collections.getMainCollection(), *cq, *solutions[0], *root, data);
+ if (!cq->pipeline().empty()) {
+ // Need to extend the solution with the agg pipeline and rebuild the execution tree.
+ solutions[0] = QueryPlanner::extendWithAggPipeline(
+ *cq,
+ std::move(solutions[0]),
+ fillOutSecondaryCollectionsInformation(opCtx, collections, cq.get()));
+ roots[0] = helper.buildExecutableTree(*(solutions[0]));
+ }
+
+ plan_cache_util::updatePlanCache(opCtx, collections, *cq, *solutions[0], *root, data);
}
// Prepare the SBE tree for execution.
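In get_executor.cpp the shared prepare-execution path no longer instantiates plan_cache_key_factory::make<KeyType> itself; each helper overrides a virtual buildPlanCacheKey() so the classic and SBE variants can produce differently typed keys from their own collection accessors. A generic sketch of that template-method shape; the names are illustrative, not the real helper classes.

    template <typename KeyType>
    class PrepareHelperBase {
    public:
        void prepare() {
            // Shared logic asks the subclass for the key instead of building it here;
            // the key then feeds the cached-plan lookup (omitted in this sketch).
            [[maybe_unused]] KeyType planCacheKey = buildPlanCacheKey();
        }

    protected:
        virtual KeyType buildPlanCacheKey() const = 0;
    };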
diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h
index 20ca265bcbb..e913108679c 100644
--- a/src/mongo/db/query/get_executor.h
+++ b/src/mongo/db/query/get_executor.h
@@ -42,6 +42,7 @@
#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/parsed_distinct.h"
#include "mongo/db/query/plan_executor.h"
+#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_settings.h"
#include "mongo/db/query/query_solution.h"
@@ -157,7 +158,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutor(
std::unique_ptr<CanonicalQuery> canonicalQuery,
std::function<void(CanonicalQuery*)> extractAndAttachPipelineStages,
PlanYieldPolicy::YieldPolicy yieldPolicy,
- size_t plannerOptions = 0);
+ const QueryPlannerParams& plannerOptions);
StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutor(
OperationContext* opCtx,
@@ -192,7 +193,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorFind
std::unique_ptr<CanonicalQuery> canonicalQuery,
std::function<void(CanonicalQuery*)> extractAndAttachPipelineStages,
bool permitYield = false,
- size_t plannerOptions = QueryPlannerParams::DEFAULT);
+ QueryPlannerParams plannerOptions = QueryPlannerParams{});
StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorFind(
OperationContext* opCtx,
diff --git a/src/mongo/db/query/interval_evaluation_tree.h b/src/mongo/db/query/interval_evaluation_tree.h
index 5b48ee0ae11..cc432129357 100644
--- a/src/mongo/db/query/interval_evaluation_tree.h
+++ b/src/mongo/db/query/interval_evaluation_tree.h
@@ -55,7 +55,7 @@ using IET =
* ConstNode is a node that represents an interval with constant bounds, such as (MinKey,
* MaxKey).
*/
-class ConstNode : public optimizer::algebra::OpSpecificArity<IET, ConstNode, 0> {
+class ConstNode : public optimizer::algebra::OpSpecificArity<IET, 0> {
public:
explicit ConstNode(const OrderedIntervalList& oil) : oil{oil} {}
@@ -66,7 +66,7 @@ public:
* EvalNode is a node that evaluates an interval from a simple predicate such as {$gt: p1} where
* p1 is a parameter value known at runtime.
*/
-class EvalNode : public optimizer::algebra::OpSpecificArity<IET, EvalNode, 0> {
+class EvalNode : public optimizer::algebra::OpSpecificArity<IET, 0> {
public:
using InputParamId = MatchExpression::InputParamId;
@@ -89,9 +89,9 @@ private:
/**
* IntersectNode is a node that represents an intersection of two intervals.
*/
-class IntersectNode : public optimizer::algebra::OpSpecificArity<IET, IntersectNode, 2> {
+class IntersectNode : public optimizer::algebra::OpSpecificArity<IET, 2> {
public:
- using Base = optimizer::algebra::OpSpecificArity<IET, IntersectNode, 2>;
+ using Base = optimizer::algebra::OpSpecificArity<IET, 2>;
IntersectNode(IET lhs, IET rhs) : Base(std::move(lhs), std::move(rhs)) {}
};
@@ -99,9 +99,9 @@ public:
/**
* UnionNode is a node that represents a union of two intervals.
*/
-class UnionNode : public optimizer::algebra::OpSpecificArity<IET, UnionNode, 2> {
+class UnionNode : public optimizer::algebra::OpSpecificArity<IET, 2> {
public:
- using Base = optimizer::algebra::OpSpecificArity<IET, UnionNode, 2>;
+ using Base = optimizer::algebra::OpSpecificArity<IET, 2>;
UnionNode(IET lhs, IET rhs) : Base(std::move(lhs), std::move(rhs)) {}
};
@@ -109,9 +109,9 @@ public:
/**
* ComplementNode is a node that complements its child.
*/
-class ComplementNode : public optimizer::algebra::OpSpecificArity<IET, ComplementNode, 1> {
+class ComplementNode : public optimizer::algebra::OpSpecificArity<IET, 1> {
public:
- using Base = optimizer::algebra::OpSpecificArity<IET, ComplementNode, 1>;
+ using Base = optimizer::algebra::OpSpecificArity<IET, 1>;
ComplementNode(IET child) : Base(std::move(child)) {}
};
diff --git a/src/mongo/db/query/optimizer/algebra/algebra_test.cpp b/src/mongo/db/query/optimizer/algebra/algebra_test.cpp
index 48e668a6e32..013c5a67568 100644
--- a/src/mongo/db/query/optimizer/algebra/algebra_test.cpp
+++ b/src/mongo/db/query/optimizer/algebra/algebra_test.cpp
@@ -44,41 +44,40 @@ using Tree = PolyValue<Leaf, BinaryNode, NaryNode, AtLeastBinaryNode>;
/**
* A leaf in the tree. Just contains data - in this case a double.
*/
-class Leaf : public OpSpecificArity<Tree, Leaf, 0> {
+class Leaf : public OpSpecificArity<Tree, 0> {
public:
Leaf(double x) : x(x) {}
+
double x;
};
/**
* An inner node in the tree with exactly two children.
*/
-class BinaryNode : public OpSpecificArity<Tree, BinaryNode, 2> {
+class BinaryNode : public OpSpecificArity<Tree, 2> {
public:
BinaryNode(Tree left, Tree right)
- : OpSpecificArity<Tree, BinaryNode, 2>(std::move(left), std::move(right)) {}
+ : OpSpecificArity<Tree, 2>(std::move(left), std::move(right)) {}
};
/**
* An inner node in the tree with any number of children, zero or greater.
*/
-class NaryNode : public OpSpecificDynamicArity<Tree, NaryNode, 0> {
+class NaryNode : public OpSpecificDynamicArity<Tree, 0> {
public:
- NaryNode(std::vector<Tree> children)
- : OpSpecificDynamicArity<Tree, NaryNode, 0>(std::move(children)) {}
+ NaryNode(std::vector<Tree> children) : OpSpecificDynamicArity<Tree, 0>(std::move(children)) {}
};
/**
* An inner node in the tree with 2 or more nodes.
*/
-class AtLeastBinaryNode : public OpSpecificDynamicArity<Tree, AtLeastBinaryNode, 2> {
+class AtLeastBinaryNode : public OpSpecificDynamicArity<Tree, 2> {
public:
/**
* Notice the required number of nodes are given as separate arguments from the vector.
*/
AtLeastBinaryNode(std::vector<Tree> children, Tree left, Tree right)
- : OpSpecificDynamicArity<Tree, AtLeastBinaryNode, 2>(
- std::move(children), std::move(left), std::move(right)) {}
+ : OpSpecificDynamicArity<Tree, 2>(std::move(children), std::move(left), std::move(right)) {}
};
/**
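With the Derived template parameter gone, the toy node types above derive from OpSpecificArity<Tree, N> and OpSpecificDynamicArity<Tree, N> directly, and children are still reached through get<I>(). A small usage sketch built on those toy types; it assumes the PolyValue factory Tree::make<T>() used elsewhere in this test file.

    // Build a two-leaf tree and read the children back.
    Tree t = Tree::make<BinaryNode>(Tree::make<Leaf>(1.0), Tree::make<Leaf>(2.0));
    auto* node = t.cast<BinaryNode>();
    double left = node->get<0>().cast<Leaf>()->x;   // 1.0
    double right = node->get<1>().cast<Leaf>()->x;  // 2.0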
diff --git a/src/mongo/db/query/optimizer/algebra/operator.h b/src/mongo/db/query/optimizer/algebra/operator.h
index fb6dbc4d474..aa6220b53f1 100644
--- a/src/mongo/db/query/optimizer/algebra/operator.h
+++ b/src/mongo/db/query/optimizer/algebra/operator.h
@@ -29,67 +29,66 @@
#pragma once
+#include <stddef.h>
+#include <utility>
#include <vector>
-#include "mongo/db/query/optimizer/algebra/polyvalue.h"
+#include "mongo/util/concepts.h"
namespace mongo::optimizer {
namespace algebra {
+/**
+ * Concrete storage for 'S' items of type 'T'. This class wraps a fixed-size array, useful in
+ * a tree representation to store a node's children.
+ */
template <typename T, int S>
struct OpNodeStorage {
- T _nodes[S];
-
template <typename... Ts>
OpNodeStorage(Ts&&... vals) : _nodes{std::forward<Ts>(vals)...} {}
+
+protected:
+ T _nodes[S];
};
+/**
+ * Stub for nodes with no children.
+ */
template <typename T>
struct OpNodeStorage<T, 0> {};
-/*=====-----
- *
- * Arity of operator can be:
- * 1. statically known - A, A, A, ...
- * 2. dynamic prefix with optional statically know - vector<A>, A, A, A, ...
- *
- * Denotations map A to some B.
- * So static arity <A,A,A> is mapped to <B,B,B>.
- * Similarly, arity <vector<A>,A> is mapped to <vector<B>,B>
- *
- * There is a wrinkle when B is a reference (if allowed at all)
- * Arity <vector<A>, A, A> is mapped to <vector<B>&, B&, B&> - note that the reference is lifted
- * outside of the vector.
- *
+/**
+ * Nodes which have a specific arity (number of children) should derive from this class. The 'Slot'
+ * determines the generic type to hold for each child.
*/
-template <typename Slot, typename Derived, int Arity>
+template <typename Slot, int Arity>
class OpSpecificArity : public OpNodeStorage<Slot, Arity> {
using Base = OpNodeStorage<Slot, Arity>;
public:
- template <typename... Ts>
- OpSpecificArity(Ts&&... vals) : Base({std::forward<Ts>(vals)...}) {
- static_assert(sizeof...(Ts) == Arity, "constructor paramaters do not match");
- }
+ TEMPLATE(typename... Ts)
+ REQUIRES(sizeof...(Ts) == Arity)
+ OpSpecificArity(Ts&&... vals) : Base({std::forward<Ts>(vals)...}) {}
- template <int I, std::enable_if_t<(I >= 0 && I < Arity), int> = 0>
+ TEMPLATE(int I)
+ REQUIRES(I >= 0 && I < Arity)
auto& get() noexcept {
return this->_nodes[I];
}
- template <int I, std::enable_if_t<(I >= 0 && I < Arity), int> = 0>
+ TEMPLATE(int I)
+ REQUIRES(I >= 0 && I < Arity)
const auto& get() const noexcept {
return this->_nodes[I];
}
};
-/*=====-----
- *
- * Operator with dynamic arity
- *
+
+/**
+ * Nodes which have a known minimum arity but may optionally contain additional children should
+ * derive from this class.
*/
-template <typename Slot, typename Derived, int Arity>
-class OpSpecificDynamicArity : public OpSpecificArity<Slot, Derived, Arity> {
- using Base = OpSpecificArity<Slot, Derived, Arity>;
+template <typename Slot, int Arity>
+class OpSpecificDynamicArity : public OpSpecificArity<Slot, Arity> {
+ using Base = OpSpecificArity<Slot, Arity>;
std::vector<Slot> _dyNodes;
@@ -106,10 +105,8 @@ public:
}
};
-/*=====-----
- *
- * Semantic transport interface
- *
+/**
+ * Semantic transport interface.
*/
namespace detail {
template <typename D, typename T, typename... Args>
@@ -132,25 +129,35 @@ inline constexpr auto has_prepare_v =
has_prepare<void, call_prepare_slot_t, N, D, T, Args...>,
has_prepare<void, call_prepare_t, D, T, Args...>>::value;
-template <typename Slot, typename Derived, int Arity>
-inline constexpr int get_arity(const OpSpecificArity<Slot, Derived, Arity>*) {
+template <typename Slot, int Arity>
+inline constexpr int get_arity(const OpSpecificArity<Slot, Arity>*) {
return Arity;
}
-template <typename Slot, typename Derived, int Arity>
-inline constexpr bool is_dynamic(const OpSpecificArity<Slot, Derived, Arity>*) {
+template <typename Slot, int Arity>
+inline constexpr bool is_dynamic(const OpSpecificArity<Slot, Arity>*) {
return false;
}
-template <typename Slot, typename Derived, int Arity>
-inline constexpr bool is_dynamic(const OpSpecificDynamicArity<Slot, Derived, Arity>*) {
+template <typename Slot, int Arity>
+inline constexpr bool is_dynamic(const OpSpecificDynamicArity<Slot, Arity>*) {
return true;
}
template <typename T>
using OpConcreteType = typename std::remove_reference_t<T>::template get_t<0>;
+
} // namespace detail
+/**
+ * A transporter is similar to a tree walker that utilizes knowledge of the underlying Operator
+ * types to visit each node of an Operator tree in a bottom-up fashion. The Domain class
+ * 'D' is used as a callback mechanism by matching the relevant 'transport' overload with
+ * the particular node type and children results.
+ *
+ * The caller may optionally supply 'withSlot' to include a reference to the base PolyValue type as
+ * a first argument to the transport callbacks.
+ */
template <typename D, bool withSlot>
class OpTransporter {
D& _domain;
@@ -271,6 +278,12 @@ public:
}
};
+/**
+ * Walker for the Operator* types. Accepts a domain 'D' of 'walk' callback overloads.
+ *
+ * The caller may optionally supply 'withSlot' to include a reference to base PolyValue as a first
+ * argument to the walk callbacks.
+ */
template <typename D, bool withSlot>
class OpWalker {
D& _domain;
@@ -327,11 +340,31 @@ public:
}
};
+/**
+ * Post-order traversal over the tree given by 'node', with domain D of 'transport' callbacks for
+ * each node type. The domain may optionally contain 'prepare' method overloads to pre-visit a node
+ * before traversing its children.
+ *
+ * This method also allows propagating results from the traversal implicitly via the return type of
+ * the methods in D. For instance, for a traversal that returns an integer and a node which has
+ * two children, the signature would look something like this:
+ *
+ * int transport(const NodeType&, int childResult0, int childResult1)
+ *
+ */
template <bool withSlot = false, typename D, typename N, typename... Args>
auto transport(N&& node, D& domain, Args&&... args) {
return node.visit(OpTransporter<D, withSlot>{domain}, std::forward<Args>(args)...);
}
+/**
+ * Visits 'node' by invoking the appropriate 'walk' overload in domain D. The 'walk' methods should
+ * accept the node as the first argument and its children as subsequent arguments with generic type
+ * N.
+ *
+ * Note that this method does not actually traverse the tree given in 'node'; the caller is
+ * responsible for manually walking.
+ */
template <bool withSlot = false, typename D, typename N, typename... Args>
auto walk(N&& node, D& domain, Args&&... args) {
return node.visit(OpWalker<D, withSlot>{domain}, std::forward<Args>(args)...);
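The new transport() documentation can be made concrete against the toy Tree from algebra_test.cpp: each transport overload receives a node plus the already-computed results of its children. For the dynamic-arity nodes, the sketch below assumes those results arrive as a vector ahead of the statically known children, mirroring the constructor shapes; that callback shape is an assumption, not taken verbatim from the sources.

    // Counts the nodes of a toy Tree bottom-up.
    struct NodeCounter {
        int transport(const Leaf&) {
            return 1;
        }
        int transport(const BinaryNode&, int lhs, int rhs) {
            return 1 + lhs + rhs;
        }
        int transport(const NaryNode&, std::vector<int> kids) {
            int n = 1;
            for (int k : kids)
                n += k;
            return n;
        }
        int transport(const AtLeastBinaryNode&, std::vector<int> kids, int lhs, int rhs) {
            int n = 1 + lhs + rhs;
            for (int k : kids)
                n += k;
            return n;
        }
    };

    // NodeCounter counter;
    // int total = algebra::transport(tree, counter);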
diff --git a/src/mongo/db/query/optimizer/algebra/polyvalue.h b/src/mongo/db/query/optimizer/algebra/polyvalue.h
index 63f5965c50c..185080d916c 100644
--- a/src/mongo/db/query/optimizer/algebra/polyvalue.h
+++ b/src/mongo/db/query/optimizer/algebra/polyvalue.h
@@ -33,32 +33,30 @@
#include <stdexcept>
#include <type_traits>
-namespace mongo::optimizer {
-namespace algebra {
+#include "mongo/util/assert_util.h"
+
+namespace mongo::optimizer::algebra {
namespace detail {
template <typename T, typename... Args>
inline constexpr bool is_one_of_v = std::disjunction_v<std::is_same<T, Args>...>;
-template <typename T, typename... Args>
-inline constexpr bool is_one_of_f() {
- return is_one_of_v<T, Args...>;
-}
-
template <typename... Args>
struct is_unique_t : std::true_type {};
template <typename H, typename... T>
struct is_unique_t<H, T...>
- : std::bool_constant<!is_one_of_f<H, T...>() && is_unique_t<T...>::value> {};
+ : std::bool_constant<!is_one_of_v<H, T...> && is_unique_t<T...>::value> {};
template <typename... Args>
inline constexpr bool is_unique_v = is_unique_t<Args...>::value;
-// Given the type T find its index in Ts
+/**
+ * Given the type T find its index in Ts.
+ */
template <typename T, typename... Ts>
static inline constexpr int find_index() {
- static_assert(detail::is_unique_v<Ts...>, "Types must be unique");
+ static_assert(is_unique_v<Ts...>, "Types must be unique");
constexpr bool matchVector[] = {std::is_same<T, Ts>::value...};
for (int index = 0; index < static_cast<int>(sizeof...(Ts)); ++index) {
@@ -85,35 +83,10 @@ using get_type_by_index = typename get_type_by_index_impl<I, Ts...>::type;
} // namespace detail
-/*=====-----
- *
- * The overload trick to construct visitors from lambdas.
- *
- */
-template <class... Ts>
-struct overload : Ts... {
- using Ts::operator()...;
-};
-template <class... Ts>
-overload(Ts...)->overload<Ts...>;
-
-/*=====-----
- *
- * Forward declarations
- *
- */
-template <typename... Ts>
-class PolyValue;
-
-template <typename T, typename... Ts>
-class ControlBlockVTable;
-
-/*=====-----
- *
+/**
* The base control block that PolyValue holds.
*
- * It does not contain anything else by the runtime tag.
- *
+ * It does not contain anything else except for the runtime tag.
*/
template <typename... Ts>
class ControlBlock {
@@ -128,13 +101,10 @@ public:
}
};
-/*=====-----
- *
+/**
* The concrete control block VTable generator.
*
- * It must be empty ad PolyValue derives from the generators
- * and we want EBO to kick in.
- *
+ * It must be empty as PolyValue derives from the generators and we want EBO to kick in.
*/
template <typename T, typename... Ts>
class ControlBlockVTable {
@@ -144,13 +114,9 @@ protected:
using AbstractType = ControlBlock<Ts...>;
- /*=====-----
- *
- * The concrete control block for every type T of Ts.
- *
- * It derives from the ControlBlock. All methods are private and only
- * the friend class ControlBlockVTable can call them.
- *
+ /**
+ * The concrete control block for every type T of Ts. Derives from a ControlBlock which holds
+ * the runtime type tag for T.
*/
class ConcreteType : public AbstractType {
T _t;
@@ -222,18 +188,21 @@ public:
}
}
- template <typename V, typename N, typename... Args>
- static auto visit(V&& v, N& holder, AbstractType* block, Args&&... args) {
- return v(holder, *cast<T>(block), std::forward<Args>(args)...);
+ template <typename Callback, typename N, typename... Args>
+ static auto visit(Callback&& cb, N& holder, AbstractType* block, Args&&... args) {
+ return cb(holder, *cast<T>(block), std::forward<Args>(args)...);
}
- template <typename V, typename N, typename... Args>
- static auto visitConst(V&& v, const N& holder, const AbstractType* block, Args&&... args) {
- return v(holder, *castConst<T>(block), std::forward<Args>(args)...);
+ template <typename Callback, typename N, typename... Args>
+ static auto visitConst(Callback&& cb,
+ const N& holder,
+ const AbstractType* block,
+ Args&&... args) {
+ return cb(holder, *castConst<T>(block), std::forward<Args>(args)...);
}
};
-/*=====-----
+/**
*
* This is a variation on variant and polymorphic value theme.
*
@@ -257,6 +226,9 @@ private:
static_assert(std::conjunction_v<std::is_empty<ControlBlockVTable<Ts, Ts...>>...>,
"VTable base classes must be empty");
+ // Static array that allows lookup into methods on ControlBlockVTable using the PolyValue tag.
+ static constexpr std::array cloneTbl = {&ControlBlockVTable<Ts, Ts...>::clone...};
+
ControlBlock<Ts...>* _object{nullptr};
PolyValue(ControlBlock<Ts...>* object) noexcept : _object(object) {}
@@ -266,9 +238,7 @@ private:
}
static void check(const ControlBlock<Ts...>* object) {
- if (!object) {
- throw std::logic_error("PolyValue is empty");
- }
+ tassert(6232700, "PolyValue is empty", object != nullptr);
}
static void destroy(ControlBlock<Ts...>* object) noexcept {
@@ -336,35 +306,38 @@ private:
return tag();
}
-
- template <typename V, typename... Args>
- auto visit(V&& v, Args&&... args) {
+ template <typename Callback, typename... Args>
+ auto visit(Callback&& cb, Args&&... args) {
// unfortunately gcc rejects much nicer code, clang and msvc accept
// static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template
// visit<V>... };
using FunPtrType = decltype(
- &ControlBlockVTable<get_t<0>, Ts...>::template visit<V, Reference, Args...>);
+ &ControlBlockVTable<get_t<0>, Ts...>::template visit<Callback, Reference, Args...>);
static constexpr FunPtrType visitTbl[] = {
- &ControlBlockVTable<Ts, Ts...>::template visit<V, Reference, Args...>...};
+ &ControlBlockVTable<Ts, Ts...>::template visit<Callback, Reference, Args...>...};
check(_object);
- return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...);
+ return visitTbl[tag()](
+ std::forward<Callback>(cb), *this, _object, std::forward<Args>(args)...);
}
- template <typename V, typename... Args>
- auto visit(V&& v, Args&&... args) const {
+ template <typename Callback, typename... Args>
+ auto visit(Callback&& cb, Args&&... args) const {
// unfortunately gcc rejects much nicer code, clang and msvc accept
// static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template
// visitConst<V>... };
using FunPtrType = decltype(
- &ControlBlockVTable<get_t<0>, Ts...>::template visitConst<V, Reference, Args...>);
+ &ControlBlockVTable<get_t<0>,
+ Ts...>::template visitConst<Callback, Reference, Args...>);
static constexpr FunPtrType visitTbl[] = {
- &ControlBlockVTable<Ts, Ts...>::template visitConst<V, Reference, Args...>...};
+ &ControlBlockVTable<Ts,
+ Ts...>::template visitConst<Callback, Reference, Args...>...};
check(_object);
- return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...);
+ return visitTbl[tag()](
+ std::forward<Callback>(cb), *this, _object, std::forward<Args>(args)...);
}
template <typename T>
@@ -420,21 +393,18 @@ public:
key_type tagOf() const {
check(_object);
-
return tag();
}
PolyValue() = delete;
PolyValue(const PolyValue& other) {
- static constexpr std::array cloneTbl = {&ControlBlockVTable<Ts, Ts...>::clone...};
if (other._object) {
_object = cloneTbl[other.tag()](other._object);
}
}
PolyValue(const Reference& other) {
- static constexpr std::array cloneTbl = {&ControlBlockVTable<Ts, Ts...>::clone...};
if (other._object) {
_object = cloneTbl[other.tag()](other._object);
}
@@ -463,34 +433,37 @@ public:
template <int I>
using get_t = detail::get_type_by_index<I, Ts...>;
- template <typename V, typename... Args>
- auto visit(V&& v, Args&&... args) {
+ template <typename Callback, typename... Args>
+ auto visit(Callback&& cb, Args&&... args) {
// unfortunately gcc rejects much nicer code, clang and msvc accept
// static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template
// visit<V>... };
- using FunPtrType =
- decltype(&ControlBlockVTable<get_t<0>, Ts...>::template visit<V, PolyValue, Args...>);
+ using FunPtrType = decltype(
+ &ControlBlockVTable<get_t<0>, Ts...>::template visit<Callback, PolyValue, Args...>);
static constexpr FunPtrType visitTbl[] = {
- &ControlBlockVTable<Ts, Ts...>::template visit<V, PolyValue, Args...>...};
+ &ControlBlockVTable<Ts, Ts...>::template visit<Callback, PolyValue, Args...>...};
check(_object);
- return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...);
+ return visitTbl[tag()](
+ std::forward<Callback>(cb), *this, _object, std::forward<Args>(args)...);
}
- template <typename V, typename... Args>
- auto visit(V&& v, Args&&... args) const {
+ template <typename Callback, typename... Args>
+ auto visit(Callback&& cb, Args&&... args) const {
// unfortunately gcc rejects much nicer code, clang and msvc accept
// static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template
// visitConst<V>... };
- using FunPtrType = decltype(
- &ControlBlockVTable<get_t<0>, Ts...>::template visitConst<V, PolyValue, Args...>);
+ using FunPtrType =
+ decltype(&ControlBlockVTable<get_t<0>,
+ Ts...>::template visitConst<Callback, PolyValue, Args...>);
static constexpr FunPtrType visitTbl[] = {
- &ControlBlockVTable<Ts, Ts...>::template visitConst<V, PolyValue, Args...>...};
+ &ControlBlockVTable<Ts, Ts...>::template visitConst<Callback, PolyValue, Args...>...};
check(_object);
- return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...);
+ return visitTbl[tag()](
+ std::forward<Callback>(cb), *this, _object, std::forward<Args>(args)...);
}
template <typename T>
@@ -517,13 +490,13 @@ public:
}
bool operator==(const PolyValue& rhs) const noexcept {
- static constexpr std::array cmp = {ControlBlockVTable<Ts, Ts...>::compareEq...};
- return cmp[tag()](_object, rhs._object);
+ static constexpr std::array cmpTbl = {ControlBlockVTable<Ts, Ts...>::compareEq...};
+ return cmpTbl[tag()](_object, rhs._object);
}
bool operator==(const Reference& rhs) const noexcept {
- static constexpr std::array cmp = {ControlBlockVTable<Ts, Ts...>::compareEq...};
- return cmp[tag()](_object, rhs._object);
+ static constexpr std::array cmpTbl = {ControlBlockVTable<Ts, Ts...>::compareEq...};
+ return cmpTbl[tag()](_object, rhs._object);
}
auto ref() {
@@ -537,5 +510,4 @@ public:
}
};
-} // namespace algebra
-} // namespace mongo::optimizer
+} // namespace mongo::optimizer::algebra
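After the rename from V to Callback, visit() still dispatches through a per-alternative function-pointer table and invokes the callback as cb(holder, concreteNode, args...). Because the table instantiates the callback for every alternative, a generic lambda is the simplest visitor; a hedged sketch against the toy Tree type from the algebra test above.

    // 'holder' is the PolyValue (or its Reference) itself; 'node' is the concrete alternative.
    // All instantiations must agree on the return type (void here).
    t.visit([](auto& holder, auto& node) {
        // inspect or mutate 'node' here
    });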
diff --git a/src/mongo/db/query/optimizer/bool_expression.h b/src/mongo/db/query/optimizer/bool_expression.h
index bf00f907504..b4bdf0a6a11 100644
--- a/src/mongo/db/query/optimizer/bool_expression.h
+++ b/src/mongo/db/query/optimizer/bool_expression.h
@@ -53,8 +53,8 @@ struct BoolExpr {
using NodeVector = std::vector<Node>;
- class Atom final : public algebra::OpSpecificArity<Node, Atom, 0> {
- using Base = algebra::OpSpecificArity<Node, Atom, 0>;
+ class Atom final : public algebra::OpSpecificArity<Node, 0> {
+ using Base = algebra::OpSpecificArity<Node, 0>;
public:
Atom(T expr) : Base(), _expr(std::move(expr)) {}
@@ -74,8 +74,8 @@ struct BoolExpr {
T _expr;
};
- class Conjunction final : public algebra::OpSpecificDynamicArity<Node, Conjunction, 0> {
- using Base = algebra::OpSpecificDynamicArity<Node, Conjunction, 0>;
+ class Conjunction final : public algebra::OpSpecificDynamicArity<Node, 0> {
+ using Base = algebra::OpSpecificDynamicArity<Node, 0>;
public:
Conjunction(NodeVector children) : Base(std::move(children)) {
@@ -87,8 +87,8 @@ struct BoolExpr {
}
};
- class Disjunction final : public algebra::OpSpecificDynamicArity<Node, Disjunction, 0> {
- using Base = algebra::OpSpecificDynamicArity<Node, Disjunction, 0>;
+ class Disjunction final : public algebra::OpSpecificDynamicArity<Node, 0> {
+ using Base = algebra::OpSpecificDynamicArity<Node, 0>;
public:
Disjunction(NodeVector children) : Base(std::move(children)) {
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
index fd6bf9e1e40..4ecaf2c0795 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
@@ -624,16 +624,17 @@ static void convertFilterToSargableNode(ABT::reference_type node,
return;
}
- PartialSchemaReqConversion conversion = convertExprToPartialSchemaReq(filterNode.getFilter());
- if (!conversion._success) {
+ auto conversion =
+ convertExprToPartialSchemaReq(filterNode.getFilter(), true /*isFilterContext*/);
+ if (!conversion) {
return;
}
- if (conversion._hasEmptyInterval) {
+ if (conversion->_hasEmptyInterval) {
addEmptyValueScanNode(ctx);
return;
}
- for (const auto& entry : conversion._reqMap) {
+ for (const auto& entry : conversion->_reqMap) {
uassert(6624111,
"Filter partial schema requirement must contain a variable name.",
!entry.first._projectionName.empty());
@@ -648,29 +649,29 @@ static void convertFilterToSargableNode(ABT::reference_type node,
// If in substitution mode, disallow retaining original predicate. If in exploration mode, only
// allow retaining the original predicate and if we have at least one index available.
if constexpr (isSubstitution) {
- if (conversion._retainPredicate) {
+ if (conversion->_retainPredicate) {
return;
}
- } else if (!conversion._retainPredicate || scanDef.getIndexDefs().empty()) {
+ } else if (!conversion->_retainPredicate || scanDef.getIndexDefs().empty()) {
return;
}
bool hasEmptyInterval = false;
auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
indexingAvailability.getScanProjection(),
- conversion._reqMap,
+ conversion->_reqMap,
scanDef,
hasEmptyInterval);
if (hasEmptyInterval) {
addEmptyValueScanNode(ctx);
} else {
- ABT sargableNode = make<SargableNode>(std::move(conversion._reqMap),
+ ABT sargableNode = make<SargableNode>(std::move(conversion->_reqMap),
std::move(candidateIndexMap),
IndexReqTarget::Complete,
filterNode.getChild());
- if (conversion._retainPredicate) {
+ if (conversion->_retainPredicate) {
const GroupIdType childGroupId =
filterNode.getChild().cast<MemoLogicalDelegatorNode>()->getGroupId();
if (childGroupId == indexingAvailability.getScanGroupId()) {
@@ -813,22 +814,24 @@ struct SubstituteConvert<EvaluationNode> {
}
// We still want to extract sargable nodes from EvalNode to use for PhysicalScans.
- PartialSchemaReqConversion conversion =
- convertExprToPartialSchemaReq(evalNode.getProjection());
+ auto conversion =
+ convertExprToPartialSchemaReq(evalNode.getProjection(), false /*isFilterContext*/);
+ if (!conversion) {
+ return;
+ }
uassert(6624165,
"Should not be getting retainPredicate set for EvalNodes",
- !conversion._retainPredicate);
-
- if (!conversion._success || conversion._reqMap.size() != 1) {
+ !conversion->_retainPredicate);
+ if (conversion->_reqMap.size() != 1) {
// For evaluation nodes we expect to create a single entry.
return;
}
- if (conversion._hasEmptyInterval) {
+ if (conversion->_hasEmptyInterval) {
addEmptyValueScanNode(ctx);
return;
}
- for (auto& entry : conversion._reqMap) {
+ for (auto& entry : conversion->_reqMap) {
PartialSchemaRequirement& req = entry.second;
req.setBoundProjectionName(evalNode.getProjectionName());
@@ -842,12 +845,12 @@ struct SubstituteConvert<EvaluationNode> {
bool hasEmptyInterval = false;
auto candidateIndexMap = computeCandidateIndexMap(
- ctx.getPrefixId(), scanProjName, conversion._reqMap, scanDef, hasEmptyInterval);
+ ctx.getPrefixId(), scanProjName, conversion->_reqMap, scanDef, hasEmptyInterval);
if (hasEmptyInterval) {
addEmptyValueScanNode(ctx);
} else {
- ABT newNode = make<SargableNode>(std::move(conversion._reqMap),
+ ABT newNode = make<SargableNode>(std::move(conversion->_reqMap),
std::move(candidateIndexMap),
IndexReqTarget::Complete,
evalNode.getChild());
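The logical_rewriter.cpp call sites show the new convention for convertExprToPartialSchemaReq: failure is reported by an empty boost::optional rather than the removed _success flag, and the caller must now state whether it is in a filter context. A minimal sketch of that convention; 'expr' and 'use' are placeholders.

    auto conversion = convertExprToPartialSchemaReq(expr, true /*isFilterContext*/);
    if (!conversion) {
        return;  // not convertible into SargableNode requirements
    }
    if (conversion->_hasEmptyInterval) {
        // predicate can never match; the rewrites above substitute an empty value scan
        return;
    }
    use(conversion->_reqMap);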
diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
index 6f6f6c743ed..58cbf9dee2b 100644
--- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
@@ -4310,13 +4310,15 @@ TEST(PhysRewriter, PartialIndex1) {
// TODO: Test cases where partial filter bound is a range which subsumes the query
// requirement
// TODO: (e.g. half open interval)
- auto conversionResult = convertExprToPartialSchemaReq(make<EvalFilter>(
- make<PathGet>("b",
- make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(2)))),
- make<Variable>("root")));
- ASSERT_TRUE(conversionResult._success);
- ASSERT_FALSE(conversionResult._hasEmptyInterval);
- ASSERT_FALSE(conversionResult._retainPredicate);
+ auto conversionResult = convertExprToPartialSchemaReq(
+ make<EvalFilter>(
+ make<PathGet>(
+ "b", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(2)))),
+ make<Variable>("root")),
+ true /*isFilterContext*/);
+ ASSERT_TRUE(conversionResult.has_value());
+ ASSERT_FALSE(conversionResult->_hasEmptyInterval);
+ ASSERT_FALSE(conversionResult->_retainPredicate);
OptPhaseManager phaseManager(
{OptPhaseManager::OptPhase::MemoSubstitutionPhase,
@@ -4329,7 +4331,7 @@ TEST(PhysRewriter, PartialIndex1) {
IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending}},
true /*isMultiKey*/,
{DistributionType::Centralized},
- std::move(conversionResult._reqMap)}}}}}}},
+ std::move(conversionResult->_reqMap)}}}}}}},
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
@@ -4387,13 +4389,15 @@ TEST(PhysRewriter, PartialIndex2) {
ABT rootNode =
make<RootNode>(ProjectionRequirement{ProjectionNameVector{"root"}}, std::move(filterANode));
- auto conversionResult = convertExprToPartialSchemaReq(make<EvalFilter>(
- make<PathGet>("a",
- make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(3)))),
- make<Variable>("root")));
- ASSERT_TRUE(conversionResult._success);
- ASSERT_FALSE(conversionResult._hasEmptyInterval);
- ASSERT_FALSE(conversionResult._retainPredicate);
+ auto conversionResult = convertExprToPartialSchemaReq(
+ make<EvalFilter>(
+ make<PathGet>(
+ "a", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(3)))),
+ make<Variable>("root")),
+ true /*isFilterContext*/);
+ ASSERT_TRUE(conversionResult.has_value());
+ ASSERT_FALSE(conversionResult->_hasEmptyInterval);
+ ASSERT_FALSE(conversionResult->_retainPredicate);
OptPhaseManager phaseManager(
{OptPhaseManager::OptPhase::MemoSubstitutionPhase,
@@ -4406,7 +4410,7 @@ TEST(PhysRewriter, PartialIndex2) {
IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending}},
true /*isMultiKey*/,
{DistributionType::Centralized},
- std::move(conversionResult._reqMap)}}}}}}},
+ std::move(conversionResult->_reqMap)}}}}}}},
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
@@ -4462,13 +4466,15 @@ TEST(PhysRewriter, PartialIndexReject) {
ABT rootNode =
make<RootNode>(ProjectionRequirement{ProjectionNameVector{"root"}}, std::move(filterBNode));
- auto conversionResult = convertExprToPartialSchemaReq(make<EvalFilter>(
- make<PathGet>("b",
- make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(4)))),
- make<Variable>("root")));
- ASSERT_TRUE(conversionResult._success);
- ASSERT_FALSE(conversionResult._hasEmptyInterval);
- ASSERT_FALSE(conversionResult._retainPredicate);
+ auto conversionResult = convertExprToPartialSchemaReq(
+ make<EvalFilter>(
+ make<PathGet>(
+ "b", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(4)))),
+ make<Variable>("root")),
+ true /*isFilterContext*/);
+ ASSERT_TRUE(conversionResult.has_value());
+ ASSERT_FALSE(conversionResult->_hasEmptyInterval);
+ ASSERT_FALSE(conversionResult->_retainPredicate);
OptPhaseManager phaseManager(
{OptPhaseManager::OptPhase::MemoSubstitutionPhase,
@@ -4481,7 +4487,7 @@ TEST(PhysRewriter, PartialIndexReject) {
IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending}},
true /*isMultiKey*/,
{DistributionType::Centralized},
- std::move(conversionResult._reqMap)}}}}}}},
+ std::move(conversionResult->_reqMap)}}}}}}},
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
diff --git a/src/mongo/db/query/optimizer/rewrites/const_eval.cpp b/src/mongo/db/query/optimizer/rewrites/const_eval.cpp
index 89bfe74551f..0278e20700e 100644
--- a/src/mongo/db/query/optimizer/rewrites/const_eval.cpp
+++ b/src/mongo/db/query/optimizer/rewrites/const_eval.cpp
@@ -86,7 +86,7 @@ void ConstEval::removeUnusedEvalNodes() {
// TODO: consider caching.
// TODO: consider deriving IndexingAvailability.
if (!_disableSargableInlining ||
- !convertExprToPartialSchemaReq(k->getProjection())._success) {
+ !convertExprToPartialSchemaReq(k->getProjection(), false /*isFilterContext*/)) {
// Schedule node inlining as there is exactly one reference.
_singleRef.emplace(v.front());
_changed = true;
diff --git a/src/mongo/db/query/optimizer/syntax/syntax.h b/src/mongo/db/query/optimizer/syntax/syntax.h
index 7c46df654d8..0abfb54a0f1 100644
--- a/src/mongo/db/query/optimizer/syntax/syntax.h
+++ b/src/mongo/db/query/optimizer/syntax/syntax.h
@@ -93,10 +93,10 @@ using ABT = algebra::PolyValue<Blackhole,
ExpressionBinder>;
template <typename Derived, size_t Arity>
-using Operator = algebra::OpSpecificArity<ABT, Derived, Arity>;
+using Operator = algebra::OpSpecificArity<ABT, Arity>;
template <typename Derived, size_t Arity>
-using OperatorDynamic = algebra::OpSpecificDynamicArity<ABT, Derived, Arity>;
+using OperatorDynamic = algebra::OpSpecificDynamicArity<ABT, Arity>;
template <typename Derived>
using OperatorDynamicHomogenous = OperatorDynamic<Derived, 0>;
diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp
index 322ae174570..da4be863228 100644
--- a/src/mongo/db/query/optimizer/utils/utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/utils.cpp
@@ -62,9 +62,12 @@ std::vector<ABT::reference_type> collectComposed(const ABT& n) {
return {n.ref()};
}
-FieldNameType getSimpleField(const ABT& node) {
- const PathGet* pathGet = node.cast<PathGet>();
- return pathGet != nullptr ? pathGet->name() : "";
+bool isSimplePath(const ABT& node) {
+ if (auto getPtr = node.cast<PathGet>();
+ getPtr != nullptr && getPtr->getPath().is<PathIdentity>()) {
+ return true;
+ }
+ return false;
}
std::string PrefixId::getNextId(const std::string& key) {
@@ -337,18 +340,8 @@ VariableNameSetType collectVariableReferences(const ABT& n) {
return NodeVariableTracker::collect(n);
}
-PartialSchemaReqConversion::PartialSchemaReqConversion()
- : _success(false),
- _bound(),
- _reqMap(),
- _hasIntersected(false),
- _hasTraversed(false),
- _hasEmptyInterval(false),
- _retainPredicate(false) {}
-
PartialSchemaReqConversion::PartialSchemaReqConversion(PartialSchemaRequirements reqMap)
- : _success(true),
- _bound(),
+ : _bound(),
_reqMap(std::move(reqMap)),
_hasIntersected(false),
_hasTraversed(false),
@@ -356,8 +349,7 @@ PartialSchemaReqConversion::PartialSchemaReqConversion(PartialSchemaRequirements
_retainPredicate(false) {}
PartialSchemaReqConversion::PartialSchemaReqConversion(ABT bound)
- : _success(true),
- _bound(std::move(bound)),
+ : _bound(std::move(bound)),
_reqMap(),
_hasIntersected(false),
_hasTraversed(false),
@@ -369,23 +361,24 @@ PartialSchemaReqConversion::PartialSchemaReqConversion(ABT bound)
*/
class PartialSchemaReqConverter {
public:
- PartialSchemaReqConverter() = default;
+ using ResultType = boost::optional<PartialSchemaReqConversion>;
+
+ PartialSchemaReqConverter(const bool isFilterContext) : _isFilterContext(isFilterContext) {}
- PartialSchemaReqConversion handleEvalPathAndEvalFilter(PartialSchemaReqConversion pathResult,
- PartialSchemaReqConversion inputResult) {
- if (!pathResult._success || !inputResult._success) {
+ ResultType handleEvalPathAndEvalFilter(ResultType pathResult, ResultType inputResult) {
+ if (!pathResult || !inputResult) {
return {};
}
- if (pathResult._bound.has_value() || !inputResult._bound.has_value() ||
- !inputResult._reqMap.empty()) {
+ if (pathResult->_bound.has_value() || !inputResult->_bound.has_value() ||
+ !inputResult->_reqMap.empty()) {
return {};
}
- if (auto boundPtr = inputResult._bound->cast<Variable>(); boundPtr != nullptr) {
+ if (auto boundPtr = inputResult->_bound->cast<Variable>(); boundPtr != nullptr) {
const ProjectionName& boundVarName = boundPtr->name();
PartialSchemaRequirements newMap;
- for (auto& [key, req] : pathResult._reqMap) {
+ for (auto& [key, req] : pathResult->_reqMap) {
if (!key._projectionName.empty()) {
return {};
}
@@ -393,40 +386,40 @@ public:
}
PartialSchemaReqConversion result{std::move(newMap)};
- result._hasEmptyInterval = pathResult._hasEmptyInterval;
- result._retainPredicate = pathResult._retainPredicate;
+ result._hasEmptyInterval = pathResult->_hasEmptyInterval;
+ result._retainPredicate = pathResult->_retainPredicate;
return result;
}
return {};
}
- PartialSchemaReqConversion transport(const ABT& n,
- const EvalPath& evalPath,
- PartialSchemaReqConversion pathResult,
- PartialSchemaReqConversion inputResult) {
+ ResultType transport(const ABT& n,
+ const EvalPath& evalPath,
+ ResultType pathResult,
+ ResultType inputResult) {
return handleEvalPathAndEvalFilter(std::move(pathResult), std::move(inputResult));
}
- PartialSchemaReqConversion transport(const ABT& n,
- const EvalFilter& evalFilter,
- PartialSchemaReqConversion pathResult,
- PartialSchemaReqConversion inputResult) {
+ ResultType transport(const ABT& n,
+ const EvalFilter& evalFilter,
+ ResultType pathResult,
+ ResultType inputResult) {
return handleEvalPathAndEvalFilter(std::move(pathResult), std::move(inputResult));
}
- static PartialSchemaReqConversion handleComposition(const bool isMultiplicative,
- PartialSchemaReqConversion leftResult,
- PartialSchemaReqConversion rightResult) {
- if (!leftResult._success || !rightResult._success) {
+ static ResultType handleComposition(const bool isMultiplicative,
+ ResultType leftResult,
+ ResultType rightResult) {
+ if (!leftResult || !rightResult) {
return {};
}
- if (leftResult._bound.has_value() || rightResult._bound.has_value()) {
+ if (leftResult->_bound.has_value() || rightResult->_bound.has_value()) {
return {};
}
- auto& leftReqMap = leftResult._reqMap;
- auto& rightReqMap = rightResult._reqMap;
+ auto& leftReqMap = leftResult->_reqMap;
+ auto& rightReqMap = rightResult->_reqMap;
if (isMultiplicative) {
{
ProjectionRenames projectionRenames;
@@ -438,7 +431,7 @@ public:
}
}
- if (!leftResult._hasTraversed && !rightResult._hasTraversed) {
+ if (!leftResult->_hasTraversed && !rightResult->_hasTraversed) {
// Intersect intervals only if we have not traversed. E.g. (-inf, 90) ^ (70, +inf)
// becomes (70, 90).
for (auto& [key, req] : leftReqMap) {
@@ -446,7 +439,7 @@ public:
if (newIntervals) {
req.getIntervals() = std::move(newIntervals.get());
} else {
- leftResult._hasEmptyInterval = true;
+ leftResult->_hasEmptyInterval = true;
break;
}
}
@@ -455,7 +448,7 @@ public:
return {};
}
- leftResult._hasIntersected = true;
+ leftResult->_hasIntersected = true;
return leftResult;
}
@@ -534,32 +527,40 @@ public:
rightPath.is<PathIdentity>()) {
// leftPath = Id, rightPath = Traverse Id.
combineIntervalsDNF(false /*intersect*/, leftIntervals, newInterval);
- leftResult._retainPredicate = true;
+ leftResult->_retainPredicate = true;
return leftResult;
} else if (const auto rightTraversePtr = rightPath.cast<PathTraverse>();
rightTraversePtr != nullptr && rightTraversePtr->getPath().is<PathIdentity>() &&
leftPath.is<PathIdentity>()) {
// leftPath = Traverse Id, rightPath = Id.
combineIntervalsDNF(false /*intersect*/, rightIntervals, newInterval);
- rightResult._retainPredicate = true;
+ rightResult->_retainPredicate = true;
return rightResult;
}
return {};
}
- PartialSchemaReqConversion transport(const ABT& n,
- const PathComposeM& pathComposeM,
- PartialSchemaReqConversion leftResult,
- PartialSchemaReqConversion rightResult) {
+ ResultType transport(const ABT& n,
+ const PathComposeM& pathComposeM,
+ ResultType leftResult,
+ ResultType rightResult) {
+ if (!_isFilterContext) {
+ return {};
+ }
+
return handleComposition(
true /*isMultiplicative*/, std::move(leftResult), std::move(rightResult));
}
- PartialSchemaReqConversion transport(const ABT& n,
- const PathComposeA& pathComposeA,
- PartialSchemaReqConversion leftResult,
- PartialSchemaReqConversion rightResult) {
+ ResultType transport(const ABT& n,
+ const PathComposeA& pathComposeA,
+ ResultType leftResult,
+ ResultType rightResult) {
+ if (!_isFilterContext) {
+ return {};
+ }
+
const auto& path1 = pathComposeA.getPath1();
const auto& path2 = pathComposeA.getPath2();
const auto& eqNull = make<PathCompare>(Operations::Eq, Constant::null());
@@ -571,9 +572,9 @@ public:
auto intervalExpr = IntervalReqExpr::makeSingularDNF(IntervalRequirement{
{true /*inclusive*/, Constant::null()}, {true /*inclusive*/, Constant::null()}});
- return {PartialSchemaRequirements{
+ return {{PartialSchemaRequirements{
{PartialSchemaKey{},
- PartialSchemaRequirement{"" /*boundProjectionName*/, std::move(intervalExpr)}}}};
+ PartialSchemaRequirement{"" /*boundProjectionName*/, std::move(intervalExpr)}}}}};
}
return handleComposition(
@@ -581,19 +582,18 @@ public:
}
template <class T>
- static PartialSchemaReqConversion handleGetAndTraverse(const ABT& n,
- PartialSchemaReqConversion inputResult) {
- if (!inputResult._success) {
+ static ResultType handleGetAndTraverse(const ABT& n, ResultType inputResult) {
+ if (!inputResult) {
return {};
}
- if (inputResult._bound.has_value()) {
+ if (inputResult->_bound.has_value()) {
return {};
}
// New map has keys with appended paths.
PartialSchemaRequirements newMap;
- for (auto& entry : inputResult._reqMap) {
+ for (auto& entry : inputResult->_reqMap) {
if (!entry.first._projectionName.empty()) {
return {};
}
@@ -608,41 +608,39 @@ public:
newMap.emplace(PartialSchemaKey{"", std::move(path)}, std::move(entry.second));
}
- inputResult._reqMap = std::move(newMap);
+ inputResult->_reqMap = std::move(newMap);
return inputResult;
}
- PartialSchemaReqConversion transport(const ABT& n,
- const PathGet& pathGet,
- PartialSchemaReqConversion inputResult) {
+ ResultType transport(const ABT& n, const PathGet& pathGet, ResultType inputResult) {
return handleGetAndTraverse<PathGet>(n, std::move(inputResult));
}
- PartialSchemaReqConversion transport(const ABT& n,
- const PathTraverse& pathTraverse,
- PartialSchemaReqConversion inputResult) {
- if (inputResult._reqMap.size() > 1) {
+ ResultType transport(const ABT& n, const PathTraverse& pathTraverse, ResultType inputResult) {
+ if (!inputResult) {
+ return {};
+ }
+ if (inputResult->_reqMap.size() > 1) {
// Cannot append traverse if we have more than one requirement.
return {};
}
- PartialSchemaReqConversion result =
- handleGetAndTraverse<PathTraverse>(n, std::move(inputResult));
- result._hasTraversed = true;
+ auto result = handleGetAndTraverse<PathTraverse>(n, std::move(inputResult));
+ if (result) {
+ result->_hasTraversed = true;
+ }
return result;
}
- PartialSchemaReqConversion transport(const ABT& n,
- const PathCompare& pathCompare,
- PartialSchemaReqConversion inputResult) {
- if (!inputResult._success) {
+ ResultType transport(const ABT& n, const PathCompare& pathCompare, ResultType inputResult) {
+ if (!inputResult) {
return {};
}
- if (!inputResult._bound.has_value() || !inputResult._reqMap.empty()) {
+ if (!inputResult->_bound.has_value() || !inputResult->_reqMap.empty()) {
return {};
}
- const auto& bound = inputResult._bound;
+ const auto& bound = inputResult->_bound;
bool lowBoundInclusive = false;
boost::optional<ABT> lowBound;
bool highBoundInclusive = false;
@@ -678,51 +676,53 @@ public:
auto intervalExpr = IntervalReqExpr::makeSingularDNF(IntervalRequirement{
{lowBoundInclusive, std::move(lowBound)}, {highBoundInclusive, std::move(highBound)}});
- return {PartialSchemaRequirements{
+ return {{PartialSchemaRequirements{
{PartialSchemaKey{},
- PartialSchemaRequirement{"" /*boundProjectionName*/, std::move(intervalExpr)}}}};
+ PartialSchemaRequirement{"" /*boundProjectionName*/, std::move(intervalExpr)}}}}};
}
- PartialSchemaReqConversion transport(const ABT& n, const PathIdentity& pathIdentity) {
- return {PartialSchemaRequirements{{{}, {}}}};
+ ResultType transport(const ABT& n, const PathIdentity& pathIdentity) {
+ return {{PartialSchemaRequirements{{{}, {}}}}};
}
- PartialSchemaReqConversion transport(const ABT& n, const Constant& c) {
+ ResultType transport(const ABT& n, const Constant& c) {
if (c.isNull()) {
// Cannot create bounds with just NULL.
return {};
}
- return {n};
+ return {{n}};
}
template <typename T, typename... Ts>
- PartialSchemaReqConversion transport(const ABT& n, const T& node, Ts&&...) {
+ ResultType transport(const ABT& n, const T& node, Ts&&...) {
if constexpr (std::is_base_of_v<ExpressionSyntaxSort, T>) {
// We allow expressions to participate in bounds.
- return {n};
+ return {{n}};
}
// General case. Reject conversion.
return {};
}
- PartialSchemaReqConversion convert(const ABT& input) {
+ ResultType convert(const ABT& input) {
return algebra::transport<true>(input, *this);
}
+
+private:
+ const bool _isFilterContext;
};
-PartialSchemaReqConversion convertExprToPartialSchemaReq(const ABT& expr) {
- PartialSchemaReqConverter converter;
- PartialSchemaReqConversion result = converter.convert(expr);
- if (result._reqMap.empty()) {
- result._success = false;
- return result;
+boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(
+ const ABT& expr, const bool isFilterContext) {
+ PartialSchemaReqConverter converter(isFilterContext);
+ auto result = converter.convert(expr);
+ if (!result || result->_reqMap.empty()) {
+ return {};
}
- for (const auto& entry : result._reqMap) {
+ for (const auto& entry : result->_reqMap) {
if (entry.first.emptyPath() && isIntervalReqFullyOpenDNF(entry.second.getIntervals())) {
// We need to determine either path or interval (or both).
- result._success = false;
- return result;
+ return {};
}
}
return result;
diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h
index 42845f0ce95..d3164d10db6 100644
--- a/src/mongo/db/query/optimizer/utils/utils.h
+++ b/src/mongo/db/query/optimizer/utils/utils.h
@@ -65,10 +65,9 @@ size_t roundUpToNextPow2(size_t v, size_t maxPower);
std::vector<ABT::reference_type> collectComposed(const ABT& n);
/**
- * Returns the path represented by 'node' as a simple dotted string. Returns an empty string if
- * 'node' is not a path.
+ * Returns true if the path represented by 'node' is of the form PathGet "field" PathIdentity.
*/
-FieldNameType getSimpleField(const ABT& node);
+bool isSimplePath(const ABT& node);
template <class Element = PathComposeM>
inline void maybeComposePath(ABT& composition, ABT child) {
@@ -155,13 +154,9 @@ private:
};
struct PartialSchemaReqConversion {
- PartialSchemaReqConversion();
PartialSchemaReqConversion(PartialSchemaRequirements reqMap);
PartialSchemaReqConversion(ABT bound);
- // Is our current bottom-up conversion successful. If not shortcut to top.
- bool _success;
-
// If set, contains a Constant or Variable bound of an (yet unknown) interval.
boost::optional<ABT> _bound;
@@ -186,9 +181,11 @@ struct PartialSchemaReqConversion {
/**
* Takes an expression that comes from a Filter or Evaluation node, and attempts to convert
* to a PartialSchemaReqConversion. This is done independent of the availability of indexes.
- * Essentially this means to extract intervals over paths whenever possible.
+ * Essentially this means to extract intervals over paths whenever possible. If the conversion is
+ * not possible, an empty result is returned.
*/
-PartialSchemaReqConversion convertExprToPartialSchemaReq(const ABT& expr);
+boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(const ABT& expr,
+ bool isFilterContext);
bool intersectPartialSchemaReq(PartialSchemaRequirements& target,
const PartialSchemaRequirements& source,
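
Illustrative sketch, not part of the patch: with the _success flag removed, a failed conversion is now signaled by an empty boost::optional rather than by a field on the result. 'filterExpr' is an assumed placeholder ABT; the member names come from PartialSchemaReqConversion above.

    // Hypothetical caller of the refactored API (sketch only).
    if (auto conversion = convertExprToPartialSchemaReq(filterExpr, true /*isFilterContext*/)) {
        // Conversion succeeded: the extracted requirements are in conversion->_reqMap.
        if (conversion->_hasEmptyInterval) {
            // The predicate can never be satisfied; the caller may produce an empty result.
        }
    } else {
        // Conversion rejected: keep the original expression as a residual filter.
    }
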
diff --git a/src/mongo/db/query/plan_cache_key_factory.cpp b/src/mongo/db/query/plan_cache_key_factory.cpp
index 6b154b29105..b330fa5ccd6 100644
--- a/src/mongo/db/query/plan_cache_key_factory.cpp
+++ b/src/mongo/db/query/plan_cache_key_factory.cpp
@@ -89,12 +89,6 @@ PlanCacheKeyInfo makePlanCacheKeyInfo(const CanonicalQuery& query,
return PlanCacheKeyInfo(shapeString, indexabilityKeyBuilder.str());
}
-PlanCacheKey make(const CanonicalQuery& query,
- const CollectionPtr& collection,
- PlanCacheKeyTag<PlanCacheKey>) {
- return {makePlanCacheKeyInfo(query, collection)};
-}
-
namespace {
/**
* Returns the highest index commit timestamp associated with an index on 'collection' that is
@@ -129,24 +123,62 @@ boost::optional<Timestamp> computeNewestVisibleIndexTimestamp(OperationContext*
return currentNewestVisible.isNull() ? boost::optional<Timestamp>{} : currentNewestVisible;
}
+
+sbe::PlanCacheKeyCollectionState computeCollectionState(OperationContext* opCtx,
+ const CollectionPtr& collection,
+ bool isSecondaryColl) {
+ boost::optional<sbe::PlanCacheKeyShardingEpoch> keyShardingEpoch;
+ // We don't version secondary collections in the current shard versioning protocol. Also, since
+ // currently we only push down $lookup to SBE when secondary collections (and main collection)
+ // are unsharded, it's OK to not encode the sharding information here.
+ if (!isSecondaryColl) {
+ const auto shardVersion{
+ OperationShardingState::get(opCtx).getShardVersion(collection->ns())};
+ if (shardVersion) {
+ keyShardingEpoch =
+ sbe::PlanCacheKeyShardingEpoch{shardVersion->epoch(), shardVersion->getTimestamp()};
+ }
+ }
+ return {collection->uuid(),
+ CollectionQueryInfo::get(collection).getPlanCacheInvalidatorVersion(),
+ plan_cache_detail::computeNewestVisibleIndexTimestamp(opCtx, collection),
+ keyShardingEpoch};
+}
} // namespace
+PlanCacheKey make(const CanonicalQuery& query,
+ const CollectionPtr& collection,
+ PlanCacheKeyTag<PlanCacheKey> tag) {
+ return {plan_cache_detail::makePlanCacheKeyInfo(query, collection)};
+}
+
sbe::PlanCacheKey make(const CanonicalQuery& query,
const CollectionPtr& collection,
- PlanCacheKeyTag<sbe::PlanCacheKey>) {
- OperationContext* opCtx = query.getOpCtx();
- auto collectionVersion = CollectionQueryInfo::get(collection).getPlanCacheInvalidatorVersion();
- const auto shardVersion{OperationShardingState::get(opCtx).getShardVersion(collection->ns())};
- const auto keyShardingEpoch = shardVersion
- ? boost::make_optional(
- sbe::PlanCacheKeyShardingEpoch{shardVersion->epoch(), shardVersion->getTimestamp()})
- : boost::none;
-
- return {makePlanCacheKeyInfo(query, collection),
- collection->uuid(),
- collectionVersion,
- computeNewestVisibleIndexTimestamp(opCtx, collection),
- keyShardingEpoch};
+ PlanCacheKeyTag<sbe::PlanCacheKey> tag) {
+ return plan_cache_key_factory::make(query, MultipleCollectionAccessor(collection));
}
} // namespace plan_cache_detail
+
+namespace plan_cache_key_factory {
+sbe::PlanCacheKey make(const CanonicalQuery& query, const MultipleCollectionAccessor& collections) {
+ OperationContext* opCtx = query.getOpCtx();
+ auto mainCollectionState = plan_cache_detail::computeCollectionState(
+ opCtx, collections.getMainCollection(), false /* isSecondaryColl */);
+ std::vector<sbe::PlanCacheKeyCollectionState> secondaryCollectionStates;
+ secondaryCollectionStates.reserve(collections.getSecondaryCollections().size());
+ // We always use the collection order saved in MultipleCollectionAccessor to populate the plan
+ // cache key, which is ordered by the secondary collection namespaces.
+ for (auto& [_, collection] : collections.getSecondaryCollections()) {
+ if (collection) {
+ secondaryCollectionStates.emplace_back(plan_cache_detail::computeCollectionState(
+ opCtx, collection, true /* isSecondaryColl */));
+ }
+ }
+
+ return {plan_cache_detail::makePlanCacheKeyInfo(query, collections.getMainCollection()),
+ std::move(mainCollectionState),
+ std::move(secondaryCollectionStates)};
+}
+} // namespace plan_cache_key_factory
+
} // namespace mongo
diff --git a/src/mongo/db/query/plan_cache_key_factory.h b/src/mongo/db/query/plan_cache_key_factory.h
index 8d811793211..663297093c7 100644
--- a/src/mongo/db/query/plan_cache_key_factory.h
+++ b/src/mongo/db/query/plan_cache_key_factory.h
@@ -52,14 +52,14 @@ template <typename KeyType>
struct PlanCacheKeyTag {};
/**
- * Creates a key for the classic plan cache from the canonical query and collection instances.
+ * Creates a key for the classic plan cache from the canonical query and a single collection.
*/
PlanCacheKey make(const CanonicalQuery& query,
const CollectionPtr& collection,
PlanCacheKeyTag<PlanCacheKey> tag);
/**
- * Creates a key for the SBE plan cache from the canonical query and collection instances.
+ * Similar to above, but for the SBE plan cache key.
*/
sbe::PlanCacheKey make(const CanonicalQuery& query,
const CollectionPtr& collection,
@@ -77,5 +77,12 @@ template <typename Key>
Key make(const CanonicalQuery& query, const CollectionPtr& collection) {
return plan_cache_detail::make(query, collection, plan_cache_detail::PlanCacheKeyTag<Key>{});
}
+
+/**
+ * Similar to above, but a factory helper that makes an SBE plan cache key for agg queries that
+ * might involve multiple collections.
+ */
+sbe::PlanCacheKey make(const CanonicalQuery& query, const MultipleCollectionAccessor& collections);
+
} // namespace plan_cache_key_factory
} // namespace mongo
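
Both factory entry points after this change, as a brief usage sketch ('cq' and 'collection' are assumed placeholders; the calls themselves appear elsewhere in this patch):

    // Single-collection callers keep the tag-dispatched helper (classic or SBE key).
    auto classicKey = plan_cache_key_factory::make<PlanCacheKey>(cq, collection);

    // Queries that may involve secondary collections key the SBE plan cache on all of them.
    auto sbeKey = plan_cache_key_factory::make(cq, MultipleCollectionAccessor(collection));
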
diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp
index ee41d15d84c..99b2fd8fefa 100644
--- a/src/mongo/db/query/plan_executor.cpp
+++ b/src/mongo/db/query/plan_executor.cpp
@@ -38,6 +38,10 @@ namespace {
MONGO_FAIL_POINT_DEFINE(planExecutorAlwaysFails);
} // namespace
+const OperationContext::Decoration<boost::optional<SharedSemiFuture<void>>>
+ planExecutorShardingCriticalSectionFuture =
+ OperationContext::declareDecoration<boost::optional<SharedSemiFuture<void>>>();
+
std::string PlanExecutor::stateToStr(ExecState execState) {
switch (execState) {
case PlanExecutor::ADVANCED:
diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h
index bf7799dd3b3..a94e87648dd 100644
--- a/src/mongo/db/query/plan_executor.h
+++ b/src/mongo/db/query/plan_executor.h
@@ -56,6 +56,15 @@ class RecordId;
extern const OperationContext::Decoration<repl::OpTime> clientsLastKnownCommittedOpTime;
/**
+ * If a plan yielded because it encountered a sharding critical section,
+ * 'planExecutorShardingCriticalSectionFuture' will be set to a future that becomes ready when the
+ * critical section ends. This future can be waited on to hold off resuming the plan execution while
+ * the critical section is still active.
+ */
+extern const OperationContext::Decoration<boost::optional<SharedSemiFuture<void>>>
+ planExecutorShardingCriticalSectionFuture;
+
+/**
* A PlanExecutor is the abstraction that knows how to crank a tree of stages into execution.
* The executor is usually part of a larger abstraction that is interacting with the cache
* and/or the query optimizer.
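
The code that populates this decoration sits outside this diff, so the pairing below is an assumed sketch rather than the actual call sites; 'criticalSectionSignal' is a hypothetical SharedSemiFuture<void> obtained from the sharding state.

    // Producer (assumed): stash the critical section's completion future before yielding.
    planExecutorShardingCriticalSectionFuture(opCtx) = criticalSectionSignal;

    // Consumer (added in this patch, PlanExecutorImpl::_getNextImpl): while yielded, wait on
    // the future via OperationShardingState::waitForCriticalSectionToComplete(), then reset
    // the decoration so later yields do not wait again.
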
diff --git a/src/mongo/db/query/plan_executor_impl.cpp b/src/mongo/db/query/plan_executor_impl.cpp
index 76559f3d003..808b0800d23 100644
--- a/src/mongo/db/query/plan_executor_impl.cpp
+++ b/src/mongo/db/query/plan_executor_impl.cpp
@@ -60,6 +60,7 @@
#include "mongo/db/query/plan_yield_policy_impl.h"
#include "mongo/db/query/yield_policy_callbacks_impl.h"
#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/s/operation_sharding_state.h"
#include "mongo/db/service_context.h"
#include "mongo/logv2/log.h"
#include "mongo/util/fail_point.h"
@@ -361,8 +362,25 @@ PlanExecutor::ExecState PlanExecutorImpl::_getNextImpl(Snapshotted<Document>* ob
// 2) some stage requested a yield, or
// 3) we need to yield and retry due to a WriteConflictException.
// In all cases, the actual yielding happens here.
+
+ const auto whileYieldingFn = [&]() {
+ // If we yielded because we encountered a sharding critical section, wait for the
+ // critical section to end before continuing. By waiting for the critical section to be
+ // exited we avoid busy spinning immediately and encountering the same critical section
+ // again. It is important that this wait happens after having released the lock
+ // hierarchy -- otherwise deadlocks could happen, or the very least, locks would be
+ // unnecessarily held while waiting.
+ const auto& shardingCriticalSection = planExecutorShardingCriticalSectionFuture(_opCtx);
+ if (shardingCriticalSection) {
+ OperationShardingState::waitForCriticalSectionToComplete(_opCtx,
+ *shardingCriticalSection)
+ .ignore();
+ planExecutorShardingCriticalSectionFuture(_opCtx).reset();
+ }
+ };
+
if (_yieldPolicy->shouldYieldOrInterrupt(_opCtx)) {
- uassertStatusOK(_yieldPolicy->yieldOrInterrupt(_opCtx));
+ uassertStatusOK(_yieldPolicy->yieldOrInterrupt(_opCtx, whileYieldingFn));
}
WorkingSetID id = WorkingSet::INVALID_ID;
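
The yield callback hook itself appears to predate this patch; the change above only supplies a function for it. A minimal sketch of the call shape, with the body elided:

    if (_yieldPolicy->shouldYieldOrInterrupt(_opCtx)) {
        uassertStatusOK(_yieldPolicy->yieldOrInterrupt(
            _opCtx, [] { /* runs while locks and the storage snapshot are released */ }));
    }
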
diff --git a/src/mongo/db/query/plan_executor_sbe.h b/src/mongo/db/query/plan_executor_sbe.h
index f906d48d843..71b894c9f60 100644
--- a/src/mongo/db/query/plan_executor_sbe.h
+++ b/src/mongo/db/query/plan_executor_sbe.h
@@ -128,9 +128,8 @@ public:
BSONObj getPostBatchResumeToken() const override;
/**
- * Even though the leaves of '_root' will acquire AutoGet objects, the caller must acquire a top
- * level AutoGet object outside of this PlanExecutor in order to open a storage transaction and
- * establish a consistent view of the catalog.
+ * The caller must acquire a top level AutoGet object outside of this PlanExecutor in order to
+ * open a storage transaction and establish a consistent view of the catalog.
*/
LockPolicy lockPolicy() const override {
return LockPolicy::kLockExternally;
diff --git a/src/mongo/db/query/plan_yield_policy.cpp b/src/mongo/db/query/plan_yield_policy.cpp
index 545460f083e..dd660ef657e 100644
--- a/src/mongo/db/query/plan_yield_policy.cpp
+++ b/src/mongo/db/query/plan_yield_policy.cpp
@@ -90,7 +90,7 @@ Status PlanYieldPolicy::yieldOrInterrupt(OperationContext* opCtx,
for (int attempt = 1; true; attempt++) {
try {
- // Saving and restoring can modifies '_yieldable', so we make a copy before we start.
+ // Saving and restoring can modify '_yieldable', so we make a copy before we start.
const Yieldable* yieldable = _yieldable;
try {
@@ -122,7 +122,8 @@ Status PlanYieldPolicy::yieldOrInterrupt(OperationContext* opCtx,
invariant(!opCtx->isLockFreeReadsOp());
opCtx->recoveryUnit()->abandonSnapshot();
} else {
- performYield(opCtx, yieldable, whileYieldingFn);
+ invariant(yieldable);
+ performYield(opCtx, *yieldable, whileYieldingFn);
}
restoreState(opCtx, yieldable);
@@ -144,7 +145,7 @@ Status PlanYieldPolicy::yieldOrInterrupt(OperationContext* opCtx,
}
void PlanYieldPolicy::performYield(OperationContext* opCtx,
- const Yieldable* yieldable,
+ const Yieldable& yieldable,
std::function<void()> whileYieldingFn) {
// Things have to happen here in a specific order:
// * Release 'yieldable'.
@@ -162,9 +163,7 @@ void PlanYieldPolicy::performYield(OperationContext* opCtx,
// Since the locks are not recursively held, this is a top level operation and we can safely
// clear the 'yieldable' state before unlocking and then re-establish it after re-locking.
- if (yieldable) {
- yieldable->yield();
- }
+ yieldable.yield();
Locker::LockSnapshot snapshot;
auto unlocked = locker->saveLockStateAndUnlock(&snapshot);
@@ -179,9 +178,7 @@ void PlanYieldPolicy::performYield(OperationContext* opCtx,
if (!unlocked) {
// Nothing was unlocked. Recursively held locks are not the only reason locks cannot be
// released. Restore the 'yieldable' state before returning.
- if (yieldable) {
- yieldable->restore();
- }
+ yieldable.restore();
return;
}
@@ -199,15 +196,10 @@ void PlanYieldPolicy::performYield(OperationContext* opCtx,
locker->restoreLockState(opCtx, snapshot);
- // A yield has occurred, but there still may not be a 'yieldable'. This is true, for example,
- // when executing a getMore for the slot-based execution engine. SBE uses the "locks internally"
- // lock policy, and therefore the getMore code path does not acquire any db_raii object. As a
- // result, there is no db_raii object to restore here when executing a getMore against a cursor
- // using SBE.
- if (yieldable) {
- // Yieldable restore may set a new read source if necessary.
- yieldable->restore();
- }
+ // A yield has occurred, but there still may not be a 'yieldable' if the PlanExecutor
+ // has a 'locks internally' lock policy.
+ // Yieldable restore may set a new read source if necessary.
+ yieldable.restore();
}
} // namespace mongo
diff --git a/src/mongo/db/query/plan_yield_policy.h b/src/mongo/db/query/plan_yield_policy.h
index 9d44ba8e903..4cc060abf3a 100644
--- a/src/mongo/db/query/plan_yield_policy.h
+++ b/src/mongo/db/query/plan_yield_policy.h
@@ -301,7 +301,7 @@ private:
* storage engine snapshot.
*/
void performYield(OperationContext* opCtx,
- const Yieldable* yieldable,
+ const Yieldable& yieldable,
std::function<void()> whileYieldingFn);
const YieldPolicy _policy;
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index f283979cc5c..c226061c03b 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -374,7 +374,9 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan(
const BSONObj& hint = query.getFindCommandRequest().getHint();
if (!hint.isEmpty()) {
BSONElement natural = hint[query_request_helper::kNaturalSortField];
- if (natural) {
+ // If we have a natural hint and a time series traversal preference, let the traversal
+ // preference decide what order to scan, so that we can avoid a blocking sort.
+ if (natural && !params.traversalPreference) {
// If the hint is {$natural: +-1} this changes the direction of the collection scan.
csn->direction = natural.safeNumberInt() >= 0 ? 1 : -1;
}
@@ -384,8 +386,8 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan(
// the collection scan to return timestamp-based tokens. Otherwise, we should
// return generic RecordId-based tokens.
if (query.getFindCommandRequest().getRequestResumeToken()) {
- csn->shouldTrackLatestOplogTimestamp = query.nss().isOplog();
- csn->requestResumeToken = !query.nss().isOplog();
+ csn->shouldTrackLatestOplogTimestamp = query.nss().isOplogOrChangeCollection();
+ csn->requestResumeToken = !query.nss().isOplogOrChangeCollection();
}
// Extract and assign the RecordId from the 'resumeAfter' token, if present.
@@ -397,26 +399,31 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan(
const bool assertMinTsHasNotFallenOffOplog =
params.options & QueryPlannerParams::ASSERT_MIN_TS_HAS_NOT_FALLEN_OFF_OPLOG;
- if (query.nss().isOplog() && csn->direction == 1) {
+ if (query.nss().isOplogOrChangeCollection() && csn->direction == 1) {
+ // Takes Timestamp 'ts' as input, transforms it to a RecordIdBound and assigns it to the
+ // output parameter 'recordId'. The RecordId format for the change collection is a string,
+ // whereas the RecordId format for the oplog is a long integer. The timestamp should be
+ // converted to the required format before assigning it to the 'recordId'.
+ auto assignRecordIdFromTimestamp = [&](auto& ts, auto* recordId) {
+ auto keyFormat = query.nss().isChangeCollection() ? KeyFormat::String : KeyFormat::Long;
+ auto status = record_id_helpers::keyForOptime(ts, keyFormat);
+ if (status.isOK()) {
+ *recordId = RecordIdBound(status.getValue());
+ }
+ };
+
// Optimizes the start and end location parameters for a collection scan for an oplog
// collection. Not compatible with $_resumeAfter so we do not optimize in that case.
if (resumeAfterObj.isEmpty()) {
auto [minTs, maxTs] = extractTsRange(query.root());
if (minTs) {
- StatusWith<RecordId> goal = record_id_helpers::keyForOptime(*minTs);
- if (goal.isOK()) {
- csn->minRecord = RecordIdBound(goal.getValue());
- }
-
+ assignRecordIdFromTimestamp(*minTs, &csn->minRecord);
if (assertMinTsHasNotFallenOffOplog) {
- csn->assertTsHasNotFallenOffOplog = *minTs;
+ csn->assertTsHasNotFallenOff = *minTs;
}
}
if (maxTs) {
- StatusWith<RecordId> goal = record_id_helpers::keyForOptime(*maxTs);
- if (goal.isOK()) {
- csn->maxRecord = RecordIdBound(goal.getValue());
- }
+ assignRecordIdFromTimestamp(*maxTs, &csn->maxRecord);
}
}
@@ -433,9 +440,9 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan(
// specify a minimum timestamp. This is not a valid request, so we throw InvalidOptions.
if (assertMinTsHasNotFallenOffOplog) {
uassert(ErrorCodes::InvalidOptions,
- str::stream() << "assertTsHasNotFallenOffOplog cannot be applied to a query "
+ str::stream() << "assertTsHasNotFallenOff cannot be applied to a query "
"which does not imply a minimum 'ts' value ",
- csn->assertTsHasNotFallenOffOplog);
+ csn->assertTsHasNotFallenOff);
}
auto queryCollator = query.getCollator();
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index 3a133aae486..6ea44830415 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -35,6 +35,7 @@
#include "mongo/db/query/index_bounds_builder.h"
#include "mongo/db/query/index_tag.h"
#include "mongo/db/query/interval_evaluation_tree.h"
+#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_solution.h"
diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp
index 9f3460a1cc6..921b79dc70b 100644
--- a/src/mongo/db/query/planner_analysis.cpp
+++ b/src/mongo/db/query/planner_analysis.cpp
@@ -873,6 +873,41 @@ bool QueryPlannerAnalysis::explodeForSort(const CanonicalQuery& query,
return true;
}
+// Checks whether the sort pattern in the traversal preference forms a prefix of the given index
+// pattern, with agreeing directions for each field (used specifically for time series
+// collections).
+bool sortMatchesTraversalPreference(const TraversalPreference& traversalPreference,
+ const BSONObj& indexPattern) {
+ BSONObjIterator sortIter(traversalPreference.sortPattern);
+ BSONObjIterator indexIter(indexPattern);
+ while (sortIter.more() && indexIter.more()) {
+ BSONElement sortPart = sortIter.next();
+ BSONElement indexPart = indexIter.next();
+
+ if (!sortPart.isNumber() || !indexPart.isNumber()) {
+ return false;
+ }
+
+ // If the field doesn't match or the directions don't match, we return false.
+ if (strcmp(sortPart.fieldName(), indexPart.fieldName()) != 0 ||
+ (sortPart.safeNumberInt() > 0) != (indexPart.safeNumberInt() > 0)) {
+ return false;
+ }
+ }
+
+ if (!indexIter.more() && sortIter.more()) {
+ // The sort still has more, so it cannot be a prefix of the index.
+ return false;
+ }
+ return true;
+}
+
+bool isShardedCollScan(QuerySolutionNode* solnRoot) {
+ return solnRoot->getType() == StageType::STAGE_SHARDING_FILTER &&
+ solnRoot->children.size() == 1 &&
+ solnRoot->children[0]->getType() == StageType::STAGE_COLLSCAN;
+}
+
// static
std::unique_ptr<QuerySolutionNode> QueryPlannerAnalysis::analyzeSort(
const CanonicalQuery& query,
@@ -882,6 +917,28 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAnalysis::analyzeSort(
*blockingSortOut = false;
const FindCommandRequest& findCommand = query.getFindCommandRequest();
+ if (params.traversalPreference) {
+ // If we've been passed a traversal preference, we might want to reverse the order we scan
+ // the data to avoid a blocking sort later in the pipeline.
+ auto providedSorts = solnRoot->providedSorts();
+
+ BSONObj solnSortPattern;
+ if (solnRoot->getType() == StageType::STAGE_COLLSCAN || isShardedCollScan(solnRoot.get())) {
+ BSONObjBuilder builder;
+ builder.append(params.traversalPreference->clusterField, 1);
+ solnSortPattern = builder.obj();
+ } else {
+ solnSortPattern = providedSorts.getBaseSortPattern();
+ }
+
+ if (sortMatchesTraversalPreference(params.traversalPreference.get(), solnSortPattern) &&
+ QueryPlannerCommon::scanDirectionsEqual(solnRoot.get(),
+ -params.traversalPreference->direction)) {
+ QueryPlannerCommon::reverseScans(solnRoot.get(), true);
+ return solnRoot;
+ }
+ }
+
const BSONObj& sortObj = findCommand.getSort();
if (sortObj.isEmpty()) {
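
A concrete illustration of the prefix check implemented above (illustration only; the control.min.time field name is taken from the clusterField comment elsewhere in this patch):

    sort {control.min.time: 1}          index {control.min.time: 1, meta: 1}  -> true   (sort is a prefix, directions agree)
    sort {control.min.time: -1}         index {control.min.time: 1}           -> false  (directions disagree)
    sort {control.min.time: 1, meta: 1} index {control.min.time: 1}           -> false  (sort has a field the index lacks)
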
diff --git a/src/mongo/db/query/planner_analysis.h b/src/mongo/db/query/planner_analysis.h
index d7473336384..b4b8979c29d 100644
--- a/src/mongo/db/query/planner_analysis.h
+++ b/src/mongo/db/query/planner_analysis.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/db/query/canonical_query.h"
+#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_solution.h"
diff --git a/src/mongo/db/query/projection.cpp b/src/mongo/db/query/projection.cpp
index 3c93367ae8c..af5fedfe780 100644
--- a/src/mongo/db/query/projection.cpp
+++ b/src/mongo/db/query/projection.cpp
@@ -104,6 +104,7 @@ public:
void visit(const ProjectionElemMatchASTNode* node) final {
_deps->requiresDocument = true;
_deps->hasExpressions = true;
+ _deps->containsElemMatch = true;
}
void visit(const ExpressionASTNode* node) final {
diff --git a/src/mongo/db/query/projection.h b/src/mongo/db/query/projection.h
index 9987dd641db..914567e87ed 100644
--- a/src/mongo/db/query/projection.h
+++ b/src/mongo/db/query/projection.h
@@ -46,6 +46,7 @@ struct ProjectionDependencies {
// Whether the entire document is required to do the projection.
bool requiresDocument = false;
bool hasExpressions = false;
+ bool containsElemMatch = false;
// Which fields are necessary to perform the projection, or boost::none if all are required.
boost::optional<std::set<std::string>> requiredFields;
@@ -137,6 +138,10 @@ public:
_deps.metadataRequested.none() && !_deps.requiresDocument && !_deps.hasExpressions;
}
+ bool containsElemMatch() const {
+ return _deps.containsElemMatch;
+ }
+
private:
ProjectionPathASTNode _root;
ProjectType _type;
diff --git a/src/mongo/db/query/query_feature_flags.idl b/src/mongo/db/query/query_feature_flags.idl
index eb6ae4782fd..61e50906c87 100644
--- a/src/mongo/db/query/query_feature_flags.idl
+++ b/src/mongo/db/query/query_feature_flags.idl
@@ -48,12 +48,6 @@ feature_flags:
default: true
version: 5.1
- featureFlagSBEGroupPushdown:
- description: "Feature flag for allowing SBE $group pushdown"
- cpp_varname: gFeatureFlagSBEGroupPushdown
- default: true
- version: 5.2
-
featureFlagExactTopNAccumulator:
description: "Feature flag for allowing use of topN family of accumulators"
cpp_varname: gFeatureFlagExactTopNAccumulator
@@ -115,7 +109,7 @@ feature_flags:
featureFlagCommonQueryFramework:
description: "Feature flag for allowing use of Cascades-based query optimizer"
- cpp_varname: gfeatureFlagCommonQueryFramework
+ cpp_varname: gFeatureFlagCommonQueryFramework
default: false
featureFlagLastPointQuery:
@@ -124,12 +118,6 @@ feature_flags:
default: true
version: 6.0
- featureFlagSBELookupPushdown:
- description: "Feature flag for allowing SBE $lookup pushdown"
- cpp_varname: gFeatureFlagSBELookupPushdown
- default: true
- version: 6.0
-
featureFlagSearchShardedFacets:
description: "Enable use of $$SEARCH_META on sharded collections"
cpp_varname: gFeatureFlagSearchShardedFacets
@@ -155,5 +143,9 @@ feature_flags:
featureFlagSbeFull:
description: "Feature flag to enable using SBE for a larger number of queries"
cpp_varname: gFeatureFlagSbeFull
- default: true
- version: 6.0
+ default: false
+
+ featureFlagTimeSeriesChangeStreams:
+ description: "Feature flag for $changeStream support for time series"
+ cpp_varname: gFeatureFlagTimeSeriesChangeStreams
+ default: false
diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl
index 18851f0ddb9..53c1e5e7617 100644
--- a/src/mongo/db/query/query_knobs.idl
+++ b/src/mongo/db/query/query_knobs.idl
@@ -728,7 +728,7 @@ server_parameters:
set_at: [ startup, runtime ]
cpp_varname: "internalQueryEnableCascadesOptimizer"
cpp_vartype: AtomicWord<bool>
- default: false
+ default: true
internalCascadesOptimizerDisableScan:
description: "Disable full collection scans in the Cascades optimizer."
@@ -780,6 +780,14 @@ server_parameters:
cpp_vartype: AtomicWord<bool>
default: false
+ internalQueryForceCommonQueryFramework:
+ description: "Set to always use the bonsai optimizer, regardless of the query."
+ set_at: [ startup, runtime ]
+ cpp_varname: "internalQueryForceCommonQueryFramework"
+ cpp_vartype: AtomicWord<bool>
+ test_only: true
+ default: false
+
internalQueryCollectionMaxNoOfDocumentsToChooseHashJoin:
description: "Up to what number of documents do we choose the hash join algorithm when $lookup
is translated to a SBE plan."
@@ -863,6 +871,14 @@ server_parameters:
gt: 0
lt: 16777216
+ internalQueryFLEAlwaysUseHighCardinalityMode:
+ description: "Boolean flag to force FLE to always use low selectivity mode"
+ set_at: [ startup, runtime ]
+ cpp_varname: "internalQueryFLEAlwaysUseHighCardinalityMode"
+ cpp_vartype: AtomicWord<bool>
+ default:
+ expr: false
+
# Note for adding additional query knobs:
#
# When adding a new query knob, you should consider whether or not you need to add an 'on_update'
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index de727ab6190..0a4f40fea4a 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -30,15 +30,15 @@
#include "mongo/platform/basic.h"
-#include "mongo/db/query/query_planner.h"
-
#include <boost/optional.hpp>
#include <vector>
#include "mongo/base/string_data.h"
+#include "mongo/bson/bsonobj.h"
#include "mongo/bson/simple_bsonelement_comparator.h"
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/catalog/clustered_collection_util.h"
+#include "mongo/db/exec/bucket_unpacker.h"
#include "mongo/db/index/wildcard_key_generator.h"
#include "mongo/db/index_names.h"
#include "mongo/db/matcher/expression_algo.h"
@@ -50,14 +50,19 @@
#include "mongo/db/query/classic_plan_cache.h"
#include "mongo/db/query/collation/collation_index_key.h"
#include "mongo/db/query/collation/collator_interface.h"
+#include "mongo/db/query/internal_plans.h"
#include "mongo/db/query/plan_cache.h"
#include "mongo/db/query/plan_enumerator.h"
#include "mongo/db/query/planner_access.h"
#include "mongo/db/query/planner_analysis.h"
#include "mongo/db/query/planner_ixselect.h"
+#include "mongo/db/query/projection_parser.h"
+#include "mongo/db/query/query_knobs_gen.h"
+#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_common.h"
#include "mongo/db/query/query_solution.h"
#include "mongo/logv2/log.h"
+#include "mongo/util/assert_util_core.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
@@ -168,8 +173,8 @@ bool hintMatchesClusterKey(const boost::optional<ClusteredCollectionInfo>& clust
}
/**
- * Returns the dependencies for the CanoncialQuery, split by those needed to answer the filter, and
- * those needed for "everything else" which is the project and sort.
+ * Returns the dependencies for the CanonicalQuery, split by those needed to answer the filter,
+ * and those needed for "everything else" which is the project and sort.
*/
std::pair<DepsTracker, DepsTracker> computeDeps(const QueryPlannerParams& params,
const CanonicalQuery& query) {
@@ -189,8 +194,8 @@ std::pair<DepsTracker, DepsTracker> computeDeps(const QueryPlannerParams& params
outputDeps.fields.emplace(field.fieldNameStringData());
}
}
- // There's no known way a sort would depend on the whole document, and we already verified that
- // the projection doesn't depend on the whole document.
+ // There's no known way a sort would depend on the whole document, and we already verified
+ // that the projection doesn't depend on the whole document.
tassert(6430503, "Unexpectedly required entire object", !outputDeps.needWholeDocument);
return {std::move(filterDeps), std::move(outputDeps)};
}
@@ -285,8 +290,8 @@ string optionString(size_t options) {
ss << "DEFAULT ";
}
while (options) {
- // The expression (x & (x - 1)) yields x with the lowest bit cleared. Then the exclusive-or
- // of the result with the original yields the lowest bit by itself.
+ // The expression (x & (x - 1)) yields x with the lowest bit cleared. Then the
+ // exclusive-or of the result with the original yields the lowest bit by itself.
size_t new_options = options & (options - 1);
QueryPlannerParams::Options opt = QueryPlannerParams::Options(new_options ^ options);
options = new_options;
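
A quick worked case of the bit trick described in the comment above (illustration only), for options = 0b0110:

    options - 1             = 0b0101
    options & (options - 1) = 0b0100   // lowest set bit cleared
    0b0100 ^ 0b0110         = 0b0010   // the lowest set bit isolated

Each pass through the loop therefore names exactly one option flag and removes it from 'options' until none remain.
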
@@ -477,12 +482,16 @@ std::unique_ptr<QuerySolution> buildCollscanSoln(const CanonicalQuery& query,
return QueryPlannerAnalysis::analyzeDataAccess(query, params, std::move(solnRoot));
}
-std::unique_ptr<QuerySolution> buildWholeIXSoln(const IndexEntry& index,
- const CanonicalQuery& query,
- const QueryPlannerParams& params,
- int direction = 1) {
+std::unique_ptr<QuerySolution> buildWholeIXSoln(
+ const IndexEntry& index,
+ const CanonicalQuery& query,
+ const QueryPlannerParams& params,
+ const boost::optional<int>& direction = boost::none) {
+ tassert(6499400,
+ "Cannot pass both an explicit direction and a traversal preference",
+ !(direction.has_value() && params.traversalPreference));
std::unique_ptr<QuerySolutionNode> solnRoot(
- QueryPlannerAccess::scanWholeIndex(index, query, params, direction));
+ QueryPlannerAccess::scanWholeIndex(index, query, params, direction.value_or(1)));
return QueryPlannerAnalysis::analyzeDataAccess(query, params, std::move(solnRoot));
}
@@ -702,7 +711,8 @@ StatusWith<std::unique_ptr<QuerySolution>> QueryPlanner::planFromCache(
return s;
}
- // The MatchExpression tree is in canonical order. We must order the nodes for access planning.
+ // The MatchExpression tree is in canonical order. We must order the nodes for access
+ // planning.
prepareForAccessPlanning(clone.get());
LOGV2_DEBUG(20965, 5, "Tagged tree", "tree"_attr = redact(clone->debugString()));
@@ -733,8 +743,8 @@ StatusWith<std::unique_ptr<QuerySolution>> QueryPlanner::planFromCache(
StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
const CanonicalQuery& query, const QueryPlannerParams& params) {
- // It's a little silly to ask for a count and for owned data. This could indicate a bug earlier
- // on.
+ // It's a little silly to ask for a count and for owned data. This could indicate a bug
+ // earlier on.
tassert(5397500,
"Count and owned data requested",
!((params.options & QueryPlannerParams::IS_COUNT) &&
@@ -780,10 +790,10 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
const BSONObj& hintObj = query.getFindCommandRequest().getHint();
const auto naturalHint = hintObj[query_request_helper::kNaturalSortField];
if (naturalHint || hintMatchesClusterKey(params.clusteredInfo, hintObj)) {
- // The hint can be {$natural: +/-1}. If this happens, output a collscan. We expect any
- // $natural sort to have been normalized to a $natural hint upstream. Additionally, if
- // the hint matches the collection's cluster key, we also output a collscan utilizing
- // the cluster key.
+ // The hint can be {$natural: +/-1}. If this happens, output a collscan. We expect
+ // any $natural sort to have been normalized to a $natural hint upstream.
+ // Additionally, if the hint matches the collection's cluster key, we also output a
+ // collscan utilizing the cluster key.
if (naturalHint) {
// Perform validation specific to $natural.
@@ -804,8 +814,8 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
const auto clusterKey = params.clusteredInfo->getIndexSpec().getKey();
- // Check if the query collator is compatible with the collection collator for the
- // provided min and max values.
+ // Check if the query collator is compatible with the collection collator for
+ // the provided min and max values.
if ((!minObj.isEmpty() &&
!indexCompatibleMaxMin(minObj,
query.getCollator(),
@@ -846,17 +856,17 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
}
}
- // Hints require us to only consider the hinted index. If index filters in the query settings
- // were used to override the allowed indices for planning, we should not use the hinted index
- // requested in the query.
+ // Hints require us to only consider the hinted index. If index filters in the query
+ // settings were used to override the allowed indices for planning, we should not use the
+ // hinted index requested in the query.
BSONObj hintedIndex;
if (!params.indexFiltersApplied) {
hintedIndex = query.getFindCommandRequest().getHint();
}
- // Either the list of indices passed in by the caller, or the list of indices filtered according
- // to the hint. This list is later expanded in order to allow the planner to handle wildcard
- // indexes.
+ // Either the list of indices passed in by the caller, or the list of indices filtered
+ // according to the hint. This list is later expanded in order to allow the planner to
+ // handle wildcard indexes.
std::vector<IndexEntry> fullIndexList;
// Will hold a copy of the index entry chosen by the hint.
@@ -896,7 +906,8 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
} else {
relevantIndices = fullIndexList;
- // Relevant indices should only ever exceed a size of 1 when there is a hint in the case of
+ // Relevant indices should only ever exceed a size of 1 when there is a hint in the case
+ // of a $** index.
if (relevantIndices.size() > 1) {
for (auto&& entry : relevantIndices) {
@@ -931,13 +942,13 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
invariant(*hintedIndexEntry == fullIndexList.front());
// In order to be fully compatible, the min has to be less than the max according to the
- // index key pattern ordering. The first step in verifying this is "finish" the min and max
- // by replacing empty objects and stripping field names.
+ // index key pattern ordering. The first step in verifying this is "finish" the min and
+ // max by replacing empty objects and stripping field names.
BSONObj finishedMinObj = finishMinObj(*hintedIndexEntry, minObj, maxObj);
BSONObj finishedMaxObj = finishMaxObj(*hintedIndexEntry, minObj, maxObj);
- // Now we have the final min and max. This index is only relevant for the min/max query if
- // min < max.
+ // Now we have the final min and max. This index is only relevant for the min/max query
+ // if min < max.
if (finishedMinObj.woCompare(finishedMaxObj, hintedIndexEntry->keyPattern, false) >= 0) {
return Status(ErrorCodes::Error(51175),
"The value provided for min() does not come before the value provided "
@@ -1069,9 +1080,9 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
"About to build solntree from tagged tree",
"tree"_attr = redact(nextTaggedTree->debugString()));
- // Store the plan cache index tree before calling prepareForAccessingPlanning(), so that
- // the PlanCacheIndexTree has the same sort as the MatchExpression used to generate the
- // plan cache key.
+ // Store the plan cache index tree before calling prepareForAccessingPlanning(), so
+ // that the PlanCacheIndexTree has the same sort as the MatchExpression used to
+ // generate the plan cache key.
std::unique_ptr<MatchExpression> clone(nextTaggedTree->shallowClone());
std::unique_ptr<PlanCacheIndexTree> cacheData;
auto statusWithCacheData = cacheDataFromTaggedTree(clone.get(), relevantIndices);
@@ -1084,8 +1095,8 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
cacheData = std::move(statusWithCacheData.getValue());
}
- // We have already cached the tree in canonical order, so now we can order the nodes for
- // access planning.
+ // We have already cached the tree in canonical order, so now we can order the nodes
+ // for access planning.
prepareForAccessPlanning(nextTaggedTree.get());
// This can fail if enumeration makes a mistake.
@@ -1134,7 +1145,8 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
// An index was hinted. If there are any solutions, they use the hinted index. If not, we
// scan the entire index to provide results and output that as our plan. This is the
- // desired behavior when an index is hinted that is not relevant to the query. In the case that
+ // desired behavior when an index is hinted that is not relevant to the query. In the case
+ // that a $** index is hinted, we do not want this behavior.
if (!hintedIndex.isEmpty() && relevantIndices.size() == 1) {
if (out.size() > 0) {
@@ -1145,6 +1157,7 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
ErrorCodes::NoQueryExecutionPlans,
"$hint: refusing to build whole-index solution, because it's a wildcard index");
}
+
// Return hinted index solution if found.
auto soln = buildWholeIXSoln(relevantIndices.front(), query, params);
if (!soln) {
@@ -1177,8 +1190,9 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
if (!usingIndexToSort) {
for (size_t i = 0; i < fullIndexList.size(); ++i) {
const IndexEntry& index = fullIndexList[i];
- // Only a regular index or the non-hashed prefix of a compound hashed index can be
- // used to provide a sort. In addition, the index needs to be a non-sparse index.
+ // Only a regular index or the non-hashed prefix of a compound hashed index can
+ // be used to provide a sort. In addition, the index needs to be a non-sparse
+ // index.
//
// TODO: Sparse indexes can't normally provide a sort, because non-indexed
// documents could potentially be missing from the result set. However, if the
@@ -1198,14 +1212,14 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
continue;
}
- // If the index collation differs from the query collation, the index should not be
- // used to provide a sort, because strings will be ordered incorrectly.
+ // If the index collation differs from the query collation, the index should not
+ // be used to provide a sort, because strings will be ordered incorrectly.
if (!CollatorInterface::collatorsMatch(index.collator, query.getCollator())) {
continue;
}
- // Partial indexes can only be used to provide a sort only if the query predicate is
- // compatible.
+ // Partial indexes can be used to provide a sort only if the query
+ // predicate is compatible.
if (index.filterExpr && !expression::isSubsetOf(query.root(), index.filterExpr)) {
continue;
}
@@ -1264,10 +1278,10 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
if (direction != 0) {
auto soln = buildCollscanSoln(query, isTailable, params, direction);
if (soln) {
- LOGV2_DEBUG(
- 6082401,
- 5,
- "Planner: outputting soln that uses clustered index to provide sort");
+ LOGV2_DEBUG(6082401,
+ 5,
+ "Planner: outputting soln that uses clustered index to "
+ "provide sort");
SolutionCacheData* scd = new SolutionCacheData();
scd->solnType = SolutionCacheData::COLLSCAN_SOLN;
scd->wholeIXSolnDir = direction;
@@ -1280,8 +1294,8 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
}
}
- // If a projection exists, there may be an index that allows for a covered plan, even if none
- // were considered earlier.
+ // If a projection exists, there may be an index that allows for a covered plan, even if
+ // none were considered earlier.
const auto projection = query.getProj();
if (params.options & QueryPlannerParams::GENERATE_COVERED_IXSCANS && out.size() == 0 &&
query.getQueryObj().isEmpty() && projection && !projection->requiresDocument()) {
diff --git a/src/mongo/db/query/query_planner_common.cpp b/src/mongo/db/query/query_planner_common.cpp
index 013c2b88378..b6c3253fd28 100644
--- a/src/mongo/db/query/query_planner_common.cpp
+++ b/src/mongo/db/query/query_planner_common.cpp
@@ -33,7 +33,10 @@
#include "mongo/base/exact_cast.h"
#include "mongo/db/query/projection_ast_path_tracking_visitor.h"
#include "mongo/db/query/query_planner_common.h"
+#include "mongo/db/query/query_solution.h"
+#include "mongo/db/query/stage_types.h"
#include "mongo/db/query/tree_walker.h"
+#include "mongo/logv2/log.h"
#include "mongo/logv2/redaction.h"
#include "mongo/util/assert_util.h"
@@ -42,7 +45,38 @@
namespace mongo {
-void QueryPlannerCommon::reverseScans(QuerySolutionNode* node) {
+bool QueryPlannerCommon::scanDirectionsEqual(QuerySolutionNode* node, int direction) {
+ StageType type = node->getType();
+
+ boost::optional<int> scanDir;
+ if (STAGE_IXSCAN == type) {
+ IndexScanNode* isn = static_cast<IndexScanNode*>(node);
+ scanDir = isn->direction;
+ } else if (STAGE_DISTINCT_SCAN == type) {
+ DistinctNode* dn = static_cast<DistinctNode*>(node);
+ scanDir = dn->direction;
+ } else if (STAGE_COLLSCAN == type) {
+ CollectionScanNode* collScan = static_cast<CollectionScanNode*>(node);
+ scanDir = collScan->direction;
+ } else {
+ // We shouldn't encounter a sort stage.
+ invariant(!isSortStageType(type));
+ }
+
+ // If we found something with a direction, and the direction doesn't match, we return false.
+ if (scanDir && scanDir != direction) {
+ return false;
+ }
+
+ for (size_t i = 0; i < node->children.size(); ++i) {
+ if (!scanDirectionsEqual(node->children[i].get(), direction)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void QueryPlannerCommon::reverseScans(QuerySolutionNode* node, bool reverseCollScans) {
StageType type = node->getType();
if (STAGE_IXSCAN == type) {
@@ -72,6 +106,9 @@ void QueryPlannerCommon::reverseScans(QuerySolutionNode* node) {
// reverse direction of comparison for merge
MergeSortNode* msn = static_cast<MergeSortNode*>(node);
msn->sort = reverseSortObj(msn->sort);
+ } else if (reverseCollScans && STAGE_COLLSCAN == type) {
+ CollectionScanNode* collScan = static_cast<CollectionScanNode*>(node);
+ collScan->direction *= -1;
} else {
// Reversing scans is done in order to determine whether or not we need to add an explicit
// SORT stage. There shouldn't already be one present in the plan.
@@ -79,7 +116,7 @@ void QueryPlannerCommon::reverseScans(QuerySolutionNode* node) {
}
for (size_t i = 0; i < node->children.size(); ++i) {
- reverseScans(node->children[i].get());
+ reverseScans(node->children[i].get(), reverseCollScans);
}
}
diff --git a/src/mongo/db/query/query_planner_common.h b/src/mongo/db/query/query_planner_common.h
index 3c3bb88936c..6d441155b54 100644
--- a/src/mongo/db/query/query_planner_common.h
+++ b/src/mongo/db/query/query_planner_common.h
@@ -79,10 +79,17 @@ public:
}
/**
+ * Traverses the tree rooted at 'node'. Tests scan directions recursively to see if they are
+ * equal to the given direction argument. Returns true if they are and false otherwise.
+ */
+ static bool scanDirectionsEqual(QuerySolutionNode* node, int direction);
+
+ /**
* Traverses the tree rooted at 'node'. For every STAGE_IXSCAN encountered, reverse
- * the scan direction and index bounds.
+ * the scan direction and index bounds. If reverseCollScans is true, every
+ * STAGE_COLLSCAN is reversed as well.
*/
- static void reverseScans(QuerySolutionNode* node);
+ static void reverseScans(QuerySolutionNode* node, bool reverseCollScans = false);
/**
* Extracts all field names for the sortKey meta-projection and stores them in the returned
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index af643a632c8..3a7dc82c79f 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -58,9 +58,24 @@ struct SecondaryCollectionInfo {
long long storageSizeBytes{0};
};
+
+// This holds information about the internal traversal preference used for time series. If we choose
+// an index that involves fields we're interested in, we prefer a specific direction to avoid a
+// blocking sort.
+struct TraversalPreference {
+ // If we end up with an index that provides {sortPattern}, we prefer to scan it in direction
+ // {direction}.
+ BSONObj sortPattern;
+ int direction;
+ // Cluster key for the collection this query accesses (for time-series it's control.min.time).
+ // If a collection scan is chosen, this will be compared against the sortPattern to see if we
+ // can satisfy the traversal preference.
+ std::string clusterField;
+};
+
struct QueryPlannerParams {
- QueryPlannerParams()
- : options(DEFAULT),
+ QueryPlannerParams(size_t options = DEFAULT)
+ : options(options),
indexFiltersApplied(false),
maxIndexedSolutions(internalQueryPlannerMaxIndexedSolutions.load()),
clusteredCollectionCollator(nullptr) {}
@@ -178,6 +193,8 @@ struct QueryPlannerParams {
// List of information about any secondary collections that can be executed against.
std::map<NamespaceString, SecondaryCollectionInfo> secondaryCollectionsInfo;
+
+ boost::optional<TraversalPreference> traversalPreference = boost::none;
};
} // namespace mongo
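
Since the code that fills in the new field lives in the time-series planning logic outside this diff, the following is only an assumed sketch of how a caller might populate a traversal preference for a collection clustered on control.min.time ('params' is a QueryPlannerParams assumed to be in scope):

    TraversalPreference preference;
    preference.sortPattern = BSON("control.min.time" << 1);  // the sort the pipeline wants to avoid blocking on
    preference.direction = 1;                                 // prefer a forward scan of the clustered collection
    preference.clusterField = "control.min.time";
    params.traversalPreference = std::move(preference);
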
diff --git a/src/mongo/db/query/query_request_helper.cpp b/src/mongo/db/query/query_request_helper.cpp
index 53e8092de75..370a9a1137e 100644
--- a/src/mongo/db/query/query_request_helper.cpp
+++ b/src/mongo/db/query/query_request_helper.cpp
@@ -36,7 +36,6 @@
#include "mongo/base/status.h"
#include "mongo/base/status_with.h"
#include "mongo/bson/simple_bsonobj_comparator.h"
-#include "mongo/client/query.h"
#include "mongo/db/commands/test_commands_enabled.h"
#include "mongo/db/dbmessage.h"
diff --git a/src/mongo/db/query/query_request_helper.h b/src/mongo/db/query/query_request_helper.h
index 4d3ec6143c8..4edad47e067 100644
--- a/src/mongo/db/query/query_request_helper.h
+++ b/src/mongo/db/query/query_request_helper.h
@@ -40,15 +40,13 @@
namespace mongo {
-class QueryMessage;
class Status;
-class Query;
template <typename T>
class StatusWith;
/**
- * Parses the QueryMessage or find command received from the user and makes the various fields
- * more easily accessible.
+ * Parses the find command received from the user and makes the various fields more easily
+ * accessible.
*/
namespace query_request_helper {
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index 893fef833e0..b62b54c386c 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -332,7 +332,7 @@ std::unique_ptr<QuerySolutionNode> CollectionScanNode::clone() const {
copy->tailable = this->tailable;
copy->direction = this->direction;
copy->shouldTrackLatestOplogTimestamp = this->shouldTrackLatestOplogTimestamp;
- copy->assertTsHasNotFallenOffOplog = this->assertTsHasNotFallenOffOplog;
+ copy->assertTsHasNotFallenOff = this->assertTsHasNotFallenOff;
copy->shouldWaitForOplogVisibility = this->shouldWaitForOplogVisibility;
copy->clusteredIndex = this->clusteredIndex;
copy->hasCompatibleCollation = this->hasCompatibleCollation;
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 455c5aabcaa..27a4ff33977 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -489,7 +489,7 @@ struct CollectionScanNode : public QuerySolutionNodeWithSortSet {
bool shouldTrackLatestOplogTimestamp = false;
// Assert that the specified timestamp has not fallen off the oplog.
- boost::optional<Timestamp> assertTsHasNotFallenOffOplog = boost::none;
+ boost::optional<Timestamp> assertTsHasNotFallenOff = boost::none;
int direction{1};
diff --git a/src/mongo/db/query/sbe_cached_solution_planner.cpp b/src/mongo/db/query/sbe_cached_solution_planner.cpp
index 5f1b8f008d6..0ecd5ba50f5 100644
--- a/src/mongo/db/query/sbe_cached_solution_planner.cpp
+++ b/src/mongo/db/query/sbe_cached_solution_planner.cpp
@@ -53,10 +53,17 @@ CandidatePlans CachedSolutionPlanner::plan(
// If the cached plan is accepted we'd like to keep the results from the trials even if there
// are parts of agg pipelines being lowered into SBE, so we run the trial with the extended
- // plan. This works because TrialRunTracker, attached to HashAgg stage, tracks as "results" the
- // results of its child stage. Thus, we can use the number of reads the plan was cached with
- // during multiplanning even though multiplanning ran trials of pre-extended plans.
- if (!_cq.pipeline().empty()) {
+ // plan. This works because TrialRunTracker, attached to HashAgg stage in $group queries, tracks
+ // as "results" the results of its child stage. For $lookup queries, the TrialRunTracker will
+ // only track the number of reads from the local side. Thus, we can use the number of reads the
+ // plan was cached with during multiplanning even though multiplanning ran trials of
+ // pre-extended plans.
+ //
+ // TODO SERVER-61507: Remove canUseSbePlanCache check once $group pushdown is integrated with
+ // SBE plan cache.
+ if (!_cq.pipeline().empty() &&
+ !(feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() &&
+ canonical_query_encoder::canUseSbePlanCache(_cq))) {
_yieldPolicy->clearRegisteredPlans();
auto secondaryCollectionsInfo =
fillOutSecondaryCollectionsInformation(_opCtx, _collections, &_cq);
@@ -184,7 +191,7 @@ CandidatePlans CachedSolutionPlanner::replan(bool shouldCache, std::string reaso
cache->deactivate(plan_cache_key_factory::make<mongo::PlanCacheKey>(_cq, mainColl));
if (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV()) {
auto&& sbePlanCache = sbe::getPlanCache(_opCtx);
- sbePlanCache.deactivate(plan_cache_key_factory::make<sbe::PlanCacheKey>(_cq, mainColl));
+ sbePlanCache.deactivate(plan_cache_key_factory::make(_cq, _collections));
}
}
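
The condition added above runs the cached plan's trial against the pipeline-extended plan only when the SBE plan cache cannot take over. A minimal sketch of that gating decision as a standalone predicate (the three booleans are assumed to be supplied by the caller, mirroring the feature flag and eligibility checks above):

// Returns true when the trial of a cached plan should be run against the
// pipeline-extended plan: there is a pushed-down pipeline, and the SBE plan
// cache either is disabled or cannot be used for this query.
bool shouldExtendTrialPlan(bool hasPushedDownPipeline,
                           bool sbePlanCacheFlagEnabled,
                           bool queryCanUseSbePlanCache) {
    return hasPushedDownPipeline &&
        !(sbePlanCacheFlagEnabled && queryCanUseSbePlanCache);
}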
diff --git a/src/mongo/db/query/sbe_multi_planner.cpp b/src/mongo/db/query/sbe_multi_planner.cpp
index b9966e74683..c4ba4f7efad 100644
--- a/src/mongo/db/query/sbe_multi_planner.cpp
+++ b/src/mongo/db/query/sbe_multi_planner.cpp
@@ -130,13 +130,13 @@ CandidatePlans MultiPlanner::finalizeExecutionPlans(
winner.root->open(false);
}
- // Writes a cache entry for the winning plan to the plan cache if possible.
- plan_cache_util::updatePlanCache(_opCtx,
- _collections.getMainCollection(),
- _cachingMode,
- _cq,
- std::move(decision),
- candidates);
+ // If there is a pushed down pipeline that cannot use SBE plan cache, then write a cache entry
+ // before extending the pipeline.
+ // TODO SERVER-61507: Remove this block once $group pushdown is integrated with SBE plan cache.
+ if (!canonical_query_encoder::canUseSbePlanCache(_cq)) {
+ plan_cache_util::updatePlanCache(
+ _opCtx, _collections, _cachingMode, _cq, std::move(decision), candidates);
+ }
// Extend the winning candidate with the agg pipeline and rebuild the execution tree. Because
// the trial was done with find-only part of the query, we cannot reuse the results. The
@@ -152,10 +152,16 @@ CandidatePlans MultiPlanner::finalizeExecutionPlans(
// The winner might have been replanned. So, pass through the replanning reason to the new
// plan.
data.replanReason = std::move(winner.data.replanReason);
+
+ // We need to clone the plan here for the plan cache to use. The clone will be stored in the
+ // cache prior to preparation, whereas the original copy of the tree will be prepared and
+ // used to execute this query.
+ auto clonedPlan = std::make_pair(rootStage->clone(), stage_builder::PlanStageData(data));
stage_builder::prepareSlotBasedExecutableTree(
_opCtx, rootStage.get(), &data, _cq, _collections, _yieldPolicy);
candidates[winnerIdx] = sbe::plan_ranker::CandidatePlan{
std::move(solution), std::move(rootStage), std::move(data)};
+ candidates[winnerIdx].clonedPlan.emplace(std::move(clonedPlan));
candidates[winnerIdx].root->open(false);
if (_cq.getExplain()) {
@@ -173,6 +179,16 @@ CandidatePlans MultiPlanner::finalizeExecutionPlans(
}
}
+ // If pipeline can use SBE plan cache or there is no pushed down pipeline, then write a cache
+ // entry after extending the pipeline.
+ // TODO SERVER-61507: Remove canUseSbePlanCache check once $group pushdown is
+ // integrated with SBE plan cache.
+ if (canonical_query_encoder::canUseSbePlanCache(_cq)) {
+ // Writes a cache entry for the winning plan to the plan cache if possible.
+ plan_cache_util::updatePlanCache(
+ _opCtx, _collections, _cachingMode, _cq, std::move(decision), candidates);
+ }
+
return {std::move(candidates), winnerIdx};
}
} // namespace mongo::sbe
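
The finalize step above clones the winning SBE tree before preparation so the plan cache can keep an unprepared copy while the original is prepared and executed. A simplified standalone sketch of that ordering, using hypothetical types rather than the real SBE classes:

#include <memory>

// Hypothetical stand-ins for an executable plan and its auxiliary data.
struct PlanStageSketch {
    bool prepared = false;
    std::unique_ptr<PlanStageSketch> clone() const {
        return std::make_unique<PlanStageSketch>();  // clones are always unprepared
    }
};
struct PlanDataSketch {};

struct CandidateSketch {
    std::unique_ptr<PlanStageSketch> root;
    PlanDataSketch data;
    // Unprepared copy stashed for the plan cache; plays the role of 'clonedPlan' above.
    std::unique_ptr<PlanStageSketch> clonedRoot;
};

void finalizeWinner(CandidateSketch& winner) {
    // Clone first: the cache must store a tree that has not been prepared,
    // because preparation binds the tree to this execution's runtime state.
    winner.clonedRoot = winner.root->clone();

    // Now prepare the original tree for executing this query.
    winner.root->prepared = true;
}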
diff --git a/src/mongo/db/query/sbe_plan_cache.cpp b/src/mongo/db/query/sbe_plan_cache.cpp
index 0d7a90e9ed5..bbd6db6418a 100644
--- a/src/mongo/db/query/sbe_plan_cache.cpp
+++ b/src/mongo/db/query/sbe_plan_cache.cpp
@@ -160,8 +160,17 @@ void clearPlanCacheEntriesWith(ServiceContext* serviceCtx,
sbe::getPlanCache(serviceCtx)
.removeIf([&collectionUuid, collectionVersion](const PlanCacheKey& key,
const sbe::PlanCacheEntry& entry) {
- return key.getCollectionVersion() == collectionVersion &&
- key.getCollectionUuid() == collectionUuid;
+ if (key.getMainCollectionState().version == collectionVersion &&
+ key.getMainCollectionState().uuid == collectionUuid) {
+ return true;
+ }
+ for (auto& collectionState : key.getSecondaryCollectionStates()) {
+ if (collectionState.version == collectionVersion &&
+ collectionState.uuid == collectionUuid) {
+ return true;
+ }
+ }
+ return false;
});
LOGV2_DEBUG(6006600,
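
Because secondary collections are now part of the key, invalidation must match either the main collection state or any secondary state. A standalone sketch of the removeIf predicate over simplified stand-ins (not the real PlanCacheKey types):

#include <cstddef>
#include <string>
#include <vector>

// Simplified stand-in for PlanCacheKeyCollectionState: just a uuid and a version.
struct CollStateSketch {
    std::string uuid;
    std::size_t version;
};

struct CacheKeySketch {
    CollStateSketch main;
    std::vector<CollStateSketch> secondaries;
};

// True if the key references the (uuid, version) pair being invalidated,
// either as its main collection or as one of its secondary collections.
bool referencesCollection(const CacheKeySketch& key,
                          const std::string& uuid,
                          std::size_t version) {
    if (key.main.uuid == uuid && key.main.version == version) {
        return true;
    }
    for (const auto& s : key.secondaries) {
        if (s.uuid == uuid && s.version == version) {
            return true;
        }
    }
    return false;
}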
diff --git a/src/mongo/db/query/sbe_plan_cache.h b/src/mongo/db/query/sbe_plan_cache.h
index 6e7853fa817..b33488ade0f 100644
--- a/src/mongo/db/query/sbe_plan_cache.h
+++ b/src/mongo/db/query/sbe_plan_cache.h
@@ -56,35 +56,91 @@ struct PlanCacheKeyShardingEpoch {
Timestamp ts;
};
+struct PlanCacheKeyCollectionState {
+ bool operator==(const PlanCacheKeyCollectionState& other) const {
+ return other.uuid == uuid && other.version == version &&
+ other.newestVisibleIndexTimestamp == newestVisibleIndexTimestamp &&
+ other.shardVersion == shardVersion;
+ }
+
+ size_t hashCode() const {
+ size_t hash = UUID::Hash{}(uuid);
+ boost::hash_combine(hash, version);
+ if (newestVisibleIndexTimestamp) {
+ boost::hash_combine(hash, newestVisibleIndexTimestamp->asULL());
+ }
+ if (shardVersion) {
+ shardVersion->epoch.hash_combine(hash);
+ boost::hash_combine(hash, shardVersion->ts.asULL());
+ }
+ return hash;
+ }
+
+ UUID uuid;
+
+ // There is a special collection versioning scheme associated with the SBE plan cache. Whenever
+ // an action against a collection is made which should invalidate the plan cache entries for the
+ // collection -- in particular index builds and drops -- the version number is incremented.
+ // Readers specify the version number that they are reading at so that they only pick up cache
+ // entries with the right set of indexes.
+ //
+ // We also clean up all cache entries for a particular (collectionUuid, versionNumber) pair when
+ // all readers seeing this version of the collection have drained.
+ size_t version;
+
+ // The '_collectionVersion' is not currently sufficient in order to ensure that the indexes
+ // visible to the reader are consistent with the indexes present in the cache entry. The reason
+ // is that all readers see the latest copy-on-write version of the 'Collection' object, even
+ // though they are allowed to read at an older timestamp, potentially at a time before an index
+ // build completed.
+ //
+ // To solve this problem, we incorporate the timestamp of the newest index visible to the reader
+ // into the plan cache key. This ensures that the set of indexes visible to the reader match
+ // those present in the plan cache entry, preventing a situation where the plan cache entry
+ // reflects a newer version of the index catalog than the one visible to the reader.
+ //
+ // In the future, this could instead be solved with point-in-time catalog lookups.
+ boost::optional<Timestamp> newestVisibleIndexTimestamp;
+
+ // Ensures that a cached SBE plan cannot be reused if the collection has since become sharded or
+ // changed its shard key. The cached plan may no longer be valid after sharding or shard key
+ // refining since the structure of the plan depends on whether the collection is sharded, and if
+ // sharded depends on the shard key.
+ const boost::optional<PlanCacheKeyShardingEpoch> shardVersion;
+};
+
/**
* Represents the "key" used in the PlanCache mapping from query shape -> query plan.
*/
class PlanCacheKey {
public:
PlanCacheKey(PlanCacheKeyInfo&& info,
- UUID collectionUuid,
- size_t collectionVersion,
- boost::optional<Timestamp> newestVisibleIndexTimestamp,
- boost::optional<PlanCacheKeyShardingEpoch> shardVersion)
+ PlanCacheKeyCollectionState mainCollectionState,
+ std::vector<PlanCacheKeyCollectionState> secondaryCollectionStates)
: _info{std::move(info)},
- _collectionUuid{collectionUuid},
- _collectionVersion{collectionVersion},
- _newestVisibleIndexTimestamp{newestVisibleIndexTimestamp},
- _shardVersion{shardVersion} {}
+ _mainCollectionState{std::move(mainCollectionState)},
+ _secondaryCollectionStates{std::move(secondaryCollectionStates)} {
+ // For secondary collections, we don't encode shard version in the key since we don't shard
+ // version these collections. This is OK because we only push down $lookup queries to SBE
+ // when involved collections are unsharded.
+ for (const auto& collState : _secondaryCollectionStates) {
+ tassert(6443202,
+ "Secondary collections should not encode shard version in plan cache key",
+ collState.shardVersion == boost::none);
+ }
+ }
- const UUID& getCollectionUuid() const {
- return _collectionUuid;
+ const PlanCacheKeyCollectionState& getMainCollectionState() const {
+ return _mainCollectionState;
}
- size_t getCollectionVersion() const {
- return _collectionVersion;
+ const std::vector<PlanCacheKeyCollectionState>& getSecondaryCollectionStates() const {
+ return _secondaryCollectionStates;
}
bool operator==(const PlanCacheKey& other) const {
- return other._collectionVersion == _collectionVersion &&
- other._collectionUuid == _collectionUuid &&
- other._newestVisibleIndexTimestamp == _newestVisibleIndexTimestamp &&
- other._info == _info && other._shardVersion == _shardVersion;
+ return other._info == _info && other._mainCollectionState == _mainCollectionState &&
+ other._secondaryCollectionStates == _secondaryCollectionStates;
}
bool operator!=(const PlanCacheKey& other) const {
@@ -97,14 +153,9 @@ public:
uint32_t planCacheKeyHash() const {
size_t hash = _info.planCacheKeyHash();
- boost::hash_combine(hash, UUID::Hash{}(_collectionUuid));
- boost::hash_combine(hash, _collectionVersion);
- if (_newestVisibleIndexTimestamp) {
- boost::hash_combine(hash, _newestVisibleIndexTimestamp->asULL());
- }
- if (_shardVersion) {
- _shardVersion->epoch.hash_combine(hash);
- boost::hash_combine(hash, _shardVersion->ts.asULL());
+ boost::hash_combine(hash, _mainCollectionState.hashCode());
+ for (auto& collectionState : _secondaryCollectionStates) {
+ boost::hash_combine(hash, collectionState.hashCode());
}
return hash;
}
@@ -117,37 +168,12 @@ private:
// Contains the actual encoding of the query shape as well as the index discriminators.
const PlanCacheKeyInfo _info;
- const UUID _collectionUuid;
-
- // There is a special collection versioning scheme associated with the SBE plan cache. Whenever
- // an action against a collection is made which should invalidate the plan cache entries for the
- // collection -- in particular index builds and drops -- the version number is incremented.
- // Readers specify the version number that they are reading at so that they only pick up cache
- // entries with the right set of indexes.
- //
- // We also clean up all cache entries for a particular (collectionUuid, versionNumber) pair when
- // all readers seeing this version of the collection have drained.
- const size_t _collectionVersion;
-
- // The '_collectionVersion' is not currently sufficient in order to ensure that the indexes
- // visible to the reader are consistent with the indexes present in the cache entry. The reason
- // is that all readers see the latest copy-on-write version of the 'Collection' object, even
- // though they are allowed to read at an older timestamp, potentially at a time before an index
- // build completed.
- //
- // To solve this problem, we incorporate the timestamp of the newest index visible to the reader
- // into the plan cache key. This ensures that the set of indexes visible to the reader match
- // those present in the plan cache entry, preventing a situation where the plan cache entry
- // reflects a newer version of the index catalog than the one visible to the reader.
- //
- // In the future, this could instead be solved with point-in-time catalog lookups.
- const boost::optional<Timestamp> _newestVisibleIndexTimestamp;
+ const PlanCacheKeyCollectionState _mainCollectionState;
- // Ensures that a cached SBE plan cannot be reused if the collection has since become sharded or
- // changed its shard key. The cached plan may no longer be valid after sharding or shard key
- // refining since the structure of the plan depends on whether the collection is sharded, and if
- // sharded depends on the shard key.
- const boost::optional<PlanCacheKeyShardingEpoch> _shardVersion;
+ // To make sure the plan cache key matches, the secondary collection states need to be passed
+ // in a defined order. Currently, we use the collection order stored in
+ // MultipleCollectionAccessor, which is ordered by the collection namespaces.
+ const std::vector<PlanCacheKeyCollectionState> _secondaryCollectionStates;
};
class PlanCacheKeyHasher {
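
The key's hash now folds in the main collection state plus every secondary collection state, and equality compares them all. A minimal sketch of the same combining pattern using std::hash and a hypothetical hash_combine helper (approximating boost::hash_combine; shard version omitted):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <optional>
#include <string>
#include <vector>

// Hypothetical hash_combine, mimicking boost::hash_combine's mixing step.
inline void hashCombine(std::size_t& seed, std::size_t value) {
    seed ^= value + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2);
}

// Simplified stand-in for PlanCacheKeyCollectionState.
struct CollStateSketch {
    std::string uuid;
    std::size_t version;
    std::optional<std::uint64_t> newestVisibleIndexTimestamp;

    std::size_t hashCode() const {
        std::size_t h = std::hash<std::string>{}(uuid);
        hashCombine(h, version);
        if (newestVisibleIndexTimestamp) {
            hashCombine(h, static_cast<std::size_t>(*newestVisibleIndexTimestamp));
        }
        return h;
    }
};

// Combines the query-shape hash with the main collection state and every
// secondary collection state, visiting secondaries in a fixed (namespace-ordered)
// order so the resulting hash is deterministic.
std::size_t planCacheKeyHash(std::size_t queryShapeHash,
                             const CollStateSketch& main,
                             const std::vector<CollStateSketch>& secondaries) {
    std::size_t h = queryShapeHash;
    hashCombine(h, main.hashCode());
    for (const auto& s : secondaries) {
        hashCombine(h, s.hashCode());
    }
    return h;
}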
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index 99384dc11fb..63ccb11ca66 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -85,61 +85,6 @@
namespace mongo::stage_builder {
namespace {
/**
- * Tree representing index key pattern or a subset of it.
- *
- * For example, the key pattern {a.b: 1, x: 1, a.c: 1} would look like:
- *
- * <root>
- * / |
- * a x
- * / \
- * b c
- *
- * This tree is used for building SBE subtrees to re-hydrate index keys and for covered projections.
- */
-struct IndexKeyPatternTreeNode {
- IndexKeyPatternTreeNode* emplace(StringData fieldComponent) {
- auto newNode = std::make_unique<IndexKeyPatternTreeNode>();
- const auto newNodeRaw = newNode.get();
- children.emplace(fieldComponent, std::move(newNode));
- childrenOrder.push_back(fieldComponent.toString());
-
- return newNodeRaw;
- }
-
- /**
- * Returns leaf node matching field path. If the field path provided resolves to a non-leaf
- * node, null will be returned.
- *
- * For example, if tree was built for key pattern {a: 1, a.b: 1}, this method will return
- * nullptr for field path "a". On the other hand, this method will return corresponding node for
- * field path "a.b".
- */
- IndexKeyPatternTreeNode* findLeafNode(const FieldRef& fieldRef, size_t currentIndex = 0) {
- if (currentIndex == fieldRef.numParts()) {
- if (children.empty()) {
- return this;
- }
- return nullptr;
- }
-
- auto currentPart = fieldRef.getPart(currentIndex);
- if (auto it = children.find(currentPart); it != children.end()) {
- return it->second->findLeafNode(fieldRef, currentIndex + 1);
- } else {
- return nullptr;
- }
- }
-
- StringMap<std::unique_ptr<IndexKeyPatternTreeNode>> children;
- std::vector<std::string> childrenOrder;
-
- // Which slot the index key for this component is stored in. May be boost::none for non-leaf
- // nodes.
- boost::optional<sbe::value::SlotId> indexKeySlot;
-};
-
-/**
* For covered projections, each of the projection field paths represent respective index key. To
* rehydrate index keys into the result object, we first need to convert projection AST into
* 'IndexKeyPatternTreeNode' structure. Context structure and visitors below are used for this
@@ -246,94 +191,6 @@ public:
};
/**
- * Given a key pattern and an array of slots of equal size, builds an IndexKeyPatternTreeNode
- * representing the mapping between key pattern component and slot.
- *
- * Note that this will "short circuit" in cases where the index key pattern contains two components
- * where one is a subpath of the other. For example with the key pattern {a:1, a.b: 1}, the "a.b"
- * component will not be represented in the output tree. For the purpose of rehydrating index keys,
- * this is fine (and actually preferable).
- */
-std::unique_ptr<IndexKeyPatternTreeNode> buildKeyPatternTree(const BSONObj& keyPattern,
- const sbe::value::SlotVector& slots) {
- size_t i = 0;
-
- auto root = std::make_unique<IndexKeyPatternTreeNode>();
- for (auto&& elem : keyPattern) {
- auto* node = root.get();
- bool skipElem = false;
-
- FieldRef fr(elem.fieldNameStringData());
- for (FieldIndex j = 0; j < fr.numParts(); ++j) {
- const auto part = fr.getPart(j);
- if (auto it = node->children.find(part); it != node->children.end()) {
- node = it->second.get();
- if (node->indexKeySlot) {
- // We're processing the a sub-path of a path that's already indexed. We can
- // bail out here since we won't use the sub-path when reconstructing the
- // object.
- skipElem = true;
- break;
- }
- } else {
- node = node->emplace(part);
- }
- }
-
- if (!skipElem) {
- node->indexKeySlot = slots[i];
- }
-
- ++i;
- }
-
- return root;
-}
-
-/**
- * Given a root IndexKeyPatternTreeNode, this function will construct an SBE expression for
- * producing a partial object from an index key.
- *
- * For example, given the index key pattern {a.b: 1, x: 1, a.c: 1} and the index key
- * {"": 1, "": 2, "": 3}, the SBE expression would produce the object {a: {b:1, c: 3}, x: 2}.
- */
-std::unique_ptr<sbe::EExpression> buildNewObjExpr(const IndexKeyPatternTreeNode* kpTree) {
-
- sbe::EExpression::Vector args;
- for (auto&& fieldName : kpTree->childrenOrder) {
- auto it = kpTree->children.find(fieldName);
-
- args.emplace_back(makeConstant(fieldName));
- if (it->second->indexKeySlot) {
- args.emplace_back(makeVariable(*it->second->indexKeySlot));
- } else {
- // The reason this is in an else branch is that in the case where we have an index key
- // like {a.b: ..., a: ...}, we've already made the logic for reconstructing the 'a'
- // portion, so the 'a.b' subtree can be skipped.
- args.push_back(buildNewObjExpr(it->second.get()));
- }
- }
-
- return sbe::makeE<sbe::EFunction>("newObj", std::move(args));
-}
-
-/**
- * Given a stage, and index key pattern a corresponding array of slot IDs, this function
- * add a ProjectStage to the tree which rehydrates the index key and stores the result in
- * 'resultSlot.'
- */
-std::unique_ptr<sbe::PlanStage> rehydrateIndexKey(std::unique_ptr<sbe::PlanStage> stage,
- const BSONObj& indexKeyPattern,
- PlanNodeId nodeId,
- const sbe::value::SlotVector& indexKeySlots,
- sbe::value::SlotId resultSlot) {
- auto kpTree = buildKeyPatternTree(indexKeyPattern, indexKeySlots);
- auto keyExpr = buildNewObjExpr(kpTree.get());
-
- return sbe::makeProjectStage(std::move(stage), nodeId, resultSlot, std::move(keyExpr));
-}
-
-/**
* Generates an EOF plan. Note that even though this plan will return nothing, it will still define
* the slots specified by 'reqs'.
*/
@@ -1635,19 +1492,16 @@ SlotBasedStageBuilder::buildProjectionSimple(const QuerySolutionNode* root,
const auto childResult = outputs.get(kResult);
outputs.set(kResult, _slotIdGenerator.generate());
- inputStage = sbe::makeS<sbe::MakeBsonObjStage>(
- std::move(inputStage),
- outputs.get(kResult),
- childResult,
- sbe::MakeBsonObjStage::FieldBehavior::keep,
- // TODO SERVER-67039 take a set instead of a vector here.
- std::vector<std::string>{pn->proj.getRequiredFields().begin(),
- pn->proj.getRequiredFields().end()},
- std::vector<std::string>{},
- sbe::value::SlotVector{},
- true,
- false,
- root->nodeId());
+ inputStage = sbe::makeS<sbe::MakeBsonObjStage>(std::move(inputStage),
+ outputs.get(kResult),
+ childResult,
+ sbe::MakeBsonObjStage::FieldBehavior::keep,
+ pn->proj.getRequiredFields(),
+ std::set<std::string>{},
+ sbe::value::SlotVector{},
+ true,
+ false,
+ root->nodeId());
return {std::move(inputStage), std::move(outputs)};
}
@@ -2948,7 +2802,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
boost::none,
boost::none,
std::vector<std::string>{},
- projectFields,
+ std::move(projectFields),
fieldSlots,
true,
false,
diff --git a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp
index f3011ec2bac..35c752f7dcb 100644
--- a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp
@@ -328,7 +328,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo
// replica set initialization message. If this fails, then we throw
// ErrorCodes::OplogQueryMinTsMissing. We avoid doing this check on the resumable branch of a
// tailable scan; it only needs to be done once, when the initial branch is run.
- if (csn->assertTsHasNotFallenOffOplog && !isTailableResumeBranch) {
+ if (csn->assertTsHasNotFallenOff && !isTailableResumeBranch) {
invariant(csn->shouldTrackLatestOplogTimestamp);
// There should always be a 'tsSlot' already allocated on the RuntimeEnvironment for the
@@ -388,7 +388,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo
makeBinaryOp(sbe::EPrimBinary::lessEq,
makeVariable(minTsSlot),
makeConstant(sbe::value::TypeTags::Timestamp,
- csn->assertTsHasNotFallenOffOplog->asULL())),
+ csn->assertTsHasNotFallenOff->asULL())),
makeBinaryOp(
sbe::EPrimBinary::logicAnd,
makeBinaryOp(sbe::EPrimBinary::eq,
diff --git a/src/mongo/db/query/sbe_stage_builder_expression.cpp b/src/mongo/db/query/sbe_stage_builder_expression.cpp
index 89541a241bc..cbeac015678 100644
--- a/src/mongo/db/query/sbe_stage_builder_expression.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_expression.cpp
@@ -377,6 +377,7 @@ public:
void visit(const ExpressionLn* expr) final {}
void visit(const ExpressionLog* expr) final {}
void visit(const ExpressionLog10* expr) final {}
+ void visit(const ExpressionInternalFLEEqual* expr) final {}
void visit(const ExpressionMap* expr) final {}
void visit(const ExpressionMeta* expr) final {}
void visit(const ExpressionMod* expr) final {}
@@ -609,6 +610,7 @@ public:
void visit(const ExpressionLn* expr) final {}
void visit(const ExpressionLog* expr) final {}
void visit(const ExpressionLog10* expr) final {}
+ void visit(const ExpressionInternalFLEEqual* expr) final {}
void visit(const ExpressionMap* expr) final {}
void visit(const ExpressionMeta* expr) final {}
void visit(const ExpressionMod* expr) final {}
@@ -2317,6 +2319,9 @@ public:
_context->pushExpr(
sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(log10Expr)));
}
+ void visit(const ExpressionInternalFLEEqual* expr) final {
+ unsupportedExpression("$_internalFleEq");
+ }
void visit(const ExpressionMap* expr) final {
unsupportedExpression("$map");
}
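
The visitor additions above follow the usual pattern: every expression type gets its own visit() overload across the pre/in/post visitors, and expressions SBE cannot translate funnel into a single unsupported-expression helper. A tiny standalone illustration of that shape (hypothetical expression types, not the real ExpressionVisitor hierarchy):

#include <stdexcept>
#include <string>

struct ExprA;
struct ExprB;

struct ExprVisitor {
    virtual ~ExprVisitor() = default;
    virtual void visit(const ExprA&) = 0;
    virtual void visit(const ExprB&) = 0;
};

struct Expr {
    virtual ~Expr() = default;
    virtual void accept(ExprVisitor& v) const = 0;
};
struct ExprA : Expr {
    void accept(ExprVisitor& v) const override { v.visit(*this); }
};
struct ExprB : Expr {
    void accept(ExprVisitor& v) const override { v.visit(*this); }
};

struct TranslatingVisitor : ExprVisitor {
    void visit(const ExprA&) override { /* translate ExprA */ }
    // ExprB plays the role of $_internalFleEq: recognized, but not translatable.
    void visit(const ExprB&) override { unsupported("ExprB"); }

private:
    void unsupported(const std::string& name) {
        throw std::runtime_error("unsupported expression: " + name);
    }
};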
diff --git a/src/mongo/db/query/sbe_stage_builder_filter.cpp b/src/mongo/db/query/sbe_stage_builder_filter.cpp
index 80dedd6d89c..5005ddef8a5 100644
--- a/src/mongo/db/query/sbe_stage_builder_filter.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_filter.cpp
@@ -2060,6 +2060,23 @@ std::pair<boost::optional<sbe::value::SlotId>, EvalStage> generateFilter(
return {boost::none, std::move(stage)};
}
+ // We only use the classic matcher path (aka "franken matcher") when the plan cache is off,
+ // because embedding the classic matcher into the query execution tree is not compatible with
+ // auto parameterization. All of the constants used in the filter are in the MatchExpression
+ // itself, rather than in slots.
+ if (!feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV()) {
+ tassert(6681403, "trackIndex=true not supported for classic matcher in SBE", !trackIndex);
+
+ auto expr = makeFunction("applyClassicMatcher",
+ makeConstant(sbe::value::TypeTags::classicMatchExpresion,
+ sbe::value::bitcastFrom<const MatchExpression*>(
+ root->shallowClone().release())),
+ makeVariable(inputSlot));
+
+ auto filterStage = makeFilter<false>(std::move(stage), std::move(expr), planNodeId);
+ return {boost::none, std::move(filterStage)};
+ }
+
auto stateHelper = makeFilterStateHelper(trackIndex);
MatchExpressionVisitorContext context{
state, std::move(stage), inputSlot, root, planNodeId, *stateHelper};
@@ -2068,7 +2085,6 @@ std::pair<boost::optional<sbe::value::SlotId>, EvalStage> generateFilter(
MatchExpressionPostVisitor postVisitor{&context};
MatchExpressionWalker walker{&preVisitor, &inVisitor, &postVisitor};
tree_walker::walk<true, MatchExpression>(root, &walker);
-
auto [resultSlot, resultStage] = context.done();
return {resultSlot, std::move(resultStage)};
}
@@ -2085,8 +2101,29 @@ EvalStage generateIndexFilter(StageBuilderState& state,
return stage;
}
- // Index filters never need to track the index of a matching element in the array as they cannot
- // be used with a positional projection.
+ // We only use the classic matcher path (aka "franken matcher") when the plan cache is off,
+ // because embedding the classic matcher into the query execution tree is not compatible with
+ // auto parameterization. All of the constants used in the filter are in the MatchExpression
+ // itself, rather than in slots.
+ if (!feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV()) {
+ BSONObjBuilder keyPatternBuilder;
+ for (auto& field : keyFields) {
+ keyPatternBuilder.append(field, 1);
+ }
+ auto keyPatternTree = buildKeyPatternTree(keyPatternBuilder.obj(), keySlots);
+ auto mkObjExpr = buildNewObjExpr(keyPatternTree.get());
+
+ auto expr = makeFunction("applyClassicMatcher",
+ makeConstant(sbe::value::TypeTags::classicMatchExpresion,
+ sbe::value::bitcastFrom<const MatchExpression*>(
+ root->shallowClone().release())),
+ std::move(mkObjExpr));
+
+ return makeFilter<false>(std::move(stage), std::move(expr), planNodeId);
+ }
+
+ // Covered filters never need to track the index of a matching element in the array as they
+ // cannot be used with a positional projection.
const bool trackIndex = false;
auto stateHelper = makeFilterStateHelper(trackIndex);
MatchExpressionVisitorContext context{state,
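
The new branches embed a pointer to the classic MatchExpression into the SBE tree as a constant, which is why they are taken only when the SBE plan cache is off: a cached, auto-parameterized plan must read its constants from slots at run time rather than from values frozen into the tree. A hedged plain-C++ illustration of the difference (hypothetical names, no SBE types):

#include <functional>
#include <map>
#include <string>

// A "slot environment": runtime parameters a cached plan reads at execution time.
using Slots = std::map<std::string, long long>;

// Parameterized filter: the comparison value comes from a slot, so the same
// compiled predicate can be reused from a cache with different constants.
std::function<bool(long long, const Slots&)> makeParameterizedFilter(std::string slotName) {
    return [slotName](long long fieldValue, const Slots& slots) {
        return fieldValue == slots.at(slotName);
    };
}

// "Franken matcher" style: the constant is captured (frozen) into the predicate
// itself, so the compiled filter is only correct for this one query instance.
std::function<bool(long long)> makeEmbeddedConstantFilter(long long constant) {
    return [constant](long long fieldValue) { return fieldValue == constant; };
}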
diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.cpp b/src/mongo/db/query/sbe_stage_builder_helpers.cpp
index e881a9ab4eb..cb61aa76a3d 100644
--- a/src/mongo/db/query/sbe_stage_builder_helpers.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_helpers.cpp
@@ -1015,4 +1015,93 @@ sbe::value::SlotId StageBuilderState::registerInputParamSlot(
return slotId;
}
+
+/**
+ * Given a key pattern and an array of slots of equal size, builds an IndexKeyPatternTreeNode
+ * representing the mapping between key pattern component and slot.
+ *
+ * Note that this will "short circuit" in cases where the index key pattern contains two components
+ * where one is a subpath of the other. For example with the key pattern {a:1, a.b: 1}, the "a.b"
+ * component will not be represented in the output tree. For the purpose of rehydrating index keys,
+ * this is fine (and actually preferable).
+ */
+std::unique_ptr<IndexKeyPatternTreeNode> buildKeyPatternTree(const BSONObj& keyPattern,
+ const sbe::value::SlotVector& slots) {
+ size_t i = 0;
+
+ auto root = std::make_unique<IndexKeyPatternTreeNode>();
+ for (auto&& elem : keyPattern) {
+ auto* node = root.get();
+ bool skipElem = false;
+
+ FieldRef fr(elem.fieldNameStringData());
+ for (FieldIndex j = 0; j < fr.numParts(); ++j) {
+ const auto part = fr.getPart(j);
+ if (auto it = node->children.find(part); it != node->children.end()) {
+ node = it->second.get();
+ if (node->indexKeySlot) {
+ // We're processing a sub-path of a path that's already indexed. We can
+ // bail out here since we won't use the sub-path when reconstructing the
+ // object.
+ skipElem = true;
+ break;
+ }
+ } else {
+ node = node->emplace(part);
+ }
+ }
+
+ if (!skipElem) {
+ node->indexKeySlot = slots[i];
+ }
+
+ ++i;
+ }
+
+ return root;
+}
+
+/**
+ * Given a root IndexKeyPatternTreeNode, this function will construct an SBE expression for
+ * producing a partial object from an index key.
+ *
+ * For example, given the index key pattern {a.b: 1, x: 1, a.c: 1} and the index key
+ * {"": 1, "": 2, "": 3}, the SBE expression would produce the object {a: {b:1, c: 3}, x: 2}.
+ */
+std::unique_ptr<sbe::EExpression> buildNewObjExpr(const IndexKeyPatternTreeNode* kpTree) {
+
+ sbe::EExpression::Vector args;
+ for (auto&& fieldName : kpTree->childrenOrder) {
+ auto it = kpTree->children.find(fieldName);
+
+ args.emplace_back(makeConstant(fieldName));
+ if (it->second->indexKeySlot) {
+ args.emplace_back(makeVariable(*it->second->indexKeySlot));
+ } else {
+ // This is in an else branch because, for an index key like {a.b: ..., a: ...}, the
+ // logic for reconstructing the 'a' portion has already been generated, so the 'a.b'
+ // subtree can be skipped.
+ args.push_back(buildNewObjExpr(it->second.get()));
+ }
+ }
+
+ return sbe::makeE<sbe::EFunction>("newObj", std::move(args));
+}
+
+/**
+ * Given a stage, an index key pattern, and a corresponding array of slot IDs, this function
+ * adds a ProjectStage to the tree which rehydrates the index key and stores the result in
+ * 'resultSlot.'
+ */
+std::unique_ptr<sbe::PlanStage> rehydrateIndexKey(std::unique_ptr<sbe::PlanStage> stage,
+ const BSONObj& indexKeyPattern,
+ PlanNodeId nodeId,
+ const sbe::value::SlotVector& indexKeySlots,
+ sbe::value::SlotId resultSlot) {
+ auto kpTree = buildKeyPatternTree(indexKeyPattern, indexKeySlots);
+ auto keyExpr = buildNewObjExpr(kpTree.get());
+
+ return sbe::makeProjectStage(std::move(stage), nodeId, resultSlot, std::move(keyExpr));
+}
+
} // namespace mongo::stage_builder
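
The helpers relocated here first build a tree from the index key pattern and then rehydrate a partial document from the key's values. The sketch below reproduces both steps over plain standard-library containers (no SBE expressions; the rehydrated "object" is just printed), including the short-circuit for key patterns whose components are prefixes of each other, and reproduces the {a.b: 1, x: 1, a.c: 1} example from the comments:

#include <cstddef>
#include <iostream>
#include <map>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

// Simplified analogue of IndexKeyPatternTreeNode: children are visited in
// insertion order via 'order', and leaves carry the value of their key part.
struct KeyNode {
    std::map<std::string, std::unique_ptr<KeyNode>> children;
    std::vector<std::string> order;
    std::optional<int> value;  // stands in for the slot holding the key value
};

// Split "a.b.c" into path components.
std::vector<std::string> splitPath(const std::string& path) {
    std::vector<std::string> parts;
    std::stringstream ss(path);
    std::string part;
    while (std::getline(ss, part, '.')) parts.push_back(part);
    return parts;
}

// Analogue of buildKeyPatternTree: paths paired with values, with the same
// "short circuit" when a prefix of the path already carries a value.
std::unique_ptr<KeyNode> buildTree(const std::vector<std::pair<std::string, int>>& keyParts) {
    auto root = std::make_unique<KeyNode>();
    for (const auto& [path, value] : keyParts) {
        KeyNode* node = root.get();
        bool skip = false;
        for (const auto& part : splitPath(path)) {
            auto it = node->children.find(part);
            if (it != node->children.end()) {
                node = it->second.get();
                if (node->value) { skip = true; break; }  // prefix already indexed
            } else {
                auto child = std::make_unique<KeyNode>();
                KeyNode* raw = child.get();
                node->children.emplace(part, std::move(child));
                node->order.push_back(part);
                node = raw;
            }
        }
        if (!skip) node->value = value;
    }
    return root;
}

// Analogue of buildNewObjExpr: print the rehydrated object instead of building
// an SBE "newObj" expression.
void printObject(const KeyNode* node, std::ostream& out) {
    out << "{";
    for (std::size_t i = 0; i < node->order.size(); ++i) {
        const auto& name = node->order[i];
        const KeyNode* child = node->children.at(name).get();
        out << (i ? ", " : "") << name << ": ";
        if (child->value) out << *child->value; else printObject(child, out);
    }
    out << "}";
}

int main() {
    // Key pattern {a.b: 1, x: 1, a.c: 1} with index key {"": 1, "": 2, "": 3}
    // rehydrates to {a: {b: 1, c: 3}, x: 2}, matching the comment above.
    auto tree = buildTree({{"a.b", 1}, {"x", 2}, {"a.c", 3}});
    printObject(tree.get(), std::cout);
    std::cout << "\n";
}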
diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.h b/src/mongo/db/query/sbe_stage_builder_helpers.h
index ce718023632..05cf73896e0 100644
--- a/src/mongo/db/query/sbe_stage_builder_helpers.h
+++ b/src/mongo/db/query/sbe_stage_builder_helpers.h
@@ -39,6 +39,7 @@
#include "mongo/db/exec/sbe/stages/makeobj.h"
#include "mongo/db/exec/sbe/stages/project.h"
#include "mongo/db/pipeline/expression.h"
+#include "mongo/db/query/projection_ast.h"
#include "mongo/db/query/sbe_stage_builder_eval_frame.h"
#include "mongo/db/query/stage_types.h"
@@ -948,4 +949,69 @@ struct StageBuilderState {
stdx::unordered_map<std::string /*field path*/, EvalExpr> preGeneratedExprs;
};
+/**
+ * Tree representing index key pattern or a subset of it.
+ *
+ * For example, the key pattern {a.b: 1, x: 1, a.c: 1} would look like:
+ *
+ * <root>
+ * / |
+ * a x
+ * / \
+ * b c
+ *
+ * This tree is used for building SBE subtrees to re-hydrate index keys and for covered projections.
+ */
+struct IndexKeyPatternTreeNode {
+ IndexKeyPatternTreeNode* emplace(StringData fieldComponent) {
+ auto newNode = std::make_unique<IndexKeyPatternTreeNode>();
+ const auto newNodeRaw = newNode.get();
+ children.emplace(fieldComponent, std::move(newNode));
+ childrenOrder.push_back(fieldComponent.toString());
+
+ return newNodeRaw;
+ }
+
+ /**
+ * Returns the leaf node matching the field path. If the field path provided resolves to a
+ * non-leaf node, null will be returned.
+ *
+ * For example, if the tree was built for key pattern {a: 1, a.b: 1}, this method will return
+ * nullptr for field path "a". On the other hand, it will return the corresponding node for
+ * field path "a.b".
+ */
+ IndexKeyPatternTreeNode* findLeafNode(const FieldRef& fieldRef, size_t currentIndex = 0) {
+ if (currentIndex == fieldRef.numParts()) {
+ if (children.empty()) {
+ return this;
+ }
+ return nullptr;
+ }
+
+ auto currentPart = fieldRef.getPart(currentIndex);
+ if (auto it = children.find(currentPart); it != children.end()) {
+ return it->second->findLeafNode(fieldRef, currentIndex + 1);
+ } else {
+ return nullptr;
+ }
+ }
+
+ StringMap<std::unique_ptr<IndexKeyPatternTreeNode>> children;
+ std::vector<std::string> childrenOrder;
+
+ // Which slot the index key for this component is stored in. May be boost::none for non-leaf
+ // nodes.
+ boost::optional<sbe::value::SlotId> indexKeySlot;
+};
+
+std::unique_ptr<IndexKeyPatternTreeNode> buildKeyPatternTree(const BSONObj& keyPattern,
+ const sbe::value::SlotVector& slots);
+std::unique_ptr<sbe::EExpression> buildNewObjExpr(const IndexKeyPatternTreeNode* kpTree);
+
+std::unique_ptr<sbe::PlanStage> rehydrateIndexKey(std::unique_ptr<sbe::PlanStage> stage,
+ const BSONObj& indexKeyPattern,
+ PlanNodeId nodeId,
+ const sbe::value::SlotVector& indexKeySlots,
+ sbe::value::SlotId resultSlot);
+
} // namespace mongo::stage_builder
diff --git a/src/mongo/db/query/sbe_stage_builder_projection.cpp b/src/mongo/db/query/sbe_stage_builder_projection.cpp
index e4e222bdff8..bff2daf2f78 100644
--- a/src/mongo/db/query/sbe_stage_builder_projection.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_projection.cpp
@@ -413,7 +413,7 @@ public:
childLevelResultSlot,
childLevelInputSlot,
sbe::MakeBsonObjStage::FieldBehavior::keep,
- keepFields,
+ std::move(keepFields),
std::move(projectFields),
std::move(projectSlots),
true,
diff --git a/src/mongo/db/query/sbe_sub_planner.cpp b/src/mongo/db/query/sbe_sub_planner.cpp
index e5e714ad3aa..c6ce37cb434 100644
--- a/src/mongo/db/query/sbe_sub_planner.cpp
+++ b/src/mongo/db/query/sbe_sub_planner.cpp
@@ -116,8 +116,9 @@ CandidatePlans SubPlanner::plan(
// TODO SERVER-61507: do it unconditionally when $group pushdown is integrated with the SBE plan
// cache.
- if (_cq.pipeline().empty()) {
- plan_cache_util::updatePlanCache(_opCtx, mainColl, _cq, *compositeSolution, *root, data);
+ if (canonical_query_encoder::canUseSbePlanCache(_cq)) {
+ plan_cache_util::updatePlanCache(
+ _opCtx, _collections, _cq, *compositeSolution, *root, data);
}
return {makeVector(plan_ranker::CandidatePlan{
diff --git a/src/mongo/db/query/sbe_utils.cpp b/src/mongo/db/query/sbe_utils.cpp
index 043027f1e89..4284646b510 100644
--- a/src/mongo/db/query/sbe_utils.cpp
+++ b/src/mongo/db/query/sbe_utils.cpp
@@ -238,6 +238,7 @@ bool isQuerySbeCompatible(const CollectionPtr* collection,
const bool allExpressionsSupported = expCtx && expCtx->sbeCompatible;
const bool isNotCount = !(plannerOptions & QueryPlannerParams::IS_COUNT);
const bool isNotOplog = !cq->nss().isOplog();
+ const bool isNotChangeCollection = !cq->nss().isChangeCollection();
const bool doesNotContainMetadataRequirements = cq->metadataDeps().none();
const bool doesNotSortOnMetaOrPathWithNumericComponents =
!sortPattern || std::all_of(sortPattern->begin(), sortPattern->end(), [](auto&& part) {
@@ -253,9 +254,15 @@ bool isQuerySbeCompatible(const CollectionPtr* collection,
const bool isQueryNotAgainstClusteredCollection =
!(collection->get() && collection->get()->isClustered());
+ const bool doesNotRequireMatchDetails =
+ !cq->getProj() || !cq->getProj()->requiresMatchDetails();
+
+ const bool doesNotHaveElemMatchProject = !cq->getProj() || !cq->getProj()->containsElemMatch();
+
return allExpressionsSupported && isNotCount && doesNotContainMetadataRequirements &&
isQueryNotAgainstTimeseriesCollection && isQueryNotAgainstClusteredCollection &&
- doesNotSortOnMetaOrPathWithNumericComponents && isNotOplog;
+ doesNotSortOnMetaOrPathWithNumericComponents && isNotOplog && doesNotRequireMatchDetails &&
+ doesNotHaveElemMatchProject && isNotChangeCollection;
}
bool validateInputParamsBindings(
diff --git a/src/mongo/db/record_id.h b/src/mongo/db/record_id.h
index 544518ad544..2302630b37b 100644
--- a/src/mongo/db/record_id.h
+++ b/src/mongo/db/record_id.h
@@ -311,11 +311,6 @@ public:
int size;
auto str = elem.binData(size);
return RecordId(str, size);
- } else if (elem.type() == BSONType::String) {
- // Support old format for upgrades during resumable index builds.
- // TODO SERVER-62369: Remove when we branch out 6.0.
- auto str = hexblob::decode(elem.String());
- return RecordId(str.c_str(), str.size());
} else {
uasserted(ErrorCodes::BadValue,
fmt::format("Could not deserialize RecordId with type {}", elem.type()));
diff --git a/src/mongo/db/record_id_helpers.cpp b/src/mongo/db/record_id_helpers.cpp
index e9147666da8..bf313976a3b 100644
--- a/src/mongo/db/record_id_helpers.cpp
+++ b/src/mongo/db/record_id_helpers.cpp
@@ -48,23 +48,35 @@
namespace mongo {
namespace record_id_helpers {
-StatusWith<RecordId> keyForOptime(const Timestamp& opTime) {
- // Make sure secs and inc wouldn't be negative if treated as signed. This ensures that they
- // don't sort differently when put in a RecordId. It also avoids issues with Null/Invalid
- // RecordIds
- if (opTime.getSecs() > uint32_t(std::numeric_limits<int32_t>::max()))
- return {ErrorCodes::BadValue, "ts secs too high"};
-
- if (opTime.getInc() > uint32_t(std::numeric_limits<int32_t>::max()))
- return {ErrorCodes::BadValue, "ts inc too high"};
-
- const auto out = RecordId(opTime.getSecs(), opTime.getInc());
- if (out <= RecordId::minLong())
- return {ErrorCodes::BadValue, "ts too low"};
- if (out >= RecordId::maxLong())
- return {ErrorCodes::BadValue, "ts too high"};
-
- return out;
+StatusWith<RecordId> keyForOptime(const Timestamp& opTime, const KeyFormat keyFormat) {
+ switch (keyFormat) {
+ case KeyFormat::Long: {
+ // Make sure secs and inc wouldn't be negative if treated as signed. This ensures that
+ // they don't sort differently when put in a RecordId. It also avoids issues with
+ // Null/Invalid RecordIds
+ if (opTime.getSecs() > uint32_t(std::numeric_limits<int32_t>::max()))
+ return {ErrorCodes::BadValue, "ts secs too high"};
+
+ if (opTime.getInc() > uint32_t(std::numeric_limits<int32_t>::max()))
+ return {ErrorCodes::BadValue, "ts inc too high"};
+
+ const auto out = RecordId(opTime.getSecs(), opTime.getInc());
+ if (out <= RecordId::minLong())
+ return {ErrorCodes::BadValue, "ts too low"};
+ if (out >= RecordId::maxLong())
+ return {ErrorCodes::BadValue, "ts too high"};
+
+ return out;
+ }
+ case KeyFormat::String: {
+ KeyString::Builder keyBuilder(KeyString::Version::kLatestVersion);
+ keyBuilder.appendTimestamp(opTime);
+ return RecordId(keyBuilder.getBuffer(), keyBuilder.getSize());
+ }
+ default: { MONGO_UNREACHABLE_TASSERT(6521004); }
+ }
+
+ MONGO_UNREACHABLE_TASSERT(6521005);
}
@@ -84,7 +96,7 @@ StatusWith<RecordId> extractKeyOptime(const char* data, int len) {
if (elem.type() != bsonTimestamp)
return {ErrorCodes::BadValue, "ts must be a Timestamp"};
- return keyForOptime(elem.timestamp());
+ return keyForOptime(elem.timestamp(), KeyFormat::Long);
}
StatusWith<RecordId> keyForDoc(const BSONObj& doc,
diff --git a/src/mongo/db/record_id_helpers.h b/src/mongo/db/record_id_helpers.h
index 378466df45a..b957b30cce6 100644
--- a/src/mongo/db/record_id_helpers.h
+++ b/src/mongo/db/record_id_helpers.h
@@ -46,7 +46,7 @@ namespace record_id_helpers {
* Converts Timestamp to a RecordId in an unspecified manner that is safe to use as the key
* in a RecordStore.
*/
-StatusWith<RecordId> keyForOptime(const Timestamp& opTime);
+StatusWith<RecordId> keyForOptime(const Timestamp& opTime, KeyFormat keyFormat);
/**
* For clustered collections, converts various values into a RecordId.
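
keyForOptime now dispatches on the record store's KeyFormat: the long format packs (secs, inc) into a 64-bit RecordId after range checks, while the string format builds a byte-ordered key. A standalone sketch of the same dispatch with hypothetical encodings (the real RecordId packing and KeyString encoding are not reproduced here):

#include <cstdint>
#include <stdexcept>
#include <string>
#include <variant>

// Hypothetical stand-ins: a record id is either a 64-bit integer key or a
// binary string key, mirroring KeyFormat::Long and KeyFormat::String above.
enum class KeyFormatSketch { Long, String };
using RecordIdSketch = std::variant<std::int64_t, std::string>;

RecordIdSketch keyForOptimeSketch(std::uint32_t secs, std::uint32_t inc, KeyFormatSketch fmt) {
    switch (fmt) {
        case KeyFormatSketch::Long: {
            // Both halves must fit in a signed 32-bit value so the packed id
            // sorts the same way as the timestamp and stays in the valid range.
            constexpr auto kMax = static_cast<std::uint32_t>(INT32_MAX);
            if (secs > kMax) throw std::invalid_argument("ts secs too high");
            if (inc > kMax) throw std::invalid_argument("ts inc too high");
            return (static_cast<std::int64_t>(secs) << 32) | inc;
        }
        case KeyFormatSketch::String: {
            // A big-endian encoding of (secs, inc) keeps byte-wise ordering
            // consistent with timestamp ordering, similar in spirit to the
            // KeyString-based encoding used above.
            std::string key(8, '\0');
            for (int i = 0; i < 4; ++i) {
                key[i] = static_cast<char>((secs >> (24 - 8 * i)) & 0xff);
                key[4 + i] = static_cast<char>((inc >> (24 - 8 * i)) & 0xff);
            }
            return key;
        }
    }
    throw std::logic_error("unknown key format");
}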
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index e9bcecfbdbf..962477568b3 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -261,6 +261,7 @@ env.Library(
'$BUILD_DIR/mongo/db/catalog/catalog_helpers',
'$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/catalog/multi_index_block',
+ '$BUILD_DIR/mongo/db/change_stream_change_collection_manager',
'$BUILD_DIR/mongo/db/common',
'$BUILD_DIR/mongo/db/concurrency/exception_util',
'$BUILD_DIR/mongo/db/dbhelpers',
@@ -529,9 +530,11 @@ env.Library(
'roll_back_local_operations',
],
LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/catalog/import_collection_oplog_entry',
'$BUILD_DIR/mongo/db/index_builds_coordinator_interface',
'$BUILD_DIR/mongo/db/multitenancy',
+ '$BUILD_DIR/mongo/db/repl/tenant_migration_access_blocker',
'$BUILD_DIR/mongo/db/s/sharding_runtime_d',
'$BUILD_DIR/mongo/db/storage/historical_ident_tracker',
'$BUILD_DIR/mongo/idl/server_parameter',
@@ -619,6 +622,7 @@ env.Library(
'storage_interface',
],
LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/change_stream_change_collection_manager',
'$BUILD_DIR/mongo/db/commands/mongod_fsync',
'$BUILD_DIR/mongo/db/concurrency/exception_util',
'$BUILD_DIR/mongo/db/storage/storage_control',
@@ -1705,6 +1709,7 @@ if wiredtiger:
'$BUILD_DIR/mongo/db/logical_time',
'$BUILD_DIR/mongo/db/multitenancy',
'$BUILD_DIR/mongo/db/op_observer_impl',
+ '$BUILD_DIR/mongo/db/pipeline/change_stream_expired_pre_image_remover',
'$BUILD_DIR/mongo/db/query/command_request_response',
'$BUILD_DIR/mongo/db/s/sharding_runtime_d',
'$BUILD_DIR/mongo/db/service_context_d_test_fixture',
diff --git a/src/mongo/db/repl/apply_ops.cpp b/src/mongo/db/repl/apply_ops.cpp
index 972c1fb2580..4887982c95c 100644
--- a/src/mongo/db/repl/apply_ops.cpp
+++ b/src/mongo/db/repl/apply_ops.cpp
@@ -28,11 +28,10 @@
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/repl/apply_ops.h"
#include "mongo/bson/util/bson_extract.h"
+#include "mongo/client/client_deprecated.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/catalog/database_holder.h"
@@ -297,12 +296,11 @@ Status _checkPrecondition(OperationContext* opCtx,
DBDirectClient db(opCtx);
// The preconditions come in "q: {{query: {...}, orderby: ..., etc.}}" format. This format
// is no longer used either internally or over the wire in other contexts. We are using a
- // legacy API from 'DBDirectClient' in order to parse this format and convert it into the
+ // legacy API from 'client_deprecated' in order to parse this format and convert it into the
// corresponding find command.
- auto preconditionQuery = Query::fromBSONDeprecated(preCondition["q"].Obj());
- auto cursor =
- db.query_DEPRECATED(nss, preconditionQuery.getFilter(), preconditionQuery, 1 /*limit*/);
- BSONObj realres = cursor->more() ? cursor->nextSafe() : BSONObj{};
+ FindCommandRequest findCmd{nss};
+ client_deprecated::initFindFromLegacyOptions(preCondition["q"].Obj(), 0, &findCmd);
+ BSONObj realres = db.findOne(std::move(findCmd));
// Get collection default collation.
auto databaseHolder = DatabaseHolder::get(opCtx);
diff --git a/src/mongo/db/repl/collection_bulk_loader_impl.cpp b/src/mongo/db/repl/collection_bulk_loader_impl.cpp
index 574607ea257..f000e93150c 100644
--- a/src/mongo/db/repl/collection_bulk_loader_impl.cpp
+++ b/src/mongo/db/repl/collection_bulk_loader_impl.cpp
@@ -95,7 +95,8 @@ Status CollectionBulkLoaderImpl::init(const std::vector<BSONObj>& secondaryIndex
UnreplicatedWritesBlock uwb(_opCtx.get());
// This enforces the buildIndexes setting in the replica set configuration.
CollectionWriter collWriter(_opCtx.get(), *_collection);
- auto indexCatalog = collWriter.getWritableCollection()->getIndexCatalog();
+ auto indexCatalog =
+ collWriter.getWritableCollection(_opCtx.get())->getIndexCatalog();
auto specs = indexCatalog->removeExistingIndexesNoChecks(
_opCtx.get(), collWriter.get(), secondaryIndexSpecs);
if (specs.size()) {
diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp
index bde00eef906..e380fbe6238 100644
--- a/src/mongo/db/repl/collection_cloner.cpp
+++ b/src/mongo/db/repl/collection_cloner.cpp
@@ -317,38 +317,43 @@ BaseCloner::AfterStageBehavior CollectionCloner::setupIndexBuildersForUnfinished
}
void CollectionCloner::runQuery() {
- // Non-resumable query.
- Query query;
+ FindCommandRequest findCmd{_sourceDbAndUuid};
if (_resumeToken) {
// Resume the query from where we left off.
LOGV2_DEBUG(21133, 1, "Collection cloner will resume the last successful query");
- query.requestResumeToken(true).resumeAfter(_resumeToken.get());
+ findCmd.setRequestResumeToken(true);
+ findCmd.setResumeAfter(_resumeToken.get());
} else {
// New attempt at a resumable query.
LOGV2_DEBUG(21134, 1, "Collection cloner will run a new query");
- query.requestResumeToken(true);
+ findCmd.setRequestResumeToken(true);
}
- query.hint(BSON("$natural" << 1));
+
+ findCmd.setHint(BSON("$natural" << 1));
+ findCmd.setNoCursorTimeout(true);
+ findCmd.setReadConcern(ReadConcernArgs::kLocal);
+ if (_collectionClonerBatchSize) {
+ findCmd.setBatchSize(_collectionClonerBatchSize);
+ }
+
+ ExhaustMode exhaustMode = collectionClonerUsesExhaust ? ExhaustMode::kOn : ExhaustMode::kOff;
// We reset this every time we retry or resume a query.
// We distinguish the first batch from the rest so that we only store the remote cursor id
// the first time we get it.
_firstBatchOfQueryRound = true;
- getClient()->query_DEPRECATED(
- [this](DBClientCursorBatchIterator& iter) { handleNextBatch(iter); },
- _sourceDbAndUuid,
- BSONObj{},
- query,
- nullptr /* fieldsToReturn */,
- QueryOption_NoCursorTimeout | QueryOption_SecondaryOk |
- (collectionClonerUsesExhaust ? QueryOption_Exhaust : 0),
- _collectionClonerBatchSize,
- ReadConcernArgs::kLocal);
+ auto cursor = getClient()->find(
+ std::move(findCmd), ReadPreferenceSetting{ReadPreference::SecondaryPreferred}, exhaustMode);
+
+ // Process the results of the cursor one batch at a time.
+ while (cursor->more()) {
+ handleNextBatch(*cursor);
+ }
}
-void CollectionCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
+void CollectionCloner::handleNextBatch(DBClientCursor& cursor) {
{
stdx::lock_guard<InitialSyncSharedData> lk(*getSharedData());
if (!getSharedData()->getStatus(lk).isOK()) {
@@ -370,15 +375,15 @@ void CollectionCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
if (_firstBatchOfQueryRound) {
// Store the cursorId of the remote cursor.
- _remoteCursorId = iter.getCursorId();
+ _remoteCursorId = cursor.getCursorId();
}
_firstBatchOfQueryRound = false;
{
stdx::lock_guard<Latch> lk(_mutex);
_stats.receivedBatches++;
- while (iter.moreInCurrentBatch()) {
- _documentsToInsert.emplace_back(iter.nextSafe());
+ while (cursor.moreInCurrentBatch()) {
+ _documentsToInsert.emplace_back(cursor.nextSafe());
}
}
@@ -394,7 +399,7 @@ void CollectionCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
}
// Store the resume token for this batch.
- _resumeToken = iter.getPostBatchResumeToken();
+ _resumeToken = cursor.getPostBatchResumeToken();
initialSyncHangCollectionClonerAfterHandlingBatchResponse.executeIf(
[&](const BSONObj&) {
diff --git a/src/mongo/db/repl/collection_cloner.h b/src/mongo/db/repl/collection_cloner.h
index 80d8a9d72bc..085c6abdb3f 100644
--- a/src/mongo/db/repl/collection_cloner.h
+++ b/src/mongo/db/repl/collection_cloner.h
@@ -207,10 +207,10 @@ private:
AfterStageBehavior setupIndexBuildersForUnfinishedIndexesStage();
/**
- * Put all results from a query batch into a buffer to be inserted, and schedule
- * it to be inserted.
+ * Put all results from a query batch into a buffer to be inserted, and schedule it to be
+ * inserted.
*/
- void handleNextBatch(DBClientCursorBatchIterator& iter);
+ void handleNextBatch(DBClientCursor& cursor);
/**
* Called whenever there is a new batch of documents ready from the DBClientConnection.
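
The rewritten runQuery() drains the clone's cursor one batch at a time, buffering each batch's documents and stashing the post-batch resume token so a retried attempt can resume where it left off. A compact standalone sketch of that drain-and-resume loop over a hypothetical cursor interface (not the real DBClientCursor):

#include <cstddef>
#include <optional>
#include <string>
#include <vector>

// Hypothetical cursor: yields documents in batches and exposes a resume token
// after each batch, loosely modeled on the interface used above.
struct CursorSketch {
    std::vector<std::vector<std::string>> batches;  // documents as opaque strings
    std::size_t current = 0;

    bool more() const { return current < batches.size(); }
    const std::vector<std::string>& nextBatch() { return batches[current++]; }
    std::optional<std::string> postBatchResumeToken() const {
        return current ? std::optional<std::string>("token-" + std::to_string(current))
                       : std::nullopt;
    }
};

struct ClonerSketch {
    std::vector<std::string> buffered;       // documents waiting to be inserted
    std::optional<std::string> resumeToken;  // where to resume after a failure

    void handleNextBatch(CursorSketch& cursor) {
        // Buffer every document in the current batch for insertion...
        const auto& batch = cursor.nextBatch();
        buffered.insert(buffered.end(), batch.begin(), batch.end());
        // ...then remember how far we got, so a retried query can resume here.
        resumeToken = cursor.postBatchResumeToken();
    }

    void runQuery(CursorSketch& cursor) {
        // Process the results of the cursor one batch at a time, as above.
        while (cursor.more()) {
            handleNextBatch(cursor);
        }
    }
};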
diff --git a/src/mongo/db/repl/data_replicator_external_state.h b/src/mongo/db/repl/data_replicator_external_state.h
index 87826b0f199..219b5a7ec31 100644
--- a/src/mongo/db/repl/data_replicator_external_state.h
+++ b/src/mongo/db/repl/data_replicator_external_state.h
@@ -90,7 +90,7 @@ public:
* Forwards the parsed metadata in the query results to the replication system.
*/
virtual void processMetadata(const rpc::ReplSetMetadata& replMetadata,
- rpc::OplogQueryMetadata oqMetadata) = 0;
+ const rpc::OplogQueryMetadata& oqMetadata) = 0;
/**
* Evaluates quality of sync source. Accepts the current sync source; the last optime on this
diff --git a/src/mongo/db/repl/data_replicator_external_state_impl.cpp b/src/mongo/db/repl/data_replicator_external_state_impl.cpp
index 8c43a013e9a..330cdf51305 100644
--- a/src/mongo/db/repl/data_replicator_external_state_impl.cpp
+++ b/src/mongo/db/repl/data_replicator_external_state_impl.cpp
@@ -84,7 +84,7 @@ OpTimeWithTerm DataReplicatorExternalStateImpl::getCurrentTermAndLastCommittedOp
}
void DataReplicatorExternalStateImpl::processMetadata(const rpc::ReplSetMetadata& replMetadata,
- rpc::OplogQueryMetadata oqMetadata) {
+ const rpc::OplogQueryMetadata& oqMetadata) {
OpTimeAndWallTime newCommitPoint = oqMetadata.getLastOpCommitted();
const bool fromSyncSource = true;
diff --git a/src/mongo/db/repl/data_replicator_external_state_impl.h b/src/mongo/db/repl/data_replicator_external_state_impl.h
index c408c484dc9..284cea32b41 100644
--- a/src/mongo/db/repl/data_replicator_external_state_impl.h
+++ b/src/mongo/db/repl/data_replicator_external_state_impl.h
@@ -53,7 +53,7 @@ public:
OpTimeWithTerm getCurrentTermAndLastCommittedOpTime() override;
void processMetadata(const rpc::ReplSetMetadata& replMetadata,
- rpc::OplogQueryMetadata oqMetadata) override;
+ const rpc::OplogQueryMetadata& oqMetadata) override;
ChangeSyncSourceAction shouldStopFetching(const HostAndPort& source,
const rpc::ReplSetMetadata& replMetadata,
diff --git a/src/mongo/db/repl/data_replicator_external_state_mock.cpp b/src/mongo/db/repl/data_replicator_external_state_mock.cpp
index ddcfc701ca6..0ee71071f03 100644
--- a/src/mongo/db/repl/data_replicator_external_state_mock.cpp
+++ b/src/mongo/db/repl/data_replicator_external_state_mock.cpp
@@ -87,9 +87,9 @@ OpTimeWithTerm DataReplicatorExternalStateMock::getCurrentTermAndLastCommittedOp
}
void DataReplicatorExternalStateMock::processMetadata(const rpc::ReplSetMetadata& replMetadata,
- rpc::OplogQueryMetadata oqMetadata) {
- replMetadataProcessed = replMetadata;
- oqMetadataProcessed = oqMetadata;
+ const rpc::OplogQueryMetadata& oqMetadata) {
+ replMetadataProcessed = rpc::ReplSetMetadata(replMetadata);
+ oqMetadataProcessed = rpc::OplogQueryMetadata(oqMetadata);
metadataWasProcessed = true;
}
diff --git a/src/mongo/db/repl/data_replicator_external_state_mock.h b/src/mongo/db/repl/data_replicator_external_state_mock.h
index 535ee513102..7ec17591a44 100644
--- a/src/mongo/db/repl/data_replicator_external_state_mock.h
+++ b/src/mongo/db/repl/data_replicator_external_state_mock.h
@@ -50,7 +50,7 @@ public:
OpTimeWithTerm getCurrentTermAndLastCommittedOpTime() override;
void processMetadata(const rpc::ReplSetMetadata& metadata,
- rpc::OplogQueryMetadata oqMetadata) override;
+ const rpc::OplogQueryMetadata& oqMetadata) override;
ChangeSyncSourceAction shouldStopFetching(const HostAndPort& source,
const rpc::ReplSetMetadata& replMetadata,
diff --git a/src/mongo/db/repl/idempotency_test.cpp b/src/mongo/db/repl/idempotency_test.cpp
index 9e94154f1c0..69777fdbc55 100644
--- a/src/mongo/db/repl/idempotency_test.cpp
+++ b/src/mongo/db/repl/idempotency_test.cpp
@@ -131,7 +131,7 @@ BSONObj RandomizedIdempotencyTest::canonicalizeDocumentForDataHash(const BSONObj
BSONObj RandomizedIdempotencyTest::getDoc() {
AutoGetCollectionForReadCommand autoColl(_opCtx.get(), nss);
BSONObj doc;
- Helpers::findById(_opCtx.get(), autoColl.getDb(), nss.ns(), kDocIdQuery, doc);
+ Helpers::findById(_opCtx.get(), nss.ns(), kDocIdQuery, doc);
return doc.getOwned();
}
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 0908b06213a..7ffffbbf2c1 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -389,7 +389,7 @@ void _logOpsInner(OperationContext* opCtx,
}
// Insert the oplog records to the respective tenants change collections.
- if (ChangeStreamChangeCollectionManager::isChangeCollectionEnabled()) {
+ if (ChangeStreamChangeCollectionManager::isChangeCollectionsModeActive()) {
ChangeStreamChangeCollectionManager::get(opCtx).insertDocumentsToChangeCollection(
opCtx, *records, timestamps);
}
@@ -1578,7 +1578,7 @@ Status applyOperation_inlock(OperationContext* opCtx,
invariant(op.getObject2());
auto&& documentId = *op.getObject2();
auto documentFound = Helpers::findById(
- opCtx, db, collection->ns().ns(), documentId, changeStreamPreImage);
+ opCtx, collection->ns().ns(), documentId, changeStreamPreImage);
invariant(documentFound);
}
diff --git a/src/mongo/db/repl/oplog_applier_impl.cpp b/src/mongo/db/repl/oplog_applier_impl.cpp
index e9ca22da35c..575035711e0 100644
--- a/src/mongo/db/repl/oplog_applier_impl.cpp
+++ b/src/mongo/db/repl/oplog_applier_impl.cpp
@@ -623,8 +623,6 @@ void OplogApplierImpl::_deriveOpsAndFillWriterVectors(
LogicalSessionIdMap<std::vector<OplogEntry*>> partialTxnOps;
CachedCollectionProperties collPropertiesCache;
- // Used to serialize writes to the tenant migrations donor and recipient namespaces.
- boost::optional<uint32_t> tenantMigrationsWriterId;
for (auto&& op : *ops) {
// If the operation's optime is before or the same as the beginApplyingOpTime we don't want
// to apply it, so don't include it in writerVectors.
@@ -706,19 +704,6 @@ void OplogApplierImpl::_deriveOpsAndFillWriterVectors(
continue;
}
- // Writes to the tenant migration namespaces must be serialized to preserve the order of
- // migration and access blocker states.
- if (op.getNss() == NamespaceString::kTenantMigrationDonorsNamespace ||
- op.getNss() == NamespaceString::kTenantMigrationRecipientsNamespace) {
- auto writerId = OplogApplierUtils::addToWriterVector(
- opCtx, &op, writerVectors, &collPropertiesCache, tenantMigrationsWriterId);
- if (!tenantMigrationsWriterId) {
- tenantMigrationsWriterId.emplace(writerId);
- } else {
- invariant(writerId == *tenantMigrationsWriterId);
- }
- continue;
- }
OplogApplierUtils::addToWriterVector(opCtx, &op, writerVectors, &collPropertiesCache);
}
}
diff --git a/src/mongo/db/repl/oplog_applier_impl_test.cpp b/src/mongo/db/repl/oplog_applier_impl_test.cpp
index 5784b645cc5..b734004bb28 100644
--- a/src/mongo/db/repl/oplog_applier_impl_test.cpp
+++ b/src/mongo/db/repl/oplog_applier_impl_test.cpp
@@ -2644,42 +2644,6 @@ TEST_F(OplogApplierImplWithSlowAutoAdvancingClockTest, DoNotLogNonSlowOpApplicat
ASSERT_EQUALS(0, countTextFormatLogLinesContaining(expected.str()));
}
-TEST_F(OplogApplierImplTest, SerializeOplogApplicationOfWritesToTenantMigrationNamespaces) {
- auto writerPool = makeReplWriterPool();
- NoopOplogApplierObserver observer;
- TrackOpsAppliedApplier oplogApplier(
- nullptr, // executor
- nullptr, // oplogBuffer
- &observer,
- ReplicationCoordinator::get(_opCtx.get()),
- getConsistencyMarkers(),
- getStorageInterface(),
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary),
- writerPool.get());
-
- const auto donorNss = NamespaceString::kTenantMigrationDonorsNamespace;
- const auto recipientNss = NamespaceString::kTenantMigrationRecipientsNamespace;
-
- std::vector<OplogEntry> opsToApply;
- opsToApply.push_back(
- makeDeleteDocumentOplogEntry({Timestamp(Seconds(2), 0), 1LL}, donorNss, BSON("_id" << 2)));
- opsToApply.push_back(makeInsertDocumentOplogEntry(
- {Timestamp(Seconds(3), 0), 1LL}, recipientNss, BSON("_id" << 3)));
- opsToApply.push_back(makeDeleteDocumentOplogEntry(
- {Timestamp(Seconds(4), 0), 1LL}, recipientNss, BSON("_id" << 3)));
- opsToApply.push_back(
- makeInsertDocumentOplogEntry({Timestamp(Seconds(5), 0), 1LL}, donorNss, BSON("_id" << 4)));
-
- ASSERT_OK(oplogApplier.applyOplogBatch(_opCtx.get(), opsToApply));
- const auto applied = oplogApplier.getOperationsApplied();
- ASSERT_EQ(4U, applied.size());
- ASSERT_BSONOBJ_EQ(opsToApply[0].getEntry().toBSON(), applied[0].getEntry().toBSON());
- ASSERT_BSONOBJ_EQ(opsToApply[1].getEntry().toBSON(), applied[1].getEntry().toBSON());
- ASSERT_BSONOBJ_EQ(opsToApply[2].getEntry().toBSON(), applied[2].getEntry().toBSON());
- ASSERT_BSONOBJ_EQ(opsToApply[3].getEntry().toBSON(), applied[3].getEntry().toBSON());
-}
-
-
class OplogApplierImplTxnTableTest : public OplogApplierImplTest {
public:
void setUp() override {
@@ -3319,10 +3283,7 @@ TEST_F(IdempotencyTest, EmptyCappedNamespaceNotFound) {
ASSERT_OK(runOpInitialSync(emptyCappedOp));
AutoGetCollectionForReadCommand autoColl(_opCtx.get(), nss);
-
- // Ensure that autoColl.getCollection() and autoColl.getDb() are both null.
- ASSERT_FALSE(autoColl.getCollection());
- ASSERT_FALSE(autoColl.getDb());
+ ASSERT_FALSE(autoColl);
}
TEST_F(IdempotencyTest, UpdateTwoFields) {
diff --git a/src/mongo/db/repl/oplog_entry.idl b/src/mongo/db/repl/oplog_entry.idl
index 7c1ba09f320..987f5806cbf 100644
--- a/src/mongo/db/repl/oplog_entry.idl
+++ b/src/mongo/db/repl/oplog_entry.idl
@@ -59,6 +59,9 @@ enums:
kPostImage: "postImage"
structs:
+ # TODO SERVER-67155 Ensure the tenantId is included in the serialized "ns" field when
+ # multitenancySupport is on but featureFlagRequireTenantId is off. Currently it is not
+ # included in either place.
DurableReplOperation:
description: "A document that represents an operation. Should never be used directly in
server code. Instead, create an instance of ReplOperation."
diff --git a/src/mongo/db/repl/oplog_entry_test.cpp b/src/mongo/db/repl/oplog_entry_test.cpp
index ae5039be724..4bcc4adfeb0 100644
--- a/src/mongo/db/repl/oplog_entry_test.cpp
+++ b/src/mongo/db/repl/oplog_entry_test.cpp
@@ -150,7 +150,9 @@ TEST(OplogEntryTest, InsertIncludesTidField) {
ASSERT(entry.getTid());
ASSERT_EQ(*entry.getTid(), tid);
- ASSERT_EQ(entry.getNss(), nss);
+ // TODO SERVER-66708 Check that (entry.getNss() == nss) once the OplogEntry deserializer
+ // passes "tid" to the NamespaceString constructor
+ ASSERT_EQ(entry.getNss(), NamespaceString(boost::none, nss.ns()));
ASSERT_BSONOBJ_EQ(entry.getIdElement().wrap("_id"), BSON("_id" << docId));
ASSERT_BSONOBJ_EQ(entry.getOperationToApply(), doc);
}
diff --git a/src/mongo/db/repl/oplog_fetcher.cpp b/src/mongo/db/repl/oplog_fetcher.cpp
index d50917d7fd7..6ec6c9778de 100644
--- a/src/mongo/db/repl/oplog_fetcher.cpp
+++ b/src/mongo/db/repl/oplog_fetcher.cpp
@@ -265,12 +265,8 @@ OpTime OplogFetcher::getLastOpTimeFetched_forTest() const {
return _getLastOpTimeFetched();
}
-BSONObj OplogFetcher::getFindQueryFilter_forTest() const {
- return _makeFindQueryFilter();
-}
-
-Query OplogFetcher::getFindQuerySettings_forTest(long long findTimeout) const {
- return _makeFindQuerySettings(findTimeout);
+FindCommandRequest OplogFetcher::makeFindCmdRequest_forTest(long long findTimeout) const {
+ return _makeFindCmdRequest(findTimeout);
}
Milliseconds OplogFetcher::getAwaitDataTimeout_forTest() const {
@@ -584,46 +580,56 @@ AggregateCommandRequest OplogFetcher::_makeAggregateCommandRequest(long long max
return aggRequest;
}
-BSONObj OplogFetcher::_makeFindQueryFilter() const {
- BSONObjBuilder queryBob;
-
- auto lastOpTimeFetched = _getLastOpTimeFetched();
- BSONObjBuilder filterBob;
- filterBob.append("ts", BSON("$gte" << lastOpTimeFetched.getTimestamp()));
- // Handle caller-provided filter.
- if (!_config.queryFilter.isEmpty()) {
- filterBob.append(
- "$or",
- BSON_ARRAY(_config.queryFilter << BSON("ts" << lastOpTimeFetched.getTimestamp())));
+FindCommandRequest OplogFetcher::_makeFindCmdRequest(long long findTimeout) const {
+ FindCommandRequest findCmd{_nss};
+
+ // Construct the find command's filter and set it on the 'FindCommandRequest'.
+ {
+ BSONObjBuilder queryBob;
+
+ auto lastOpTimeFetched = _getLastOpTimeFetched();
+ BSONObjBuilder filterBob;
+ filterBob.append("ts", BSON("$gte" << lastOpTimeFetched.getTimestamp()));
+ // Handle caller-provided filter.
+ if (!_config.queryFilter.isEmpty()) {
+ filterBob.append(
+ "$or",
+ BSON_ARRAY(_config.queryFilter << BSON("ts" << lastOpTimeFetched.getTimestamp())));
+ }
+ findCmd.setFilter(filterBob.obj());
+ }
+
+ findCmd.setTailable(true);
+ findCmd.setAwaitData(true);
+ findCmd.setMaxTimeMS(findTimeout);
+
+ if (_config.batchSize) {
+ findCmd.setBatchSize(_config.batchSize);
}
- return filterBob.obj();
-}
-Query OplogFetcher::_makeFindQuerySettings(long long findTimeout) const {
- Query query = Query().maxTimeMS(findTimeout);
if (_config.requestResumeToken) {
- query.hint(BSON("$natural" << 1)).requestResumeToken(true);
+ findCmd.setHint(BSON("$natural" << 1));
+ findCmd.setRequestResumeToken(true);
}
auto lastCommittedWithCurrentTerm =
_dataReplicatorExternalState->getCurrentTermAndLastCommittedOpTime();
auto term = lastCommittedWithCurrentTerm.value;
if (term != OpTime::kUninitializedTerm) {
- query.term(term);
+ findCmd.setTerm(term);
}
if (_config.queryReadConcern.isEmpty()) {
// This ensures that the sync source waits for all earlier oplog writes to be visible.
// Since Timestamp(0, 0) isn't allowed, Timestamp(0, 1) is the minimum we can use.
- query.readConcern(BSON("level"
- << "local"
- << "afterClusterTime" << Timestamp(0, 1)));
+ findCmd.setReadConcern(BSON("level"
+ << "local"
+ << "afterClusterTime" << Timestamp(0, 1)));
} else {
// Caller-provided read concern.
- query.appendElements(_config.queryReadConcern.toBSON());
+ findCmd.setReadConcern(_config.queryReadConcern.toBSONInner());
}
-
- return query;
+ return findCmd;
}
Status OplogFetcher::_createNewCursor(bool initialFind) {
@@ -651,17 +657,9 @@ Status OplogFetcher::_createNewCursor(bool initialFind) {
}
_cursor = std::move(ret.getValue());
} else {
+ auto findCmd = _makeFindCmdRequest(maxTimeMs);
_cursor = std::make_unique<DBClientCursor>(
- _conn.get(),
- _nss,
- _makeFindQueryFilter(),
- _makeFindQuerySettings(maxTimeMs),
- 0 /* limit */,
- 0 /* nToSkip */,
- nullptr /* fieldsToReturn */,
- QueryOption_CursorTailable | QueryOption_AwaitData |
- (oplogFetcherUsesExhaust ? QueryOption_Exhaust : 0),
- _config.batchSize);
+ _conn.get(), std::move(findCmd), ReadPreferenceSetting{}, oplogFetcherUsesExhaust);
}
_firstBatch = true;
@@ -817,7 +815,7 @@ Status OplogFetcher::_onSuccessfulBatch(const Documents& documents) {
"metadata"_attr = _metadataObj);
return oqMetadataResult.getStatus();
}
- auto oqMetadata = oqMetadataResult.getValue();
+ const auto& oqMetadata = oqMetadataResult.getValue();
if (_firstBatch) {
auto status =
@@ -884,7 +882,7 @@ Status OplogFetcher::_onSuccessfulBatch(const Documents& documents) {
"metadata"_attr = _metadataObj);
return metadataResult.getStatus();
}
- auto replSetMetadata = metadataResult.getValue();
+ const auto& replSetMetadata = metadataResult.getValue();
// Determine if we should stop syncing from our current sync source.
auto changeSyncSourceAction = _dataReplicatorExternalState->shouldStopFetching(
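Note: the oplog_fetcher.cpp hunks above collapse the old two-step construction (a BSON filter plus a deprecated Query settings object) into a single request object built by _makeFindCmdRequest. A minimal standalone sketch of that consolidation pattern follows, using a hypothetical FindRequest struct rather than the server's FindCommandRequest API:

// Sketch of folding separate "filter" and "settings" builders into one request
// object. FindRequest is an illustrative stand-in, not the server type.
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>

struct FindRequest {
    std::string ns;
    std::string filter;                 // e.g. a serialized {ts: {$gte: ...}} predicate
    bool tailable = false;
    bool awaitData = false;
    std::optional<int64_t> maxTimeMS;
    std::optional<int64_t> batchSize;
};

FindRequest makeFindRequest(const std::string& ns, int64_t findTimeout, int64_t batchSize) {
    FindRequest req;
    req.ns = ns;
    req.filter = "{ts: {$gte: <lastFetched>}}";  // placeholder for the real BSON filter
    req.tailable = true;
    req.awaitData = true;
    req.maxTimeMS = findTimeout;
    if (batchSize > 0) {
        req.batchSize = batchSize;
    }
    return req;
}

int main() {
    auto req = makeFindRequest("local.oplog.rs", 60000, 0);
    std::cout << req.ns << " maxTimeMS=" << *req.maxTimeMS
              << " batchSize set=" << req.batchSize.has_value() << "\n";
    return 0;
}

Keeping every option on one request object is what lets _createNewCursor above hand a single findCmd to the DBClientCursor constructor instead of a long positional argument list.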
diff --git a/src/mongo/db/repl/oplog_fetcher.h b/src/mongo/db/repl/oplog_fetcher.h
index 01a4347669b..2147eb9ebde 100644
--- a/src/mongo/db/repl/oplog_fetcher.h
+++ b/src/mongo/db/repl/oplog_fetcher.h
@@ -275,8 +275,7 @@ public:
/**
* Returns the `find` query run on the sync source's oplog.
*/
- BSONObj getFindQueryFilter_forTest() const;
- Query getFindQuerySettings_forTest(long long findTimeout) const;
+ FindCommandRequest makeFindCmdRequest_forTest(long long findTimeout) const;
/**
* Returns the OpTime of the last oplog entry fetched and processed.
@@ -387,11 +386,9 @@ private:
/**
* This function will create the `find` query to issue to the sync source. It is provided with
- * whether this is the initial attempt to create the `find` query to determine what the find
- * timeout should be.
+ * the value to use as the "maxTimeMS" for the find command.
*/
- BSONObj _makeFindQueryFilter() const;
- Query _makeFindQuerySettings(long long findTimeout) const;
+ FindCommandRequest _makeFindCmdRequest(long long findTimeout) const;
/**
* Gets the next batch from the exhaust cursor.
diff --git a/src/mongo/db/repl/oplog_fetcher_test.cpp b/src/mongo/db/repl/oplog_fetcher_test.cpp
index e98039a0f8a..adc09da1300 100644
--- a/src/mongo/db/repl/oplog_fetcher_test.cpp
+++ b/src/mongo/db/repl/oplog_fetcher_test.cpp
@@ -806,19 +806,25 @@ TEST_F(OplogFetcherTest,
auto oplogFetcher = makeOplogFetcher();
auto findTimeout = durationCount<Milliseconds>(oplogFetcher->getInitialFindMaxTime_forTest());
- auto filter = oplogFetcher->getFindQueryFilter_forTest();
+ auto findCmdRequest = oplogFetcher->makeFindCmdRequest_forTest(findTimeout);
+
+ auto filter = findCmdRequest.getFilter();
ASSERT_BSONOBJ_EQ(BSON("ts" << BSON("$gte" << lastFetched.getTimestamp())), filter);
- auto queryObj =
- (oplogFetcher->getFindQuerySettings_forTest(findTimeout)).getFullSettingsDeprecated();
- ASSERT_EQUALS(60000, queryObj.getIntField("$maxTimeMS"));
+ auto maxTimeMS = findCmdRequest.getMaxTimeMS();
+ ASSERT(maxTimeMS);
+ ASSERT_EQUALS(60000, *maxTimeMS);
- ASSERT_EQUALS(mongo::BSONType::Object, queryObj["readConcern"].type());
+ auto readConcern = findCmdRequest.getReadConcern();
+ ASSERT(readConcern);
ASSERT_BSONOBJ_EQ(BSON("level"
<< "local"
<< "afterClusterTime" << Timestamp(0, 1)),
- queryObj["readConcern"].Obj());
- ASSERT_EQUALS(dataReplicatorExternalState->currentTerm, queryObj["term"].numberLong());
+ *readConcern);
+
+ auto term = findCmdRequest.getTerm();
+ ASSERT(term);
+ ASSERT_EQUALS(dataReplicatorExternalState->currentTerm, *term);
}
TEST_F(OplogFetcherTest,
@@ -826,21 +832,26 @@ TEST_F(OplogFetcherTest,
dataReplicatorExternalState->currentTerm = OpTime::kUninitializedTerm;
auto oplogFetcher = makeOplogFetcher();
- auto filter = oplogFetcher->getFindQueryFilter_forTest();
- ASSERT_BSONOBJ_EQ(BSON("ts" << BSON("$gte" << lastFetched.getTimestamp())), filter);
-
// Test that the correct maxTimeMS is set if we are retrying the 'find' query.
auto findTimeout = durationCount<Milliseconds>(oplogFetcher->getRetriedFindMaxTime_forTest());
- auto queryObj =
- (oplogFetcher->getFindQuerySettings_forTest(findTimeout)).getFullSettingsDeprecated();
- ASSERT_EQUALS(2000, queryObj.getIntField("$maxTimeMS"));
+ auto findCmdRequest = oplogFetcher->makeFindCmdRequest_forTest(findTimeout);
- ASSERT_EQUALS(mongo::BSONType::Object, queryObj["readConcern"].type());
+ auto filter = findCmdRequest.getFilter();
+ ASSERT_BSONOBJ_EQ(BSON("ts" << BSON("$gte" << lastFetched.getTimestamp())), filter);
+
+ auto maxTimeMS = findCmdRequest.getMaxTimeMS();
+ ASSERT(maxTimeMS);
+ ASSERT_EQUALS(2000, *maxTimeMS);
+
+ auto readConcern = findCmdRequest.getReadConcern();
+ ASSERT(readConcern);
ASSERT_BSONOBJ_EQ(BSON("level"
<< "local"
<< "afterClusterTime" << Timestamp(0, 1)),
- queryObj["readConcern"].Obj());
- ASSERT_FALSE(queryObj.hasField("term"));
+ *readConcern);
+
+ auto term = findCmdRequest.getTerm();
+ ASSERT(!term);
}
TEST_F(
diff --git a/src/mongo/db/repl/primary_only_service.cpp b/src/mongo/db/repl/primary_only_service.cpp
index cb79c007ced..dbe696ecce7 100644
--- a/src/mongo/db/repl/primary_only_service.cpp
+++ b/src/mongo/db/repl/primary_only_service.cpp
@@ -362,6 +362,9 @@ void PrimaryOnlyService::onStepUp(const OpTime& stepUpOpTime) {
instance.second.waitForCompletion();
}
+ savedInstances.clear();
+ newThenOldScopedExecutor.reset();
+
PrimaryOnlyServiceHangBeforeLaunchingStepUpLogic.pauseWhileSet();
// Now wait for the first write of the new term to be majority committed, so that we know
diff --git a/src/mongo/db/repl/repl_set_commands.cpp b/src/mongo/db/repl/repl_set_commands.cpp
index 5823d880c14..7f35d2cfb31 100644
--- a/src/mongo/db/repl/repl_set_commands.cpp
+++ b/src/mongo/db/repl/repl_set_commands.cpp
@@ -528,6 +528,11 @@ public:
"primary.)\n"
"http://dochub.mongodb.org/core/replicasetcommands";
}
+
+ bool shouldCheckoutSession() const final {
+ return false;
+ }
+
CmdReplSetStepDown()
: ReplSetCommand("replSetStepDown"),
_stepDownCmdsWithForceExecutedMetric("commands.replSetStepDownWithForce.total",
@@ -685,7 +690,7 @@ public:
if (metadataResult.isOK()) {
// New style update position command has metadata, which may inform the
// upstream of a higher term.
- auto metadata = metadataResult.getValue();
+ const auto& metadata = metadataResult.getValue();
replCoord->processReplSetMetadata(metadata);
}
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 7cbc79f9aed..5d450af12d7 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -560,7 +560,7 @@ OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationC
// TODO: SERVER-65948 move the change collection creation logic from here to the PM-2502 hooks.
// The change collection will be created when the change stream is enabled.
- if (ChangeStreamChangeCollectionManager::isChangeCollectionEnabled()) {
+ if (ChangeStreamChangeCollectionManager::isChangeCollectionsModeActive()) {
auto status = ChangeStreamChangeCollectionManager::get(opCtx).createChangeCollection(
opCtx, boost::none);
if (!status.isOK()) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index c2f2aa1ad08..fe769df7572 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1340,7 +1340,6 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
_updateMemberStateFromTopologyCoordinator(lk);
LOGV2(21331, "Transition to primary complete; database writes are now permitted");
- _drainFinishedCond.notify_all();
_externalState->startNoopWriter(_getMyLastAppliedOpTime_inlock());
}
@@ -1830,8 +1829,9 @@ Status ReplicationCoordinatorImpl::setLastDurableOptime_forTest(long long cfgVer
const UpdatePositionArgs::UpdateInfo update(
OpTime(), Date_t(), opTime, wallTime, cfgVer, memberId);
- const auto status = _setLastOptime(lock, update);
- return status;
+ const auto statusWithOpTime = _setLastOptimeForMember(lock, update);
+ _updateStateAfterRemoteOpTimeUpdates(lock, statusWithOpTime.getValue());
+ return statusWithOpTime.getStatus();
}
Status ReplicationCoordinatorImpl::setLastAppliedOptime_forTest(long long cfgVer,
@@ -1847,25 +1847,29 @@ Status ReplicationCoordinatorImpl::setLastAppliedOptime_forTest(long long cfgVer
const UpdatePositionArgs::UpdateInfo update(
opTime, wallTime, OpTime(), Date_t(), cfgVer, memberId);
- const auto status = _setLastOptime(lock, update);
- return status;
+ const auto statusWithOpTime = _setLastOptimeForMember(lock, update);
+ _updateStateAfterRemoteOpTimeUpdates(lock, statusWithOpTime.getValue());
+ return statusWithOpTime.getStatus();
}
-Status ReplicationCoordinatorImpl::_setLastOptime(WithLock lk,
- const UpdatePositionArgs::UpdateInfo& args) {
- auto result = _topCoord->setLastOptime(args, _replExecutor->now());
+StatusWith<OpTime> ReplicationCoordinatorImpl::_setLastOptimeForMember(
+ WithLock lk, const UpdatePositionArgs::UpdateInfo& args) {
+ auto result = _topCoord->setLastOptimeForMember(args, _replExecutor->now());
if (!result.isOK())
return result.getStatus();
const bool advancedOpTime = result.getValue();
+ _rescheduleLivenessUpdate_inlock(args.memberId);
+ return advancedOpTime ? std::max(args.appliedOpTime, args.durableOpTime) : OpTime();
+}
+
+void ReplicationCoordinatorImpl::_updateStateAfterRemoteOpTimeUpdates(
+ WithLock lk, const OpTime& maxRemoteOpTime) {
// Only update committed optime if the remote optimes increased.
- if (advancedOpTime) {
+ if (!maxRemoteOpTime.isNull()) {
_updateLastCommittedOpTimeAndWallTime(lk);
// Wake up replication waiters on optime changes.
- _wakeReadyWaiters(lk, std::max(args.appliedOpTime, args.durableOpTime));
+ _wakeReadyWaiters(lk, maxRemoteOpTime);
}
-
- _rescheduleLivenessUpdate_inlock(args.memberId);
- return Status::OK();
}
bool ReplicationCoordinatorImpl::isCommitQuorumSatisfied(
@@ -4415,7 +4419,7 @@ void ReplicationCoordinatorImpl::_errorOnPromisesIfHorizonChanged(WithLock lk,
HelloMetrics::get(opCtx)->resetNumAwaitingTopologyChanges();
}
- if (oldIndex >= 0 && newIndex >= 0) {
+ if (oldIndex >= 0) {
invariant(_sniToValidConfigPromiseMap.empty());
const auto oldHorizonMappings = oldConfig.getMemberAt(oldIndex).getHorizonMappings();
@@ -5079,18 +5083,22 @@ void ReplicationCoordinatorImpl::_wakeReadyWaiters(WithLock lk, boost::optional<
Status ReplicationCoordinatorImpl::processReplSetUpdatePosition(const UpdatePositionArgs& updates) {
stdx::unique_lock<Latch> lock(_mutex);
Status status = Status::OK();
- bool somethingChanged = false;
+ bool gotValidUpdate = false;
+ OpTime maxRemoteOpTime;
for (UpdatePositionArgs::UpdateIterator update = updates.updatesBegin();
update != updates.updatesEnd();
++update) {
- status = _setLastOptime(lock, *update);
- if (!status.isOK()) {
+ auto statusWithOpTime = _setLastOptimeForMember(lock, *update);
+ if (!statusWithOpTime.isOK()) {
+ status = statusWithOpTime.getStatus();
break;
}
- somethingChanged = true;
+ maxRemoteOpTime = std::max(maxRemoteOpTime, statusWithOpTime.getValue());
+ gotValidUpdate = true;
}
+ _updateStateAfterRemoteOpTimeUpdates(lock, maxRemoteOpTime);
- if (somethingChanged && !_getMemberState_inlock().primary()) {
+ if (gotValidUpdate && !_getMemberState_inlock().primary()) {
lock.unlock();
// Must do this outside _mutex
_externalState->forwardSecondaryProgress();
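Note: processReplSetUpdatePosition above now separates per-member bookkeeping (_setLastOptimeForMember) from the one-time follow-up (_updateStateAfterRemoteOpTimeUpdates), folding the per-update results together with std::max. A simplified sketch of that accumulate-then-notify shape, with an illustrative OpTime stand-in:

// Apply each per-member update, remember the largest optime that actually
// advanced, and run the waiter/commit-point follow-up exactly once afterwards.
#include <algorithm>
#include <iostream>
#include <vector>

struct OpTime {
    long long ts = 0;  // 0 acts as the "null" optime
    bool isNull() const { return ts == 0; }
};

bool operator<(const OpTime& a, const OpTime& b) { return a.ts < b.ts; }

OpTime applyUpdate(long long newTs, long long& memberTs) {
    // Returns the advanced optime, or a null optime if nothing moved forward.
    if (newTs <= memberTs) return {};
    memberTs = newTs;
    return {newTs};
}

void updateStateAfterRemoteOpTimeUpdates(const OpTime& maxRemote) {
    if (!maxRemote.isNull()) {
        std::cout << "waking waiters up to ts=" << maxRemote.ts << "\n";
    }
}

int main() {
    long long memberTs = 5;
    std::vector<long long> updates{3, 7, 9, 9};
    OpTime maxRemote;
    for (long long ts : updates) {
        maxRemote = std::max(maxRemote, applyUpdate(ts, memberTs));
    }
    updateStateAfterRemoteOpTimeUpdates(maxRemote);  // runs once, with ts=9
    return 0;
}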
@@ -5716,28 +5724,27 @@ void ReplicationCoordinatorImpl::prepareReplMetadata(const BSONObj& metadataRequ
invariant(-1 != rbid);
}
- stdx::lock_guard<Latch> lk(_mutex);
+ boost::optional<rpc::ReplSetMetadata> replSetMetadata;
+ boost::optional<rpc::OplogQueryMetadata> oplogQueryMetadata;
+ {
+ stdx::lock_guard<Latch> lk(_mutex);
- if (hasReplSetMetadata) {
- _prepareReplSetMetadata_inlock(lastOpTimeFromClient, builder);
- }
+ if (hasReplSetMetadata) {
+ OpTime lastVisibleOpTime =
+ std::max(lastOpTimeFromClient, _getCurrentCommittedSnapshotOpTime_inlock());
+ replSetMetadata = _topCoord->prepareReplSetMetadata(lastVisibleOpTime);
+ }
- if (hasOplogQueryMetadata) {
- _prepareOplogQueryMetadata_inlock(rbid, builder);
+ if (hasOplogQueryMetadata) {
+ oplogQueryMetadata = _topCoord->prepareOplogQueryMetadata(rbid);
+ }
}
-}
-
-void ReplicationCoordinatorImpl::_prepareReplSetMetadata_inlock(const OpTime& lastOpTimeFromClient,
- BSONObjBuilder* builder) const {
- OpTime lastVisibleOpTime =
- std::max(lastOpTimeFromClient, _getCurrentCommittedSnapshotOpTime_inlock());
- auto metadata = _topCoord->prepareReplSetMetadata(lastVisibleOpTime);
- metadata.writeToMetadata(builder).transitional_ignore();
-}
-void ReplicationCoordinatorImpl::_prepareOplogQueryMetadata_inlock(int rbid,
- BSONObjBuilder* builder) const {
- _topCoord->prepareOplogQueryMetadata(rbid).writeToMetadata(builder).transitional_ignore();
+ // Do BSON serialization outside lock.
+ if (replSetMetadata)
+ invariantStatusOK(replSetMetadata->writeToMetadata(builder));
+ if (oplogQueryMetadata)
+ invariantStatusOK(oplogQueryMetadata->writeToMetadata(builder));
}
bool ReplicationCoordinatorImpl::getWriteConcernMajorityShouldJournal() {
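Note: prepareReplMetadata above now copies the metadata objects while holding _mutex and performs the BSON serialization only after the lock is released. A small self-contained sketch of the same snapshot-under-lock, serialize-outside-lock pattern, with illustrative types:

// The mutex only guards reading the in-memory state; the (potentially slower)
// serialization happens after the critical section ends.
#include <iostream>
#include <mutex>
#include <optional>
#include <sstream>
#include <string>

struct Metadata {
    long long lastVisibleTs = 0;
};

std::mutex mtx;
long long guardedLastVisibleTs = 42;  // state normally protected by mtx

std::string serialize(const Metadata& md) {
    std::ostringstream os;
    os << "{lastVisibleTs: " << md.lastVisibleTs << "}";
    return os.str();
}

int main() {
    std::optional<Metadata> snapshot;
    {
        std::lock_guard<std::mutex> lk(mtx);
        snapshot = Metadata{guardedLastVisibleTs};  // cheap copy under the lock
    }
    // Serialization happens outside the critical section.
    if (snapshot) {
        std::cout << serialize(*snapshot) << "\n";
    }
    return 0;
}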
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index a6dc8fe9066..9ac44fdc62e 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -469,7 +469,7 @@ public:
executor::TaskExecutor::CallbackHandle getCatchupTakeoverCbh_forTest() const;
/**
- * Simple wrappers around _setLastOptime to make it easier to test.
+ * Simple wrappers around _setLastOptimeForMember to make it easier to test.
*/
Status setLastAppliedOptime_forTest(long long cfgVer,
long long memberId,
@@ -1099,8 +1099,19 @@ private:
* This is only valid to call on replica sets.
* "configVersion" will be populated with our config version if it and the configVersion
* of "args" differ.
+ *
+ * If either applied or durable optime has changed, returns the later of the two (even if
+ * that's not the one which changed). Otherwise returns a null optime.
+ */
+ StatusWith<OpTime> _setLastOptimeForMember(WithLock lk,
+ const UpdatePositionArgs::UpdateInfo& args);
+
+ /**
+ * Helper for processReplSetUpdatePosition, companion to _setLastOptimeForMember above. Updates
+ * replication coordinator state and notifies waiters after remote optime updates. Must be
+ * called within the same critical section as _setLastOptimeForMember.
*/
- Status _setLastOptime(WithLock lk, const UpdatePositionArgs::UpdateInfo& args);
+ void _updateStateAfterRemoteOpTimeUpdates(WithLock lk, const OpTime& maxRemoteOpTime);
/**
* This function will report our position externally (like upstream) if necessary.
@@ -1463,17 +1474,6 @@ private:
EventHandle _processReplSetMetadata_inlock(const rpc::ReplSetMetadata& replMetadata);
/**
- * Prepares a metadata object for ReplSetMetadata.
- */
- void _prepareReplSetMetadata_inlock(const OpTime& lastOpTimeFromClient,
- BSONObjBuilder* builder) const;
-
- /**
- * Prepares a metadata object for OplogQueryMetadata.
- */
- void _prepareOplogQueryMetadata_inlock(int rbid, BSONObjBuilder* builder) const;
-
- /**
* Blesses a snapshot to be used for new committed reads.
*
* Returns true if the value was updated to `newCommittedSnapshot`.
@@ -1719,9 +1719,6 @@ private:
// Current ReplicaSet state.
MemberState _memberState; // (M)
- // Used to signal threads waiting for changes to _memberState.
- stdx::condition_variable _drainFinishedCond; // (M)
-
ReplicationCoordinator::ApplierState _applierState = ApplierState::Running; // (M)
// Used to signal threads waiting for changes to _rsConfigState.
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index cfb8b355366..1392cceb923 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -661,51 +661,51 @@ void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(WithLock lk,
std::tuple<StatusWith<ReplSetConfig>, bool> ReplicationCoordinatorImpl::_resolveConfigToApply(
const ReplSetConfig& config) {
+ if (!_settings.isServerless() || !config.isSplitConfig()) {
+ return {config, false};
+ }
+
stdx::unique_lock<Latch> lk(_mutex);
- if (config.isSplitConfig()) {
- if (!_rsConfig.isInitialized()) {
- // Unlock the lock because isSelf performs network I/O.
- lk.unlock();
+ if (!_rsConfig.isInitialized()) {
+ // Unlock the lock because isSelf performs network I/O.
+ lk.unlock();
- // If this node is listed in the members of incoming config, accept the config.
- const auto foundSelfInMembers =
- std::any_of(config.membersBegin(),
- config.membersEnd(),
- [externalState = _externalState.get()](const MemberConfig& config) {
- return externalState->isSelf(config.getHostAndPort(),
- getGlobalServiceContext());
- });
-
- if (foundSelfInMembers) {
- return {config, false};
- }
+ // If this node is listed in the members of incoming config, accept the config.
+ const auto foundSelfInMembers = std::any_of(
+ config.membersBegin(),
+ config.membersEnd(),
+ [externalState = _externalState.get()](const MemberConfig& config) {
+ return externalState->isSelf(config.getHostAndPort(), getGlobalServiceContext());
+ });
- return {Status(ErrorCodes::NotYetInitialized,
- "Cannot apply a split config if the current config is uninitialized"),
- false};
+ if (foundSelfInMembers) {
+ return {config, false};
}
- auto recipientConfig = config.getRecipientConfig();
- const auto& selfMember = _rsConfig.getMemberAt(_selfIndex);
- if (recipientConfig->findMemberByHostAndPort(selfMember.getHostAndPort())) {
- if (selfMember.getNumVotes() > 0) {
- return {
- Status(ErrorCodes::BadValue, "Cannot apply recipient config to a voting node"),
- false};
- }
+ return {Status(ErrorCodes::NotYetInitialized,
+ "Cannot apply a split config if the current config is uninitialized"),
+ false};
+ }
- if (_rsConfig.getReplSetName() == recipientConfig->getReplSetName()) {
- return {Status(ErrorCodes::InvalidReplicaSetConfig,
- "Cannot apply recipient config since current config and recipient "
- "config have the same set name."),
- false};
- }
+ auto recipientConfig = config.getRecipientConfig();
+ const auto& selfMember = _rsConfig.getMemberAt(_selfIndex);
+ if (recipientConfig->findMemberByHostAndPort(selfMember.getHostAndPort())) {
+ if (selfMember.getNumVotes() > 0) {
+ return {Status(ErrorCodes::BadValue, "Cannot apply recipient config to a voting node"),
+ false};
+ }
- auto mutableConfig = recipientConfig->getMutable();
- mutableConfig.setConfigVersion(1);
- mutableConfig.setConfigTerm(1);
- return {ReplSetConfig(std::move(mutableConfig)), true};
+ if (_rsConfig.getReplSetName() == recipientConfig->getReplSetName()) {
+ return {Status(ErrorCodes::InvalidReplicaSetConfig,
+ "Cannot apply recipient config since current config and recipient "
+ "config have the same set name."),
+ false};
}
+
+ auto mutableConfig = recipientConfig->getMutable();
+ mutableConfig.setConfigVersion(1);
+ mutableConfig.setConfigTerm(1);
+ return {ReplSetConfig(std::move(mutableConfig)), true};
}
return {config, false};
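Note: _resolveConfigToApply above is restructured around an early return: non-serverless nodes and non-split configs bail out immediately, which removes one level of nesting from the split-config handling. A tiny illustrative sketch of that guard-clause shape (the predicate names are placeholders):

#include <iostream>
#include <string>

struct Config {
    bool split = false;
    std::string name;
};

std::string resolveConfig(const Config& config, bool isServerless) {
    if (!isServerless || !config.split) {
        return config.name;  // early return keeps the common path flat
    }
    // Split-config handling sits at the top nesting level instead of inside
    // an if/else pyramid.
    return config.name + " (recipient)";
}

int main() {
    std::cout << resolveConfig({false, "mySet"}, true) << "\n";
    std::cout << resolveConfig({true, "mySet"}, true) << "\n";
    return 0;
}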
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
index 5203980b575..e619276b129 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
@@ -58,6 +58,7 @@ namespace {
using executor::NetworkInterfaceMock;
using executor::RemoteCommandRequest;
using executor::RemoteCommandResponse;
+using InNetworkGuard = NetworkInterfaceMock::InNetworkGuard;
TEST(ReplSetHeartbeatArgs, AcceptsUnknownField) {
ReplSetHeartbeatArgsV1 hbArgs;
@@ -116,7 +117,8 @@ protected:
void processResponseFromPrimary(const ReplSetConfig& config,
long long version = -2,
- long long term = OpTime::kInitialTerm);
+ long long term = OpTime::kInitialTerm,
+ const HostAndPort& target = HostAndPort{"h1", 1});
};
void ReplCoordHBV1Test::assertMemberState(const MemberState expected, std::string msg) {
@@ -160,13 +162,14 @@ ReplCoordHBV1Test::performSyncToFinishReconfigHeartbeat() {
void ReplCoordHBV1Test::processResponseFromPrimary(const ReplSetConfig& config,
long long version,
- long long term) {
+ long long term,
+ const HostAndPort& target) {
NetworkInterfaceMock* net = getNet();
const Date_t startDate = getNet()->now();
NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
+ ASSERT_EQUALS(target, request.target);
ReplSetHeartbeatArgsV1 hbArgs;
ASSERT_OK(hbArgs.initialize(request.cmdObj));
ASSERT_EQUALS("mySet", hbArgs.getSetName());
@@ -266,6 +269,85 @@ TEST_F(ReplCoordHBV1Test,
ASSERT_TRUE(getExternalState()->threadsStarted());
}
+TEST_F(ReplCoordHBV1Test, RejectSplitConfigWhenNotInServerlessMode) {
+ auto severityGuard = unittest::MinimumLoggedSeverityGuard{logv2::LogComponent::kDefault,
+ logv2::LogSeverity::Debug(3)};
+
+ // Start up with three nodes, and assume the role of "node2" as a secondary. Notably, the local
+ // node is NOT started in serverless mode. "node2" is configured with no votes and no
+ // priority, so that it can pass validation for accepting a split config.
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "protocolVersion" << 1 << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"
+ << "votes" << 0 << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "node3:12345"))),
+ HostAndPort("node2", 12345));
+ ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->updateTerm_forTest(1, nullptr);
+ ASSERT_EQ(getReplCoord()->getTerm(), 1);
+ // respond to initial heartbeat requests
+ for (int j = 0; j < 2; ++j) {
+ replyToReceivedHeartbeatV1();
+ }
+
+ // Verify that there are no further heartbeat requests, since the heartbeat requests should be
+ // scheduled for the future.
+ {
+ InNetworkGuard guard(getNet());
+ assertMemberState(MemberState::RS_SECONDARY);
+ ASSERT_FALSE(getNet()->hasReadyRequests());
+ }
+
+ ReplSetConfig splitConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "term" << 1 << "protocolVersion" << 1
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345"))
+ << "recipientConfig"
+ << BSON("_id"
+ << "recipientSet"
+ << "version" << 1 << "term" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")))));
+
+ // Accept a heartbeat from `node1` which has a split config. The split config lists this node
+ // ("node2") in the recipient member list, but a node that was not started in serverless mode
+ // should not accept and install the recipient config.
+ receiveHeartbeatFrom(splitConfig, 1, HostAndPort("node1", 12345));
+
+ {
+ InNetworkGuard guard(getNet());
+ processResponseFromPrimary(splitConfig, 2, 1, HostAndPort{"node1", 12345});
+ assertMemberState(MemberState::RS_SECONDARY);
+ OperationContextNoop opCtx;
+ auto storedConfig = ReplSetConfig::parse(
+ unittest::assertGet(getExternalState()->loadLocalConfigDocument(&opCtx)));
+ ASSERT_OK(storedConfig.validate());
+
+ // Verify that the recipient config was not accepted. A successfully applied splitConfig
+ // would install a config with version and term {1, 1}.
+ ASSERT_EQUALS(ConfigVersionAndTerm(3, 1), storedConfig.getConfigVersionAndTerm());
+ ASSERT_EQUALS("mySet", storedConfig.getReplSetName());
+ }
+
+ ASSERT_TRUE(getExternalState()->threadsStarted());
+}
+
TEST_F(ReplCoordHBV1Test, NodeRejectsSplitConfigWhenNotInitialized) {
ReplSetConfig rsConfig =
assertMakeRSConfig(BSON("_id"
@@ -556,6 +638,10 @@ TEST_F(
class ReplCoordHBV1SplitConfigTest : public ReplCoordHBV1Test {
public:
void startUp(const std::string& hostAndPort) {
+ ReplSettings settings;
+ settings.setServerlessMode();
+ init(settings);
+
BSONObj configBson =
BSON("_id" << _donorSetName << "version" << _configVersion << "term" << _configTerm
<< "members" << _members << "protocolVersion" << 1);
@@ -740,7 +826,6 @@ TEST_F(ReplCoordHBV1SplitConfigTest, RecipientNodeApplyConfig) {
validateNextRequest("", _recipientSetName, 1, 1);
}
-using InNetworkGuard = NetworkInterfaceMock::InNetworkGuard;
TEST_F(ReplCoordHBV1SplitConfigTest, RejectMismatchedSetNameInHeartbeatResponse) {
startUp(_recipientSecondaryNode);
@@ -813,9 +898,9 @@ TEST_F(ReplCoordHBV1SplitConfigTest, RecipientNodeNonZeroVotes) {
getNet()->runReadyNetworkOperations();
// The node rejected the config as it's a voting node and its version has not changed.
- ASSERT_EQ(getReplCoord()->getConfigVersion(), _configVersion);
- ASSERT_EQ(getReplCoord()->getConfigTerm(), _configTerm);
- ASSERT_EQ(getReplCoord()->getSettings().ourSetName(), _donorSetName);
+ auto config = getReplCoord()->getConfig();
+ ASSERT_EQ(config.getConfigVersionAndTerm(), ConfigVersionAndTerm(_configVersion, _configTerm));
+ ASSERT_EQ(config.getReplSetName(), _donorSetName);
}
class ReplCoordHBV1ReconfigTest : public ReplCoordHBV1Test {
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index bbe14690c7a..31a307a96b0 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -236,11 +236,11 @@ void ReplicationCoordinatorMock::setMyHeartbeatMessage(const std::string& msg) {
}
void ReplicationCoordinatorMock::_setMyLastAppliedOpTimeAndWallTime(
- const OpTimeAndWallTime& opTimeAndWallTime) {
+ WithLock lk, const OpTimeAndWallTime& opTimeAndWallTime) {
_myLastAppliedOpTime = opTimeAndWallTime.opTime;
_myLastAppliedWallTime = opTimeAndWallTime.wallTime;
- setCurrentCommittedSnapshotOpTime(opTimeAndWallTime.opTime);
+ _setCurrentCommittedSnapshotOpTime(lk, opTimeAndWallTime.opTime);
if (auto storageEngine = _service->getStorageEngine()) {
if (auto snapshotManager = storageEngine->getSnapshotManager()) {
@@ -253,7 +253,7 @@ void ReplicationCoordinatorMock::setMyLastAppliedOpTimeAndWallTime(
const OpTimeAndWallTime& opTimeAndWallTime) {
stdx::lock_guard<Mutex> lk(_mutex);
- _setMyLastAppliedOpTimeAndWallTime(opTimeAndWallTime);
+ _setMyLastAppliedOpTimeAndWallTime(lk, opTimeAndWallTime);
}
void ReplicationCoordinatorMock::setMyLastDurableOpTimeAndWallTime(
@@ -269,7 +269,7 @@ void ReplicationCoordinatorMock::setMyLastAppliedOpTimeAndWallTimeForward(
stdx::lock_guard<Mutex> lk(_mutex);
if (opTimeAndWallTime.opTime > _myLastAppliedOpTime) {
- _setMyLastAppliedOpTimeAndWallTime(opTimeAndWallTime);
+ _setMyLastAppliedOpTimeAndWallTime(lk, opTimeAndWallTime);
}
}
@@ -657,11 +657,17 @@ Status ReplicationCoordinatorMock::updateTerm(OperationContext* opCtx, long long
void ReplicationCoordinatorMock::clearCommittedSnapshot() {}
-void ReplicationCoordinatorMock::setCurrentCommittedSnapshotOpTime(OpTime time) {
+void ReplicationCoordinatorMock::_setCurrentCommittedSnapshotOpTime(WithLock lk, OpTime time) {
_currentCommittedSnapshotOpTime = time;
}
+void ReplicationCoordinatorMock::setCurrentCommittedSnapshotOpTime(OpTime time) {
+ stdx::lock_guard<Mutex> lk(_mutex);
+ _setCurrentCommittedSnapshotOpTime(lk, time);
+}
+
OpTime ReplicationCoordinatorMock::getCurrentCommittedSnapshotOpTime() const {
+ stdx::lock_guard<Mutex> lk(_mutex);
return _currentCommittedSnapshotOpTime;
}
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 3ac7686ea34..dbe7b28ef83 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -422,7 +422,9 @@ public:
virtual WriteConcernTagChanges* getWriteConcernTagChanges() override;
private:
- void _setMyLastAppliedOpTimeAndWallTime(const OpTimeAndWallTime& opTimeAndWallTime);
+ void _setMyLastAppliedOpTimeAndWallTime(WithLock lk,
+ const OpTimeAndWallTime& opTimeAndWallTime);
+ void _setCurrentCommittedSnapshotOpTime(WithLock lk, OpTime time);
ServiceContext* const _service;
ReplSettings _settings;
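Note: the mock changes above route the public setters through private helpers that take a WithLock argument, so those helpers can only be reached while _mutex is held. A compact sketch of that lock-witness idiom, using a simplified WithLock stand-in rather than the server's utility:

#include <iostream>
#include <mutex>

class WithLock {
public:
    // Constructible only from a held lock, so holding one proves the mutex is taken.
    WithLock(const std::lock_guard<std::mutex>&) {}
};

class Coordinator {
public:
    void setSnapshotTime(long long t) {
        std::lock_guard<std::mutex> lk(_mutex);
        _setSnapshotTime(lk, t);  // pass the witness down
    }

    long long snapshotTime() const {
        std::lock_guard<std::mutex> lk(_mutex);
        return _snapshotTime;
    }

private:
    void _setSnapshotTime(WithLock, long long t) {
        _snapshotTime = t;  // safe: the caller proved the mutex is held
    }

    mutable std::mutex _mutex;
    long long _snapshotTime = 0;
};

int main() {
    Coordinator c;
    c.setSnapshotTime(7);
    std::cout << c.snapshotTime() << "\n";
    return 0;
}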
diff --git a/src/mongo/db/repl/roll_back_local_operations_test.cpp b/src/mongo/db/repl/roll_back_local_operations_test.cpp
index b71765e33d3..70421f959e1 100644
--- a/src/mongo/db/repl/roll_back_local_operations_test.cpp
+++ b/src/mongo/db/repl/roll_back_local_operations_test.cpp
@@ -321,7 +321,8 @@ public:
DBClientConnectionForTest(int numInitFailures) : _initFailuresLeft(numInitFailures) {}
std::unique_ptr<DBClientCursor> find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) override {
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode) override {
if (_initFailuresLeft > 0) {
_initFailuresLeft--;
LOGV2(21657,
diff --git a/src/mongo/db/repl/rollback_source_impl.cpp b/src/mongo/db/repl/rollback_source_impl.cpp
index 9c56b0ff21e..8b427be197c 100644
--- a/src/mongo/db/repl/rollback_source_impl.cpp
+++ b/src/mongo/db/repl/rollback_source_impl.cpp
@@ -94,7 +94,8 @@ std::pair<BSONObj, NamespaceString> RollbackSourceImpl::findOneByUUID(const std:
auto cursor =
std::make_unique<DBClientCursor>(_getConnection(),
std::move(findRequest),
- ReadPreferenceSetting{ReadPreference::SecondaryPreferred});
+ ReadPreferenceSetting{ReadPreference::SecondaryPreferred},
+ false /*isExhaust*/);
uassert(6138500, "find one by UUID failed", cursor->init());
BSONObj result = cursor->more() ? cursor->nextSafe() : BSONObj{};
NamespaceString nss = cursor->getNamespaceString();
diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp
index e527aa204eb..8777903803c 100644
--- a/src/mongo/db/repl/rs_rollback.cpp
+++ b/src/mongo/db/repl/rs_rollback.cpp
@@ -949,7 +949,7 @@ void rollbackCreateIndexes(OperationContext* opCtx, UUID uuid, std::set<std::str
"indexName"_attr = indexName);
WriteUnitOfWork wuow(opCtx);
- dropIndex(opCtx, collection.getWritableCollection(), indexName, *nss);
+ dropIndex(opCtx, collection.getWritableCollection(opCtx), indexName, *nss);
wuow.commit();
LOGV2_DEBUG(21673,
@@ -1634,12 +1634,12 @@ void rollback_internal::syncFixUp(OperationContext* opCtx,
WriteUnitOfWork wuow(opCtx);
// Set collection to whatever temp status is on the sync source.
- collection.getWritableCollection()->setIsTemp(opCtx, options.temp);
+ collection.getWritableCollection(opCtx)->setIsTemp(opCtx, options.temp);
// Set any document validation options. We update the validator fields without
// parsing/validation, since we fetched the options object directly from the sync
// source, and we should set our validation options to match it exactly.
- auto validatorStatus = collection.getWritableCollection()->updateValidator(
+ auto validatorStatus = collection.getWritableCollection(opCtx)->updateValidator(
opCtx, options.validator, options.validationLevel, options.validationAction);
if (!validatorStatus.isOK()) {
throw RSFatalException(str::stream()
@@ -1811,16 +1811,16 @@ void rollback_internal::syncFixUp(OperationContext* opCtx,
// RecordId loc = Helpers::findById(nsd, pattern);
if (!loc.isNull()) {
try {
- writeConflictRetry(opCtx,
- "cappedTruncateAfter",
- collection->ns().ns(),
- [&] {
- WriteUnitOfWork wunit(opCtx);
- collection.getWritableCollection()
- ->cappedTruncateAfter(
- opCtx, loc, true);
- wunit.commit();
- });
+ writeConflictRetry(
+ opCtx,
+ "cappedTruncateAfter",
+ collection->ns().ns(),
+ [&] {
+ WriteUnitOfWork wunit(opCtx);
+ collection.getWritableCollection(opCtx)
+ ->cappedTruncateAfter(opCtx, loc, true);
+ wunit.commit();
+ });
} catch (const DBException& e) {
if (e.code() == 13415) {
// hack: need to just make cappedTruncate do this...
@@ -1828,7 +1828,7 @@ void rollback_internal::syncFixUp(OperationContext* opCtx,
opCtx, "truncate", collection->ns().ns(), [&] {
WriteUnitOfWork wunit(opCtx);
uassertStatusOK(
- collection.getWritableCollection()
+ collection.getWritableCollection(opCtx)
->truncate(opCtx));
wunit.commit();
});
@@ -2012,14 +2012,6 @@ void rollback_internal::syncFixUp(OperationContext* opCtx,
validator->resetKeyManagerCache();
}
- // Force the config server to update its shard registry on next access. Otherwise it may have
- // the stale data that has been just rolled back.
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- if (auto shardRegistry = Grid::get(opCtx)->shardRegistry()) {
- shardRegistry->clearEntries();
- }
- }
-
// Force the default read/write concern cache to reload on next access in case the defaults
// document was rolled back.
ReadWriteConcernDefaults::get(opCtx).invalidate();
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index 1a90e3a57c8..22d7c7648e4 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -50,6 +50,7 @@
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/catalog/document_validation.h"
#include "mongo/db/catalog/index_catalog.h"
+#include "mongo/db/change_stream_change_collection_manager.h"
#include "mongo/db/client.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/concurrency/exception_util.h"
@@ -323,12 +324,6 @@ template <typename AutoGetCollectionType>
StatusWith<const CollectionPtr*> getCollection(const AutoGetCollectionType& autoGetCollection,
const NamespaceStringOrUUID& nsOrUUID,
const std::string& message) {
- if (!autoGetCollection.getDb()) {
- StringData dbName = nsOrUUID.nss() ? nsOrUUID.nss()->db() : nsOrUUID.dbname();
- return {ErrorCodes::NamespaceNotFound,
- str::stream() << "Database [" << dbName << "] not found. " << message};
- }
-
const auto& collection = autoGetCollection.getCollection();
if (!collection) {
return {ErrorCodes::NamespaceNotFound,
@@ -347,6 +342,8 @@ Status insertDocumentsSingleBatch(OperationContext* opCtx,
boost::optional<AutoGetOplog> autoOplog;
const CollectionPtr* collection;
+ bool shouldWriteToChangeCollections = false;
+
auto nss = nsOrUUID.nss();
if (nss && nss->isOplog()) {
// Simplify locking rules for oplog collection.
@@ -355,6 +352,9 @@ Status insertDocumentsSingleBatch(OperationContext* opCtx,
if (!*collection) {
return {ErrorCodes::NamespaceNotFound, "Oplog collection does not exist"};
}
+
+ shouldWriteToChangeCollections =
+ ChangeStreamChangeCollectionManager::isChangeCollectionsModeActive();
} else {
autoColl.emplace(opCtx, nsOrUUID, MODE_IX);
auto collectionResult = getCollection(
@@ -371,6 +371,18 @@ Status insertDocumentsSingleBatch(OperationContext* opCtx,
if (!status.isOK()) {
return status;
}
+
+ // Insert oplog entries into change collections if we are running in serverless mode and the
+ // 'nss' is 'local.oplog.rs'.
+ if (shouldWriteToChangeCollections) {
+ auto& changeCollectionManager = ChangeStreamChangeCollectionManager::get(opCtx);
+ status = changeCollectionManager.insertDocumentsToChangeCollection(
+ opCtx, begin, end, nullOpDebug);
+ if (!status.isOK()) {
+ return status;
+ }
+ }
+
wunit.commit();
return Status::OK();
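Note: insertDocumentsSingleBatch above now mirrors oplog inserts into the change collections, inside the same WriteUnitOfWork, whenever change-collections mode is active. A rough standalone sketch of that conditional mirroring shape; the Store and WriteUnit types are illustrative only, not the storage API:

#include <iostream>
#include <string>
#include <vector>

struct Store {
    std::vector<std::string> docs;
};

struct WriteUnit {
    bool committed = false;
    void commit() { committed = true; }
};

bool insertBatch(Store& oplog,
                 Store* changeCollection,  // may be null when the mode is off
                 const std::vector<std::string>& batch) {
    WriteUnit wuow;
    oplog.docs.insert(oplog.docs.end(), batch.begin(), batch.end());
    if (changeCollection) {
        // Same batch mirrored to the change collection inside the same unit of work.
        changeCollection->docs.insert(changeCollection->docs.end(), batch.begin(), batch.end());
    }
    wuow.commit();  // both writes become visible together
    return wuow.committed;
}

int main() {
    Store oplog, changeColl;
    insertBatch(oplog, &changeColl, {"op1", "op2"});
    std::cout << oplog.docs.size() << " " << changeColl.docs.size() << "\n";
    return 0;
}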
diff --git a/src/mongo/db/repl/storage_interface_impl_test.cpp b/src/mongo/db/repl/storage_interface_impl_test.cpp
index 3c942ed7361..14362a56821 100644
--- a/src/mongo/db/repl/storage_interface_impl_test.cpp
+++ b/src/mongo/db/repl/storage_interface_impl_test.cpp
@@ -2684,7 +2684,6 @@ TEST_F(StorageInterfaceImplTest,
auto doc = BSON("_id" << 0 << "x" << 1);
auto status = storage.upsertById(opCtx, nss, doc["_id"], doc);
ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, status);
- ASSERT_EQUALS("Database [nosuchdb] not found. Unable to update document.", status.reason());
}
TEST_F(StorageInterfaceImplTest,
@@ -2879,10 +2878,6 @@ TEST_F(StorageInterfaceImplTest, DeleteByFilterReturnsNamespaceNotFoundWhenDatab
auto filter = BSON("x" << 1);
auto status = storage.deleteByFilter(opCtx, nss, filter);
ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, status);
- ASSERT_EQUALS(std::string(str::stream()
- << "Database [nosuchdb] not found. Unable to delete documents in "
- << nss.ns() << " using filter " << filter),
- status.reason());
}
TEST_F(StorageInterfaceImplTest, DeleteByFilterReturnsBadValueWhenFilterContainsUnknownOperator) {
diff --git a/src/mongo/db/repl/storage_timestamp_test.cpp b/src/mongo/db/repl/storage_timestamp_test.cpp
index cc0f88d0779..fb9325c1978 100644
--- a/src/mongo/db/repl/storage_timestamp_test.cpp
+++ b/src/mongo/db/repl/storage_timestamp_test.cpp
@@ -162,7 +162,7 @@ Status createIndexFromSpec(OperationContext* opCtx,
}
WriteUnitOfWork wunit(opCtx);
ASSERT_OK(indexer.commit(opCtx,
- collection.getWritableCollection(),
+ collection.getWritableCollection(opCtx),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
LogicalTime indexTs = clock->tickClusterTime(1);
@@ -394,7 +394,7 @@ public:
// Timestamping index completion. Primaries write an oplog entry.
ASSERT_OK(
indexer.commit(_opCtx,
- coll.getWritableCollection(),
+ coll.getWritableCollection(_opCtx),
[&](const BSONObj& indexSpec) {
_opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
_opCtx, coll->ns(), coll->uuid(), indexSpec, false);
@@ -2787,7 +2787,7 @@ TEST_F(StorageTimestampTest, IndexBuildsResolveErrorsDuringStateChangeToPrimary)
WriteUnitOfWork wuow(_opCtx);
ASSERT_OK(
indexer.commit(_opCtx,
- collection.getWritableCollection(),
+ collection.getWritableCollection(_opCtx),
[&](const BSONObj& indexSpec) {
_opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
_opCtx, collection->ns(), collection->uuid(), indexSpec, false);
diff --git a/src/mongo/db/repl/tenant_collection_cloner.cpp b/src/mongo/db/repl/tenant_collection_cloner.cpp
index 9e6d5f7e02a..165538954bd 100644
--- a/src/mongo/db/repl/tenant_collection_cloner.cpp
+++ b/src/mongo/db/repl/tenant_collection_cloner.cpp
@@ -474,36 +474,42 @@ BaseCloner::AfterStageBehavior TenantCollectionCloner::queryStage() {
}
void TenantCollectionCloner::runQuery() {
- const BSONObj& filter = _lastDocId.isEmpty()
- ? BSONObj{} // Use $expr and the aggregation version of $gt to avoid type bracketing.
- : BSON("$expr" << BSON("$gt" << BSON_ARRAY("$_id" << _lastDocId["_id"])));
-
- auto query = _collectionOptions.clusteredIndex
- // RecordIds are _id values and has no separate _id index
- ? Query().hint(BSON("$natural" << 1))
- : Query().hint(BSON("_id" << 1));
-
-
- // Any errors that are thrown here (including NamespaceNotFound) will be handled on the stage
- // level.
- getClient()->query_DEPRECATED(
- [this](DBClientCursorBatchIterator& iter) { handleNextBatch(iter); },
- _sourceDbAndUuid,
- filter,
- query,
- nullptr /* fieldsToReturn */,
- QueryOption_NoCursorTimeout | QueryOption_SecondaryOk |
- (collectionClonerUsesExhaust ? QueryOption_Exhaust : 0),
- _collectionClonerBatchSize,
- ReadConcernArgs(ReadConcernLevel::kMajorityReadConcern).toBSONInner());
+ FindCommandRequest findCmd{_sourceDbAndUuid};
+
+ findCmd.setFilter(
+ _lastDocId.isEmpty()
+ ? BSONObj{} // Use $expr and the aggregation version of $gt to avoid type bracketing.
+ : BSON("$expr" << BSON("$gt" << BSON_ARRAY("$_id" << _lastDocId["_id"]))));
+
+ if (_collectionOptions.clusteredIndex) {
+ findCmd.setHint(BSON("$natural" << 1));
+ } else {
+ findCmd.setHint(BSON("_id" << 1));
+ }
+
+ findCmd.setNoCursorTimeout(true);
+ findCmd.setReadConcern(ReadConcernArgs(ReadConcernLevel::kMajorityReadConcern).toBSONInner());
+ if (_collectionClonerBatchSize) {
+ findCmd.setBatchSize(_collectionClonerBatchSize);
+ }
+
+ ExhaustMode exhaustMode = collectionClonerUsesExhaust ? ExhaustMode::kOn : ExhaustMode::kOff;
+
+ auto cursor = getClient()->find(
+ std::move(findCmd), ReadPreferenceSetting{ReadPreference::SecondaryPreferred}, exhaustMode);
+
+ // Process the results of the cursor one batch at a time.
+ while (cursor->more()) {
+ handleNextBatch(*cursor);
+ }
}
-void TenantCollectionCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
+void TenantCollectionCloner::handleNextBatch(DBClientCursor& cursor) {
{
stdx::lock_guard<Latch> lk(_mutex);
_stats.receivedBatches++;
- while (iter.moreInCurrentBatch()) {
- _documentsToInsert.emplace_back(iter.nextSafe());
+ while (cursor.moreInCurrentBatch()) {
+ _documentsToInsert.emplace_back(cursor.nextSafe());
}
}
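Note: TenantCollectionCloner::runQuery above switches from the callback-based query_DEPRECATED API to an explicit cursor that the caller drains batch by batch. A self-contained sketch of that draining loop, with a toy Cursor standing in for DBClientCursor:

#include <deque>
#include <iostream>
#include <string>
#include <vector>

class Cursor {
public:
    explicit Cursor(std::vector<std::vector<std::string>> batches)
        : _batches(batches.begin(), batches.end()) {}

    bool more() {
        // Fetch the next batch lazily when the current one is exhausted.
        if (_current.empty() && !_batches.empty()) {
            _current.assign(_batches.front().begin(), _batches.front().end());
            _batches.pop_front();
        }
        return !_current.empty();
    }

    bool moreInCurrentBatch() const { return !_current.empty(); }

    std::string nextSafe() {
        std::string doc = _current.front();
        _current.pop_front();
        return doc;
    }

private:
    std::deque<std::vector<std::string>> _batches;
    std::deque<std::string> _current;
};

void handleNextBatch(Cursor& cursor, std::vector<std::string>& buffer) {
    while (cursor.moreInCurrentBatch()) {
        buffer.push_back(cursor.nextSafe());
    }
}

int main() {
    Cursor cursor({{"a", "b"}, {"c"}});
    std::vector<std::string> buffer;
    while (cursor.more()) {
        handleNextBatch(cursor, buffer);  // one call per batch, as in the cloner
    }
    std::cout << buffer.size() << " documents buffered\n";
    return 0;
}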
diff --git a/src/mongo/db/repl/tenant_collection_cloner.h b/src/mongo/db/repl/tenant_collection_cloner.h
index b9c22928917..12bd9bbb832 100644
--- a/src/mongo/db/repl/tenant_collection_cloner.h
+++ b/src/mongo/db/repl/tenant_collection_cloner.h
@@ -209,10 +209,10 @@ private:
AfterStageBehavior queryStage();
/**
- * Put all results from a query batch into a buffer to be inserted, and schedule
- * it to be inserted.
+ * Put all results from a query batch into a buffer to be inserted, and schedule it to be
+ * inserted.
*/
- void handleNextBatch(DBClientCursorBatchIterator& iter);
+ void handleNextBatch(DBClientCursor& cursor);
/**
* Called whenever there is a new batch of documents ready from the DBClientConnection.
diff --git a/src/mongo/db/repl/tenant_file_cloner.cpp b/src/mongo/db/repl/tenant_file_cloner.cpp
index 83ae3c65fc8..b909039eed1 100644
--- a/src/mongo/db/repl/tenant_file_cloner.cpp
+++ b/src/mongo/db/repl/tenant_file_cloner.cpp
@@ -188,8 +188,7 @@ void TenantFileCloner::runQuery() {
getClient(), std::move(aggRequest), true /* secondaryOk */, useExhaust));
try {
while (cursor->more()) {
- DBClientCursorBatchIterator iter(*cursor);
- handleNextBatch(iter);
+ handleNextBatch(*cursor);
}
} catch (const DBException& e) {
// We cannot continue after an error when processing exhaust cursors. Instead we must
@@ -207,7 +206,7 @@ void TenantFileCloner::runQuery() {
}
}
-void TenantFileCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
+void TenantFileCloner::handleNextBatch(DBClientCursor& cursor) {
LOGV2_DEBUG(6113307,
3,
"TenantFileCloner handleNextBatch",
@@ -215,7 +214,7 @@ void TenantFileCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
"backupId"_attr = _backupId,
"remoteFile"_attr = _remoteFileName,
"fileOffset"_attr = getFileOffset(),
- "moreInCurrentBatch"_attr = iter.moreInCurrentBatch());
+ "moreInCurrentBatch"_attr = cursor.moreInCurrentBatch());
{
stdx::lock_guard<TenantMigrationSharedData> lk(*getSharedData());
if (!getSharedData()->getStatus(lk).isOK()) {
@@ -225,11 +224,11 @@ void TenantFileCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
str::stream() << message << ": " << getSharedData()->getStatus(lk));
}
}
- while (iter.moreInCurrentBatch()) {
+ while (cursor.moreInCurrentBatch()) {
stdx::lock_guard<Latch> lk(_mutex);
_stats.receivedBatches++;
- while (iter.moreInCurrentBatch()) {
- _dataToWrite.emplace_back(iter.nextSafe());
+ while (cursor.moreInCurrentBatch()) {
+ _dataToWrite.emplace_back(cursor.nextSafe());
}
}
diff --git a/src/mongo/db/repl/tenant_file_cloner.h b/src/mongo/db/repl/tenant_file_cloner.h
index 90e37946224..27ff89fbc3a 100644
--- a/src/mongo/db/repl/tenant_file_cloner.h
+++ b/src/mongo/db/repl/tenant_file_cloner.h
@@ -160,7 +160,7 @@ private:
/**
* Put all results from a query batch into a buffer, and schedule it to be written to disk.
*/
- void handleNextBatch(DBClientCursorBatchIterator& iter);
+ void handleNextBatch(DBClientCursor& cursor);
/**
* Called whenever there is a new batch of documents ready from the DBClientConnection.
diff --git a/src/mongo/db/repl/tenant_file_importer_service.cpp b/src/mongo/db/repl/tenant_file_importer_service.cpp
index 85d95d7e22d..af565c3c713 100644
--- a/src/mongo/db/repl/tenant_file_importer_service.cpp
+++ b/src/mongo/db/repl/tenant_file_importer_service.cpp
@@ -118,14 +118,21 @@ TenantFileImporterService* TenantFileImporterService::get(ServiceContext* servic
void TenantFileImporterService::startMigration(const UUID& migrationId,
const StringData& donorConnectionString) {
stdx::lock_guard lk(_mutex);
+ if (migrationId == _migrationId && _state >= State::kStarted && _state < State::kInterrupted) {
+ return;
+ }
+
_reset(lk);
_migrationId = migrationId;
_donorConnectionString = donorConnectionString.toString();
- _eventQueue = std::make_unique<Queue>();
- _state.setState(ImporterState::State::kStarted);
+ _eventQueue = std::make_shared<Queue>();
+ _state = State::kStarted;
- _thread = std::make_unique<stdx::thread>([this] {
+ _thread = std::make_unique<stdx::thread>([this, migrationId] {
Client::initThread("TenantFileImporterService");
+ LOGV2_INFO(6378904,
+ "TenantFileImporterService starting worker thread",
+ "migrationId"_attr = migrationId.toString());
auto opCtx = cc().makeOperationContext();
_handleEvents(opCtx.get());
});
@@ -134,48 +141,55 @@ void TenantFileImporterService::startMigration(const UUID& migrationId,
void TenantFileImporterService::learnedFilename(const UUID& migrationId,
const BSONObj& metadataDoc) {
stdx::lock_guard lk(_mutex);
+ if (migrationId == _migrationId && _state >= State::kLearnedAllFilenames) {
+ return;
+ }
+
tassert(8423347,
"Called learnedFilename with migrationId {}, but {} is active"_format(
migrationId.toString(), _migrationId ? _migrationId->toString() : "no migration"),
migrationId == _migrationId);
- _state.setState(ImporterState::State::kLearnedFilename);
+ _state = State::kLearnedFilename;
ImporterEvent event{ImporterEvent::Type::kLearnedFileName, migrationId};
event.metadataDoc = metadataDoc.getOwned();
+ invariant(_eventQueue);
auto success = _eventQueue->tryPush(std::move(event));
- uassert(6378904,
+ uassert(6378903,
"TenantFileImporterService failed to push '{}' event without blocking"_format(
- _state.toString()),
+ stateToString(_state)),
success);
}
void TenantFileImporterService::learnedAllFilenames(const UUID& migrationId) {
stdx::lock_guard lk(_mutex);
+ if (migrationId == _migrationId && _state >= State::kLearnedAllFilenames) {
+ return;
+ }
+
tassert(8423345,
"Called learnedAllFilenames with migrationId {}, but {} is active"_format(
migrationId.toString(), _migrationId ? _migrationId->toString() : "no migration"),
migrationId == _migrationId);
- _state.setState(ImporterState::State::kLearnedAllFilenames);
+ _state = State::kLearnedAllFilenames;
+ invariant(_eventQueue);
auto success = _eventQueue->tryPush({ImporterEvent::Type::kLearnedAllFilenames, migrationId});
- uassert(6378905,
+ uassert(6378902,
"TenantFileImporterService failed to push '{}' event without blocking"_format(
- _state.toString()),
+ stateToString(_state)),
success);
}
void TenantFileImporterService::interrupt(const UUID& migrationId) {
stdx::lock_guard lk(_mutex);
- if (!_migrationId) {
- return;
- }
if (migrationId != _migrationId) {
LOGV2_WARNING(
- 6378907,
+ 6378901,
"Called interrupt with migrationId {migrationId}, but {activeMigrationId} is active",
"migrationId"_attr = migrationId.toString(),
- "activeMigrationId"_attr = _migrationId->toString());
+ "activeMigrationId"_attr = _migrationId ? _migrationId->toString() : "no migration");
return;
}
_interrupt(lk);
@@ -195,8 +209,11 @@ void TenantFileImporterService::_handleEvents(OperationContext* opCtx) {
std::string donorConnectionString;
boost::optional<UUID> migrationId;
+ std::shared_ptr<Queue> eventQueueRef;
{
stdx::lock_guard lk(_mutex);
+ invariant(_eventQueue);
+ eventQueueRef = _eventQueue;
donorConnectionString = _donorConnectionString;
migrationId = _migrationId;
}
@@ -206,9 +223,9 @@ void TenantFileImporterService::_handleEvents(OperationContext* opCtx) {
opCtx->checkForInterrupt();
try {
- event = _eventQueue->pop(opCtx);
+ event = eventQueueRef->pop(opCtx);
} catch (const ExceptionFor<ErrorCodes::ProducerConsumerQueueEndClosed>& err) {
- LOGV2_WARNING(6378908, "Event queue was interrupted", "error"_attr = err);
+ LOGV2_WARNING(6378900, "Event queue was interrupted", "error"_attr = err);
break;
}
@@ -259,7 +276,7 @@ void TenantFileImporterService::_voteImportedFiles(OperationContext* opCtx) {
}
void TenantFileImporterService::_interrupt(WithLock) {
- if (_state.is(ImporterState::State::kInterrupted)) {
+ if (_state == State::kInterrupted) {
return;
}
@@ -276,11 +293,16 @@ void TenantFileImporterService::_interrupt(WithLock) {
// _opCtx->markKilled(ErrorCodes::Interrupted);
}
- _state.setState(ImporterState::State::kInterrupted);
+ _state = State::kInterrupted;
}
void TenantFileImporterService::_reset(WithLock) {
- _migrationId.reset();
+ if (_migrationId) {
+ LOGV2_INFO(6378905,
+ "TenantFileImporterService resetting migration",
+ "migrationId"_attr = _migrationId->toString());
+ _migrationId.reset();
+ }
if (_thread && _thread->joinable()) {
_thread->join();
@@ -292,6 +314,6 @@ void TenantFileImporterService::_reset(WithLock) {
}
// TODO SERVER-66907: how should we be resetting _opCtx?
- _state.setState(ImporterState::State::kUninitialized);
+ _state = State::kUninitialized;
}
} // namespace mongo::repl
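Note: TenantFileImporterService now stores the event queue in a shared_ptr, and the worker thread copies that pointer under _mutex before popping events, so a concurrent _reset cannot destroy the queue it is reading from. A simplified sketch of that pattern with a plain condition-variable queue (the real service uses its own interruptible queue type):

#include <condition_variable>
#include <iostream>
#include <memory>
#include <mutex>
#include <queue>
#include <thread>

struct Queue {
    std::mutex m;
    std::condition_variable cv;
    std::queue<int> items;
    bool closed = false;

    void push(int v) {
        { std::lock_guard<std::mutex> lk(m); items.push(v); }
        cv.notify_one();
    }
    void close() {
        { std::lock_guard<std::mutex> lk(m); closed = true; }
        cv.notify_all();
    }
    bool pop(int& out) {
        std::unique_lock<std::mutex> lk(m);
        cv.wait(lk, [&] { return closed || !items.empty(); });
        if (items.empty()) return false;
        out = items.front();
        items.pop();
        return true;
    }
};

std::mutex serviceMutex;
std::shared_ptr<Queue> eventQueue;  // member-style pointer, may be reset elsewhere

int main() {
    eventQueue = std::make_shared<Queue>();

    std::thread worker([] {
        std::shared_ptr<Queue> queueRef;
        {
            std::lock_guard<std::mutex> lk(serviceMutex);
            queueRef = eventQueue;  // keep the queue alive for this thread
        }
        int v;
        while (queueRef->pop(v)) {
            std::cout << "handled event " << v << "\n";
        }
    });

    eventQueue->push(1);
    eventQueue->push(2);
    eventQueue->close();
    worker.join();

    std::lock_guard<std::mutex> lk(serviceMutex);
    eventQueue.reset();  // safe: the worker held its own reference
    return 0;
}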
diff --git a/src/mongo/db/repl/tenant_file_importer_service.h b/src/mongo/db/repl/tenant_file_importer_service.h
index 9a27af816da..d7188f9a0e6 100644
--- a/src/mongo/db/repl/tenant_file_importer_service.h
+++ b/src/mongo/db/repl/tenant_file_importer_service.h
@@ -82,75 +82,35 @@ private:
boost::optional<UUID> _migrationId;
std::string _donorConnectionString;
Mutex _mutex = MONGO_MAKE_LATCH("TenantFileImporterService::_mutex");
- class ImporterState {
- public:
- enum class State {
- kUninitialized,
- kStarted,
- kLearnedFilename,
- kLearnedAllFilenames,
- kInterrupted
- };
- void setState(State nextState) {
- tassert(6114403,
- str::stream() << "current state: " << toString(_state)
- << ", new state: " << toString(nextState),
- isValidTransition(nextState));
- _state = nextState;
- }
-
- bool is(State state) const {
- return _state == state;
- }
-
- StringData toString() const {
- return toString(_state);
- }
- private:
- static StringData toString(State value) {
- switch (value) {
- case State::kUninitialized:
- return "uninitialized";
- case State::kStarted:
- return "started";
- case State::kLearnedFilename:
- return "learned filename";
- case State::kLearnedAllFilenames:
- return "learned all filenames";
- case State::kInterrupted:
- return "interrupted";
- }
- MONGO_UNREACHABLE;
- return StringData();
- }
+ // Explicit State enum ordering defined here because we rely on comparison
+ // operators for state checking in various TenantFileImporterService methods.
+ enum class State {
+ kUninitialized = 0,
+ kStarted = 1,
+ kLearnedFilename = 2,
+ kLearnedAllFilenames = 3,
+ kInterrupted = 4
+ };
- bool isValidTransition(State newState) {
- if (_state == newState) {
- return true;
- }
-
- switch (_state) {
- case State::kUninitialized:
- return newState == State::kStarted || newState == State::kInterrupted;
- case State::kStarted:
- return newState == State::kInterrupted || newState == State::kLearnedFilename ||
- newState == State::kLearnedAllFilenames;
- case State::kLearnedFilename:
- return newState == State::kInterrupted || newState == State::kLearnedFilename ||
- newState == State::kLearnedAllFilenames;
- case State::kLearnedAllFilenames:
- return newState == State::kInterrupted;
- case State::kInterrupted:
- return newState == State::kUninitialized || newState == State::kStarted;
- }
- MONGO_UNREACHABLE;
+ static StringData stateToString(State state) {
+ switch (state) {
+ case State::kUninitialized:
+ return "uninitialized";
+ case State::kStarted:
+ return "started";
+ case State::kLearnedFilename:
+ return "learned filename";
+ case State::kLearnedAllFilenames:
+ return "learned all filenames";
+ case State::kInterrupted:
+ return "interrupted";
}
+ MONGO_UNREACHABLE;
+ return StringData();
+ }
- State _state = State::kUninitialized;
- };
-
- ImporterState _state;
+ State _state;
struct ImporterEvent {
enum class Type { kNone, kLearnedFileName, kLearnedAllFilenames };
@@ -166,6 +126,6 @@ private:
MultiProducerSingleConsumerQueue<ImporterEvent,
producer_consumer_queue_detail::DefaultCostFunction>;
- std::unique_ptr<Queue> _eventQueue;
+ std::shared_ptr<Queue> _eventQueue;
};
} // namespace mongo::repl
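
The comment in the new header notes that the explicit enumerator values exist so that state checks can use ordinary comparison operators instead of the removed transition table. A minimal standalone sketch of that idea follows; it is not part of the patch, and the helper name and `main()` driver are assumptions for illustration only.

```cpp
#include <iostream>

// Mirrors the ordering in the new header: enumerators are declared in order of
// progress, with kInterrupted last.
enum class State {
    kUninitialized = 0,
    kStarted = 1,
    kLearnedFilename = 2,
    kLearnedAllFilenames = 3,
    kInterrupted = 4
};

// Hypothetical check: relational comparison is meaningful only because the
// enumerator values above are explicitly ordered by progress.
bool hasLearnedAtLeastOneFilename(State s) {
    return s >= State::kLearnedFilename && s != State::kInterrupted;
}

int main() {
    std::cout << hasLearnedAtLeastOneFilename(State::kLearnedAllFilenames) << '\n';  // 1
    std::cout << hasLearnedAtLeastOneFilename(State::kStarted) << '\n';              // 0
}
```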
diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
index 53e7b24f135..fc693f64c20 100644
--- a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
+++ b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
@@ -437,7 +437,7 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx) {
// Recover TenantMigrationDonorAccessBlockers for ShardSplit.
PersistentTaskStore<ShardSplitDonorDocument> shardSplitDonorStore(
- NamespaceString::kTenantSplitDonorsNamespace);
+ NamespaceString::kShardSplitDonorsNamespace);
shardSplitDonorStore.forEach(opCtx, {}, [&](const ShardSplitDonorDocument& doc) {
// Skip creating a TenantMigrationDonorAccessBlocker for terminal shard split that have been
@@ -462,6 +462,8 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx) {
.add(tenantId.toString(), mtab);
switch (doc.getState()) {
+ case ShardSplitDonorStateEnum::kAbortingIndexBuilds:
+ break;
case ShardSplitDonorStateEnum::kBlocking:
invariant(doc.getBlockTimestamp());
mtab->startBlockingWrites();
diff --git a/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp b/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
index 34700086793..4cfdb60b43c 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
@@ -282,11 +282,11 @@ void TenantMigrationRecipientOpObserver::onDelete(OperationContext* opCtx,
if (nss == NamespaceString::kTenantMigrationRecipientsNamespace &&
!tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
if (tenantIdToDeleteDecoration(opCtx)) {
+ auto tenantId = tenantIdToDeleteDecoration(opCtx).get();
LOGV2_INFO(8423337, "Removing expired 'multitenant migration' migration");
- opCtx->recoveryUnit()->onCommit([opCtx](boost::optional<Timestamp>) {
+ opCtx->recoveryUnit()->onCommit([opCtx, tenantId](boost::optional<Timestamp>) {
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
- .remove(tenantIdToDeleteDecoration(opCtx).get(),
- TenantMigrationAccessBlocker::BlockerType::kRecipient);
+ .remove(tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
});
}
@@ -297,8 +297,7 @@ void TenantMigrationRecipientOpObserver::onDelete(OperationContext* opCtx,
"migrationId"_attr = migrationId);
opCtx->recoveryUnit()->onCommit([opCtx, migrationId](boost::optional<Timestamp>) {
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
- .removeRecipientAccessBlockersForMigration(
- migrationIdToDeleteDecoration(opCtx).get());
+ .removeRecipientAccessBlockersForMigration(migrationId);
repl::TenantFileImporterService::get(opCtx->getServiceContext())
->interrupt(migrationId);
});
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
index facaf190ab8..f355b7a3ac6 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
@@ -43,6 +43,7 @@
#include "mongo/db/commands/tenant_migration_donor_cmds_gen.h"
#include "mongo/db/commands/test_commands_enabled.h"
#include "mongo/db/concurrency/exception_util.h"
+#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/namespace_string.h"
@@ -213,7 +214,7 @@ public:
// Tenant migration does not require the metadata from the oplog query.
void processMetadata(const rpc::ReplSetMetadata& replMetadata,
- rpc::OplogQueryMetadata oqMetadata) final {}
+ const rpc::OplogQueryMetadata& oqMetadata) final {}
// Tenant migration does not change sync source depending on metadata.
ChangeSyncSourceAction shouldStopFetching(const HostAndPort& source,
@@ -2516,7 +2517,8 @@ void TenantMigrationRecipientService::Instance::_startOplogApplier() {
}
void TenantMigrationRecipientService::Instance::_setup() {
- auto opCtx = cc().makeOperationContext();
+ auto uniqueOpCtx = cc().makeOperationContext();
+ auto opCtx = uniqueOpCtx.get();
{
stdx::lock_guard lk(_mutex);
// Do not set the internal states if the migration is already interrupted.
@@ -2543,12 +2545,23 @@ void TenantMigrationRecipientService::Instance::_setup() {
_sharedData = std::make_unique<TenantMigrationSharedData>(
getGlobalServiceContext()->getFastClockSource(), getMigrationUUID(), resumePhase);
- _createOplogBuffer(lk, opCtx.get());
+ _createOplogBuffer(lk, opCtx);
}
// Start the oplog buffer outside the mutex to avoid deadlock on a concurrent stepdown.
try {
- _donorOplogBuffer->startup(opCtx.get());
+ // It is illegal to start the replicated donor buffer when the node is not primary.
+ // So ensure we are primary before trying to startup the oplog buffer.
+ repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX);
+
+ auto oplogBufferNS = getOplogBufferNs(getMigrationUUID());
+ if (!repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(
+ opCtx, oplogBufferNS.db())) {
+ uassertStatusOK(
+ Status(ErrorCodes::NotWritablePrimary, "Recipient node is no longer a primary."));
+ }
+
+ _donorOplogBuffer->startup(opCtx);
} catch (DBException& ex) {
ex.addContext("Failed to create oplog buffer collection.");
throw;
diff --git a/src/mongo/db/repl/tenant_oplog_applier_test.cpp b/src/mongo/db/repl/tenant_oplog_applier_test.cpp
index 4215b04043a..864960d84d7 100644
--- a/src/mongo/db/repl/tenant_oplog_applier_test.cpp
+++ b/src/mongo/db/repl/tenant_oplog_applier_test.cpp
@@ -201,10 +201,14 @@ private:
logv2::LogComponent::kTenantMigration, logv2::LogSeverity::Debug(1)};
};
+// TODO SERVER-67155 Remove all calls to DatabaseName::toStringWithTenantId() once the OplogEntry
+// deserializer passes "tid" to the NamespaceString constructor
TEST_F(TenantOplogApplierTest, NoOpsForSingleBatch) {
std::vector<OplogEntry> srcOps;
- srcOps.push_back(makeInsertOplogEntry(1, NamespaceString(_dbName, "foo"), UUID::gen()));
- srcOps.push_back(makeInsertOplogEntry(2, NamespaceString(_dbName, "bar"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 2, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
pushOps(srcOps);
auto writerPool = makeTenantMigrationWriterPool();
@@ -235,7 +239,8 @@ TEST_F(TenantOplogApplierTest, NoOpsForLargeBatch) {
std::vector<OplogEntry> srcOps;
// This should be big enough to use several threads to do the writing
for (int i = 0; i < 64; i++) {
- srcOps.push_back(makeInsertOplogEntry(i + 1, NamespaceString(_dbName, "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ i + 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
}
pushOps(srcOps);
@@ -266,10 +271,14 @@ TEST_F(TenantOplogApplierTest, NoOpsForLargeBatch) {
TEST_F(TenantOplogApplierTest, NoOpsForMultipleBatches) {
std::vector<OplogEntry> srcOps;
- srcOps.push_back(makeInsertOplogEntry(1, NamespaceString(_dbName, "foo"), UUID::gen()));
- srcOps.push_back(makeInsertOplogEntry(2, NamespaceString(_dbName, "bar"), UUID::gen()));
- srcOps.push_back(makeInsertOplogEntry(3, NamespaceString(_dbName, "baz"), UUID::gen()));
- srcOps.push_back(makeInsertOplogEntry(4, NamespaceString(_dbName, "bif"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 2, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 3, NamespaceString(_dbName.toStringWithTenantId(), "baz"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 4, NamespaceString(_dbName.toStringWithTenantId(), "bif"), UUID::gen()));
auto writerPool = makeTenantMigrationWriterPool();
@@ -305,14 +314,20 @@ TEST_F(TenantOplogApplierTest, NoOpsForMultipleBatches) {
TEST_F(TenantOplogApplierTest, NoOpsForLargeTransaction) {
std::vector<OplogEntry> innerOps1;
- innerOps1.push_back(makeInsertOplogEntry(11, NamespaceString(_dbName, "bar"), UUID::gen()));
- innerOps1.push_back(makeInsertOplogEntry(12, NamespaceString(_dbName, "bar"), UUID::gen()));
+ innerOps1.push_back(makeInsertOplogEntry(
+ 11, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
+ innerOps1.push_back(makeInsertOplogEntry(
+ 12, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
std::vector<OplogEntry> innerOps2;
- innerOps2.push_back(makeInsertOplogEntry(21, NamespaceString(_dbName, "bar"), UUID::gen()));
- innerOps2.push_back(makeInsertOplogEntry(22, NamespaceString(_dbName, "bar"), UUID::gen()));
+ innerOps2.push_back(makeInsertOplogEntry(
+ 21, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
+ innerOps2.push_back(makeInsertOplogEntry(
+ 22, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
std::vector<OplogEntry> innerOps3;
- innerOps3.push_back(makeInsertOplogEntry(31, NamespaceString(_dbName, "bar"), UUID::gen()));
- innerOps3.push_back(makeInsertOplogEntry(32, NamespaceString(_dbName, "bar"), UUID::gen()));
+ innerOps3.push_back(makeInsertOplogEntry(
+ 31, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
+ innerOps3.push_back(makeInsertOplogEntry(
+ 32, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
// Makes entries with ts from range [2, 5).
std::vector<OplogEntry> srcOps = makeMultiEntryTransactionOplogEntries(
@@ -353,7 +368,7 @@ TEST_F(TenantOplogApplierTest, CommitUnpreparedTransaction_DataPartiallyApplied)
client.createIndexes(NamespaceString::kSessionTransactionsTableNamespace.ns(),
{MongoDSessionCatalog::getConfigTxnPartialIndexSpec()});
}
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto lsid = makeLogicalSessionId(_opCtx.get());
TxnNumber txnNum(0);
@@ -411,7 +426,8 @@ TEST_F(TenantOplogApplierTest, CommitUnpreparedTransaction_DataPartiallyApplied)
}
TEST_F(TenantOplogApplierTest, ApplyInsert_DatabaseMissing) {
- auto entry = makeInsertOplogEntry(1, NamespaceString(_dbName, "bar"), UUID::gen());
+ auto entry = makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen());
bool onInsertsCalled = false;
_opObserver->onInsertsFn = [&](OperationContext* opCtx,
const NamespaceString&,
@@ -439,7 +455,8 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_DatabaseMissing) {
TEST_F(TenantOplogApplierTest, ApplyInsert_CollectionMissing) {
createDatabase(_opCtx.get(), _dbName.toString());
- auto entry = makeInsertOplogEntry(1, NamespaceString(_dbName, "bar"), UUID::gen());
+ auto entry = makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen());
bool onInsertsCalled = false;
_opObserver->onInsertsFn = [&](OperationContext* opCtx,
const NamespaceString&,
@@ -466,7 +483,7 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_CollectionMissing) {
}
TEST_F(TenantOplogApplierTest, ApplyInsert_InsertExisting) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
ASSERT_OK(getStorageInterface()->insertDocument(_opCtx.get(),
nss,
@@ -504,7 +521,7 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_InsertExisting) {
}
TEST_F(TenantOplogApplierTest, ApplyInsert_UniqueKey_InsertExisting) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
// Create unique key index on the collection.
@@ -545,7 +562,7 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_UniqueKey_InsertExisting) {
}
TEST_F(TenantOplogApplierTest, ApplyInsert_Success) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto entry = makeInsertOplogEntry(1, nss, uuid);
bool onInsertsCalled = false;
@@ -553,7 +570,9 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_Success) {
[&](OperationContext* opCtx, const NamespaceString& nss, const std::vector<BSONObj>& docs) {
ASSERT_FALSE(onInsertsCalled);
onInsertsCalled = true;
- ASSERT_EQUALS(nss.db(), _dbName.toString());
+ // TODO Check that (nss.dbName() == _dbName) once the OplogEntry deserializer passes
+ // "tid" to the NamespaceString constructor
+ ASSERT_EQUALS(nss.dbName().db(), _dbName.toStringWithTenantId());
ASSERT_EQUALS(nss.coll(), "bar");
ASSERT_EQUALS(1, docs.size());
ASSERT_BSONOBJ_EQ(docs[0], entry.getObject());
@@ -581,9 +600,9 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_Success) {
TEST_F(TenantOplogApplierTest, ApplyInserts_Grouped) {
// TODO(SERVER-50256): remove nss_workaround, which is used to work around a bug where
// the first operation assigned to a worker cannot be grouped.
- NamespaceString nss_workaround(_dbName, "a");
- NamespaceString nss1(_dbName, "bar");
- NamespaceString nss2(_dbName, "baz");
+ NamespaceString nss_workaround(_dbName.toStringWithTenantId(), "a");
+ NamespaceString nss1(_dbName.toStringWithTenantId(), "bar");
+ NamespaceString nss2(_dbName.toStringWithTenantId(), "baz");
auto uuid1 = createCollectionWithUuid(_opCtx.get(), nss1);
auto uuid2 = createCollectionWithUuid(_opCtx.get(), nss2);
std::vector<OplogEntry> entries;
@@ -641,7 +660,7 @@ TEST_F(TenantOplogApplierTest, ApplyInserts_Grouped) {
}
TEST_F(TenantOplogApplierTest, ApplyUpdate_MissingDocument) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto entry = makeOplogEntry(
repl::OpTypeEnum::kUpdate, nss, uuid, BSON("$set" << BSON("a" << 1)), BSON("_id" << 0));
@@ -676,7 +695,7 @@ TEST_F(TenantOplogApplierTest, ApplyUpdate_MissingDocument) {
}
TEST_F(TenantOplogApplierTest, ApplyUpdate_Success) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
ASSERT_OK(getStorageInterface()->insertDocument(_opCtx.get(), nss, {BSON("_id" << 0)}, 0));
auto entry = makeOplogEntry(
@@ -708,7 +727,8 @@ TEST_F(TenantOplogApplierTest, ApplyUpdate_Success) {
}
TEST_F(TenantOplogApplierTest, ApplyDelete_DatabaseMissing) {
- auto entry = makeOplogEntry(OpTypeEnum::kDelete, NamespaceString(_dbName, "bar"), UUID::gen());
+ auto entry = makeOplogEntry(
+ OpTypeEnum::kDelete, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen());
bool onDeleteCalled = false;
_opObserver->onDeleteFn = [&](OperationContext* opCtx,
const NamespaceString&,
@@ -738,7 +758,8 @@ TEST_F(TenantOplogApplierTest, ApplyDelete_DatabaseMissing) {
TEST_F(TenantOplogApplierTest, ApplyDelete_CollectionMissing) {
createDatabase(_opCtx.get(), _dbName.toString());
- auto entry = makeOplogEntry(OpTypeEnum::kDelete, NamespaceString(_dbName, "bar"), UUID::gen());
+ auto entry = makeOplogEntry(
+ OpTypeEnum::kDelete, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen());
bool onDeleteCalled = false;
_opObserver->onDeleteFn = [&](OperationContext* opCtx,
const NamespaceString&,
@@ -767,7 +788,7 @@ TEST_F(TenantOplogApplierTest, ApplyDelete_CollectionMissing) {
}
TEST_F(TenantOplogApplierTest, ApplyDelete_DocumentMissing) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto entry = makeOplogEntry(OpTypeEnum::kDelete, nss, uuid, BSON("_id" << 0));
bool onDeleteCalled = false;
@@ -798,7 +819,7 @@ TEST_F(TenantOplogApplierTest, ApplyDelete_DocumentMissing) {
}
TEST_F(TenantOplogApplierTest, ApplyDelete_Success) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
ASSERT_OK(getStorageInterface()->insertDocument(_opCtx.get(), nss, {BSON("_id" << 0)}, 0));
auto entry = makeOplogEntry(OpTypeEnum::kDelete, nss, uuid, BSON("_id" << 0));
@@ -814,7 +835,9 @@ TEST_F(TenantOplogApplierTest, ApplyDelete_Success) {
ASSERT_TRUE(opCtx->lockState()->isCollectionLockedForMode(nss, MODE_IX));
ASSERT_TRUE(opCtx->writesAreReplicated());
ASSERT_FALSE(args.fromMigrate);
- ASSERT_EQUALS(nss.db(), _dbName.toString());
+ // TODO SERVER-66708 Check that (nss.dbName() == _dbName) once the OplogEntry deserializer
+ // passes "tid" to the NamespaceString constructor
+ ASSERT_EQUALS(nss.dbName().db(), _dbName.toStringWithTenantId());
ASSERT_EQUALS(nss.coll(), "bar");
ASSERT_EQUALS(uuid, observer_uuid);
};
@@ -839,7 +862,7 @@ TEST_F(TenantOplogApplierTest, ApplyDelete_Success) {
}
TEST_F(TenantOplogApplierTest, ApplyCreateCollCommand_CollExisting) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto op = BSON("op"
<< "c"
@@ -874,8 +897,8 @@ TEST_F(TenantOplogApplierTest, ApplyCreateCollCommand_CollExisting) {
}
TEST_F(TenantOplogApplierTest, ApplyRenameCollCommand_CollExisting) {
- NamespaceString nss1(_dbName, "foo");
- NamespaceString nss2(_dbName, "bar");
+ NamespaceString nss1(_dbName.toStringWithTenantId(), "foo");
+ NamespaceString nss2(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss2);
auto op =
BSON("op"
@@ -914,7 +937,7 @@ TEST_F(TenantOplogApplierTest, ApplyRenameCollCommand_CollExisting) {
}
TEST_F(TenantOplogApplierTest, ApplyCreateCollCommand_Success) {
- NamespaceString nss(_dbName, "t");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "t");
auto op =
BSON("op"
<< "c"
@@ -954,7 +977,7 @@ TEST_F(TenantOplogApplierTest, ApplyCreateCollCommand_Success) {
}
TEST_F(TenantOplogApplierTest, ApplyCreateIndexesCommand_Success) {
- NamespaceString nss(_dbName, "t");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "t");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto op =
BSON("op"
@@ -1001,7 +1024,7 @@ TEST_F(TenantOplogApplierTest, ApplyCreateIndexesCommand_Success) {
}
TEST_F(TenantOplogApplierTest, ApplyStartIndexBuildCommand_Failure) {
- NamespaceString nss(_dbName, "t");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "t");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto op = BSON("op"
<< "c"
@@ -1066,7 +1089,7 @@ TEST_F(TenantOplogApplierTest, ApplyCreateCollCommand_WrongNSS) {
}
TEST_F(TenantOplogApplierTest, ApplyDropIndexesCommand_IndexNotFound) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto op = BSON("op"
<< "c"
@@ -1104,7 +1127,7 @@ TEST_F(TenantOplogApplierTest, ApplyDropIndexesCommand_IndexNotFound) {
}
TEST_F(TenantOplogApplierTest, ApplyCollModCommand_IndexNotFound) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto op = BSON("op"
<< "c"
@@ -1148,7 +1171,7 @@ TEST_F(TenantOplogApplierTest, ApplyCollModCommand_IndexNotFound) {
TEST_F(TenantOplogApplierTest, ApplyCollModCommand_CollectionMissing) {
createDatabase(_opCtx.get(), _dbName.toString());
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
UUID uuid(UUID::gen());
auto op = BSON("op"
<< "c"
@@ -1312,7 +1335,8 @@ TEST_F(TenantOplogApplierTest, ApplyResumeTokenNoop_Success) {
TEST_F(TenantOplogApplierTest, ApplyInsertThenResumeTokenNoopInDifferentBatch_Success) {
std::vector<OplogEntry> srcOps;
- srcOps.push_back(makeInsertOplogEntry(1, NamespaceString(_dbName, "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
srcOps.push_back(makeNoopOplogEntry(2, TenantMigrationRecipientService::kNoopMsg));
pushOps(srcOps);
auto writerPool = makeTenantMigrationWriterPool();
@@ -1349,7 +1373,8 @@ TEST_F(TenantOplogApplierTest, ApplyInsertThenResumeTokenNoopInDifferentBatch_Su
TEST_F(TenantOplogApplierTest, ApplyResumeTokenNoopThenInsertInSameBatch_Success) {
std::vector<OplogEntry> srcOps;
srcOps.push_back(makeNoopOplogEntry(1, TenantMigrationRecipientService::kNoopMsg));
- srcOps.push_back(makeInsertOplogEntry(2, NamespaceString(_dbName, "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 2, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
pushOps(srcOps);
auto writerPool = makeTenantMigrationWriterPool();
@@ -1380,7 +1405,8 @@ TEST_F(TenantOplogApplierTest, ApplyResumeTokenNoopThenInsertInSameBatch_Success
TEST_F(TenantOplogApplierTest, ApplyResumeTokenInsertThenNoopSameTimestamp_Success) {
std::vector<OplogEntry> srcOps;
- srcOps.push_back(makeInsertOplogEntry(1, NamespaceString(_dbName, "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
srcOps.push_back(makeNoopOplogEntry(1, TenantMigrationRecipientService::kNoopMsg));
pushOps(srcOps);
ASSERT_EQ(srcOps[0].getOpTime(), srcOps[1].getOpTime());
@@ -1413,7 +1439,8 @@ TEST_F(TenantOplogApplierTest, ApplyResumeTokenInsertThenNoopSameTimestamp_Succe
TEST_F(TenantOplogApplierTest, ApplyResumeTokenInsertThenNoop_Success) {
std::vector<OplogEntry> srcOps;
- srcOps.push_back(makeInsertOplogEntry(1, NamespaceString(_dbName, "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
srcOps.push_back(makeNoopOplogEntry(2, TenantMigrationRecipientService::kNoopMsg));
pushOps(srcOps);
auto writerPool = makeTenantMigrationWriterPool();
@@ -1445,8 +1472,8 @@ TEST_F(TenantOplogApplierTest, ApplyResumeTokenInsertThenNoop_Success) {
TEST_F(TenantOplogApplierTest, ApplyInsert_MultiKeyIndex) {
createCollectionWithUuid(_opCtx.get(), NamespaceString::kSessionTransactionsTableNamespace);
- NamespaceString indexedNss(_dbName, "indexedColl");
- NamespaceString nonIndexedNss(_dbName, "nonIndexedColl");
+ NamespaceString indexedNss(_dbName.toStringWithTenantId(), "indexedColl");
+ NamespaceString nonIndexedNss(_dbName.toStringWithTenantId(), "nonIndexedColl");
auto indexedCollUUID = createCollectionWithUuid(_opCtx.get(), indexedNss);
createCollection(_opCtx.get(), nonIndexedNss, CollectionOptions());
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 7f30b7b113d..c72bb2ddfb3 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -1364,14 +1364,14 @@ void TopologyCoordinator::setMyLastDurableOpTimeAndWallTime(OpTimeAndWallTime op
myMemberData.setLastDurableOpTimeAndWallTime(opTimeAndWallTime, now);
}
-StatusWith<bool> TopologyCoordinator::setLastOptime(const UpdatePositionArgs::UpdateInfo& args,
- Date_t now) {
+StatusWith<bool> TopologyCoordinator::setLastOptimeForMember(
+ const UpdatePositionArgs::UpdateInfo& args, Date_t now) {
if (_selfIndex == -1) {
// Ignore updates when we're in state REMOVED.
return Status(ErrorCodes::NotPrimaryOrSecondary,
"Received replSetUpdatePosition command but we are in state REMOVED");
}
- invariant(_rsConfig.isInitialized()); // Can only use setLastOptime in replSet mode.
+ invariant(_rsConfig.isInitialized()); // Can only use setLastOptimeForMember in replSet mode.
MemberId memberId;
try {
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index fb9f7a196f7..3285a5b4825 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -585,7 +585,7 @@ public:
* Returns a Status if the position could not be set, false if the last optimes for the node
* did not change, or true if either the last applied or last durable optime did change.
*/
- StatusWith<bool> setLastOptime(const UpdatePositionArgs::UpdateInfo& args, Date_t now);
+ StatusWith<bool> setLastOptimeForMember(const UpdatePositionArgs::UpdateInfo& args, Date_t now);
/**
* Sets the latest optime committed in the previous config to the current lastCommitted optime.
diff --git a/src/mongo/db/repl_index_build_state.h b/src/mongo/db/repl_index_build_state.h
index 16e1bbeb34c..fadcc67896b 100644
--- a/src/mongo/db/repl_index_build_state.h
+++ b/src/mongo/db/repl_index_build_state.h
@@ -302,8 +302,6 @@ public:
/**
* Called when commit quorum is satisfied.
- * Invokes 'onCommitQuorumSatisfied' if state is successfully transitioned to commit quorum
- * satisfied.
*/
void setCommitQuorumSatisfied(OperationContext* opCtx);
diff --git a/src/mongo/db/s/README.md b/src/mongo/db/s/README.md
index b7d8bdff562..f3e67bce8b8 100644
--- a/src/mongo/db/s/README.md
+++ b/src/mongo/db/s/README.md
@@ -752,10 +752,14 @@ operations. The metadata is reaped if the cluster does not receive a new operati
session for a reasonably long time (the default is 30 minutes).
A logical session is identified by its "logical session id," or `lsid`. An `lsid` is a combination
-of two pieces of information:
+of up to four pieces of information:
1. `id` - A globally unique id (UUID) generated by the mongo shell, driver, or the `startSession` server command
1. `uid` (user id) - The identification information for the logged-in user (if authentication is enabled)
+1. `txnNumber` - An optional parameter set only for internal transactions spawned from retryable writes. It is a strictly-increasing counter set by the transaction API to match the `txnNumber` of the corresponding retryable write.
+1. `txnUUID` - An optional parameter set only for internal transactions spawned inside client sessions. The `txnUUID` is a globally unique id generated by the transaction API.
+
+A logical session with a `txnNumber` and `txnUUID` is considered a child of the session with matching `id` and `uid` values. There may be multiple child sessions per parent session, and checking out a child/parent session also checks out its counterpart and updates the `lastUsedTime` of both. Killing a parent session also kills all of its child sessions.
The order of operations in the logical session that need to durably store metadata is defined by an
integer counter, called the `txnNumber`. When the cluster receives a retryable write or transaction
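
For readers following the `lsid` description above, here is a small standalone model of the parent/child relationship between sessions. It is illustrative only: the server stores these fields as BSON, and the struct, field types, and `isChildOf` helper below are assumptions, not the server's API.

```cpp
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>

// Toy model of the four lsid components described above.
struct LogicalSessionId {
    std::string id;                         // globally unique session UUID
    std::string uid;                        // digest identifying the logged-in user
    std::optional<std::int64_t> txnNumber;  // only for internal txns from retryable writes
    std::optional<std::string> txnUUID;     // only for internal txns in client sessions
};

// A session carrying txnNumber/txnUUID is a child of the plain session that has
// the same id and uid values.
bool isChildOf(const LogicalSessionId& child, const LogicalSessionId& parent) {
    return child.id == parent.id && child.uid == parent.uid &&
        (child.txnNumber.has_value() || child.txnUUID.has_value());
}

int main() {
    LogicalSessionId parent{"9f0a-...", "userDigest", std::nullopt, std::nullopt};
    LogicalSessionId child{"9f0a-...", "userDigest", 5, std::string("3c2b-...")};
    std::cout << std::boolalpha << isChildOf(child, parent) << '\n';  // true
}
```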
@@ -848,8 +852,12 @@ and to check the session back in upon completion. When a session is checked out,
until it is checked back in, forcing other operations to wait for the ongoing operation to complete
or yield the session.
+Checking out an internal/child session additionally checks out its parent session (the session with the same `id` and `uid` values in the lsid, but without a `txnNumber` or `txnUUID` value), and vice versa.
+
The runtime state for a session consists of the last checkout time and operation, the number of operations
-waiting to check out the session, and the number of kills requested. The last checkout time is used by
+waiting to check out the session, and the number of kills requested. Retryable internal sessions are reaped from the logical session catalog [eagerly](https://github.com/mongodb/mongo/blob/67e37f8e806a6a5d402e20eee4b3097e2b11f820/src/mongo/db/session_catalog.cpp#L342): once a transaction session with a higher transaction number has successfully started, sessions with lower txnNumbers are removed from the session catalog and inserted into an in-memory buffer by the [InternalTransactionsReapService](https://github.com/mongodb/mongo/blob/67e37f8e806a6a5d402e20eee4b3097e2b11f820/src/mongo/db/internal_transactions_reap_service.h#L42). Once a configurable threshold is met (1000 by default), the buffered sessions are deleted from the transactions table (`config.transactions`) and from `config.image_collection` all at once. Eager reaping is best-effort: the in-memory buffer is cleared on stepdown or restart, and any missed sessions are reaped once the session expires or once their `config.transactions` entries have not been written to for `TransactionRecordMinimumLifetimeMinutes` minutes.
+
+The last checkout time is used by
the [periodic job inside the logical session cache](#periodic-cleanup-of-the-session-catalog-and-transactions-table)
to determine when a session should be reaped from the session catalog, whereas the number of
operations waiting to check out a session is used to block reaping of sessions that are still in
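
A short standalone sketch of the buffered eager-reaping behaviour described above. The 1000-entry default mirrors the text; the class name, method names, and the batched-delete stand-in are illustrative assumptions rather than the InternalTransactionsReapService API.

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

class ReapBuffer {
public:
    explicit ReapBuffer(std::size_t threshold) : _threshold(threshold) {}

    // Buffer a superseded internal session; flush once the threshold is met.
    void add(std::string lsid) {
        _pending.push_back(std::move(lsid));
        if (_pending.size() >= _threshold) {
            flush();
        }
    }

    // Best effort: on stepdown or restart the real buffer is simply dropped, and the
    // normal expiration path later reaps anything that was missed.
    void clearOnStepdown() { _pending.clear(); }

private:
    void flush() {
        // Stand-in for the single batched delete from config.transactions and
        // config.image_collection described above.
        std::cout << "deleting " << _pending.size() << " session entries\n";
        _pending.clear();
    }

    std::size_t _threshold;
    std::vector<std::string> _pending;
};

int main() {
    ReapBuffer buffer(1000);
    for (int i = 0; i < 2500; ++i) {
        buffer.add("lsid-" + std::to_string(i));  // flushes twice; 500 remain pending
    }
}
```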
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 415bd49e852..96f4e84813a 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -55,6 +55,7 @@ env.Library(
'collection_critical_section_document.idl',
'collection_sharding_runtime.cpp',
'collection_sharding_state_factory_shard.cpp',
+ 'commit_chunk_migration.idl',
'config_server_op_observer.cpp',
'global_index_metrics.cpp',
'metadata_manager.cpp',
@@ -96,7 +97,7 @@ env.Library(
'resharding/resharding_future_util.cpp',
'resharding/resharding_manual_cleanup.cpp',
'resharding/resharding_metrics_helpers.cpp',
- 'resharding/resharding_metrics_new.cpp',
+ 'resharding/resharding_metrics.cpp',
'resharding/resharding_op_observer.cpp',
'resharding/resharding_oplog_applier.cpp',
'resharding/resharding_oplog_applier_metrics.cpp',
@@ -137,6 +138,7 @@ env.Library(
'type_shard_collection.idl',
],
LIBDEPS=[
+ '$BUILD_DIR/mongo/client/remote_command_targeter',
'$BUILD_DIR/mongo/db/catalog/multi_index_block',
'$BUILD_DIR/mongo/db/client_metadata_propagation_egress_hook',
'$BUILD_DIR/mongo/db/commands/mongod_fcv',
@@ -173,6 +175,7 @@ env.Library(
'$BUILD_DIR/mongo/db/repl/image_collection_entry',
'$BUILD_DIR/mongo/db/rs_local_client',
'$BUILD_DIR/mongo/db/session_catalog',
+ '$BUILD_DIR/mongo/db/timeseries/bucket_catalog',
'$BUILD_DIR/mongo/idl/server_parameter',
'$BUILD_DIR/mongo/util/future_util',
],
@@ -334,7 +337,6 @@ env.Library(
'cluster_pipeline_cmd_d.cpp',
'cluster_write_cmd_d.cpp',
'collmod_coordinator_document.idl',
- 'collmod_coordinator_pre60_compatible.cpp',
'collmod_coordinator.cpp',
'compact_structured_encryption_data_coordinator.cpp',
'compact_structured_encryption_data_coordinator.idl',
@@ -402,7 +404,6 @@ env.Library(
'resharding_test_commands.idl',
'set_allow_migrations_coordinator_document.idl',
'set_allow_migrations_coordinator.cpp',
- 'set_shard_version_command.cpp',
'sharded_collmod.idl',
'sharded_index_consistency_server_status.cpp',
'sharded_rename_collection.idl',
@@ -550,7 +551,6 @@ env.CppUnitTest(
'collection_metadata_filtering_test.cpp',
'collection_metadata_test.cpp',
'collection_sharding_runtime_test.cpp',
- 'create_collection_coordinator_test.cpp',
'database_sharding_state_test.cpp',
'dist_lock_catalog_mock.cpp',
'dist_lock_catalog_replset_test.cpp',
@@ -574,7 +574,7 @@ env.CppUnitTest(
'resharding/resharding_donor_oplog_iterator_test.cpp',
'resharding/resharding_donor_recipient_common_test.cpp',
'resharding/resharding_donor_service_test.cpp',
- 'resharding/resharding_metrics_new_test.cpp',
+ 'resharding/resharding_metrics_test.cpp',
'resharding/resharding_oplog_applier_test.cpp',
'resharding/resharding_oplog_applier_metrics_test.cpp',
'resharding/resharding_oplog_batch_applier_test.cpp',
@@ -618,6 +618,8 @@ env.CppUnitTest(
'$BUILD_DIR/mongo/db/exec/document_value/document_value_test_util',
'$BUILD_DIR/mongo/db/keys_collection_client_direct',
'$BUILD_DIR/mongo/db/logical_session_cache_impl',
+ '$BUILD_DIR/mongo/db/op_observer',
+ '$BUILD_DIR/mongo/db/op_observer_util',
'$BUILD_DIR/mongo/db/ops/write_ops_exec',
'$BUILD_DIR/mongo/db/pipeline/document_source_mock',
'$BUILD_DIR/mongo/db/pipeline/expression_context',
diff --git a/src/mongo/db/s/balancer/balance_stats_test.cpp b/src/mongo/db/s/balancer/balance_stats_test.cpp
index 9381e0a2da6..aa7b056ae34 100644
--- a/src/mongo/db/s/balancer/balance_stats_test.cpp
+++ b/src/mongo/db/s/balancer/balance_stats_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/bson/oid.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/s/balancer/balance_stats.h"
@@ -79,7 +77,7 @@ private:
const Timestamp _timestamp{Timestamp(1, 1)};
const ShardId _shardPrimary{"dummyShardPrimary"};
const DatabaseVersion _dbVersion{UUID::gen(), _timestamp};
- ChunkVersion _nextVersion{1, 0, _epoch, _timestamp};
+ ChunkVersion _nextVersion{{_epoch, _timestamp}, {1, 0}};
};
TEST_F(BalanceStatsTest, SingleChunkNoZones) {
diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp
index 2ec66bc8ffd..fc2c42a59c1 100644
--- a/src/mongo/db/s/balancer/balancer.cpp
+++ b/src/mongo/db/s/balancer/balancer.cpp
@@ -80,13 +80,13 @@ namespace {
MONGO_FAIL_POINT_DEFINE(overrideBalanceRoundInterval);
-const Seconds kBalanceRoundDefaultInterval(10);
+const Milliseconds kBalanceRoundDefaultInterval(10 * 1000);
// Sleep between balancer rounds in the case where the last round found some chunks which needed to
// be balanced. This value should be set sufficiently low so that imbalanced clusters will quickly
// reach balanced state, but setting it too low may cause CRUD operations to start failing due to
// not being able to establish a stable shard version.
-const Seconds kShortBalanceRoundInterval(1);
+const Milliseconds kBalancerMigrationsThrottling(1 * 1000);
/**
* Balancer status response
@@ -293,11 +293,11 @@ void Balancer::initiateBalancer(OperationContext* opCtx) {
void Balancer::interruptBalancer() {
stdx::lock_guard<Latch> scopedLock(_mutex);
- if (_state != kRunning)
+ if (_state != kRunning) {
return;
+ }
_state = kStopping;
- _thread.detach();
// Interrupt the balancer thread if it has been started. We are guaranteed that the operation
// context of that thread is still alive, because we hold the balancer mutex.
@@ -312,8 +312,10 @@ void Balancer::interruptBalancer() {
void Balancer::waitForBalancerToStop() {
stdx::unique_lock<Latch> scopedLock(_mutex);
-
_joinCond.wait(scopedLock, [this] { return _state == kStopped; });
+ if (_thread.joinable()) {
+ _thread.join();
+ }
}
void Balancer::joinCurrentRound(OperationContext* opCtx) {
@@ -612,12 +614,12 @@ void Balancer::_consumeActionStreamLoop() {
void Balancer::_mainThread() {
ON_BLOCK_EXIT([this] {
- stdx::lock_guard<Latch> scopedLock(_mutex);
-
- _state = kStopped;
+ {
+ stdx::lock_guard<Latch> scopedLock(_mutex);
+ _state = kStopped;
+ LOGV2_DEBUG(21855, 1, "Balancer thread terminated");
+ }
_joinCond.notify_all();
-
- LOGV2_DEBUG(21855, 1, "Balancer thread terminated");
});
Client::initThread("Balancer");
@@ -664,6 +666,7 @@ void Balancer::_mainThread() {
LOGV2(6036606, "Balancer worker thread initialised. Entering main loop.");
// Main balancer loop
+ auto lastMigrationTime = Date_t::fromMillisSinceEpoch(0);
while (!_stopRequested()) {
BalanceRoundDetails roundDetails;
@@ -691,6 +694,14 @@ void Balancer::_mainThread() {
continue;
}
+ boost::optional<Milliseconds> forcedBalancerRoundInterval(boost::none);
+ overrideBalanceRoundInterval.execute([&](const BSONObj& data) {
+ forcedBalancerRoundInterval = Milliseconds(data["intervalMs"].numberInt());
+ LOGV2(21864,
+ "overrideBalanceRoundInterval: using customized balancing interval",
+ "balancerInterval"_attr = *forcedBalancerRoundInterval);
+ });
+
// The current configuration is allowing the balancer to perform operations.
// Unblock the secondary thread if needed.
_defragmentationCondVar.notify_all();
@@ -739,9 +750,20 @@ void Balancer::_mainThread() {
if (chunksToRebalance.empty() && chunksToDefragment.empty()) {
LOGV2_DEBUG(21862, 1, "No need to move any chunk");
_balancedLastTime = 0;
+ LOGV2_DEBUG(21863, 1, "End balancing round");
+ _endRound(opCtx.get(),
+ forcedBalancerRoundInterval ? *forcedBalancerRoundInterval
+ : kBalanceRoundDefaultInterval);
} else {
+ auto timeSinceLastMigration = Date_t::now() - lastMigrationTime;
+ _sleepFor(opCtx.get(),
+ forcedBalancerRoundInterval
+ ? *forcedBalancerRoundInterval - timeSinceLastMigration
+ : kBalancerMigrationsThrottling - timeSinceLastMigration);
+
_balancedLastTime =
_moveChunks(opCtx.get(), chunksToRebalance, chunksToDefragment);
+ lastMigrationTime = Date_t::now();
roundDetails.setSucceeded(
static_cast<int>(chunksToRebalance.size() + chunksToDefragment.size()),
@@ -750,24 +772,13 @@ void Balancer::_mainThread() {
ShardingLogging::get(opCtx.get())
->logAction(opCtx.get(), "balancer.round", "", roundDetails.toBSON())
.ignore();
- }
- LOGV2_DEBUG(21863, 1, "End balancing round");
+ LOGV2_DEBUG(6679500, 1, "End balancing round");
+ // Migration throttling of kBalancerMigrationsThrottling will be applied before
+ // the next call to _moveChunks, so don't sleep here.
+ _endRound(opCtx.get(), Milliseconds(0));
+ }
}
-
- Milliseconds balancerInterval =
- _balancedLastTime ? kShortBalanceRoundInterval : kBalanceRoundDefaultInterval;
-
- overrideBalanceRoundInterval.execute([&](const BSONObj& data) {
- balancerInterval = Milliseconds(data["intervalMs"].numberInt());
- LOGV2(21864,
- "overrideBalanceRoundInterval: using shorter balancing interval: "
- "{balancerInterval}",
- "overrideBalanceRoundInterval: using shorter balancing interval",
- "balancerInterval"_attr = balancerInterval);
- });
-
- _endRound(opCtx.get(), balancerInterval);
} catch (const DBException& e) {
LOGV2(21865,
"caught exception while doing balance: {error}",
@@ -976,15 +987,6 @@ int Balancer::_moveChunks(OperationContext* opCtx,
return coll.getMaxChunkSizeBytes().value_or(balancerConfig->getMaxChunkSizeBytes());
}();
- if (serverGlobalParams.featureCompatibility.isLessThan(
- multiversion::FeatureCompatibilityVersion::kVersion_6_0)) {
- // TODO SERVER-65322 only use `moveRange` once v6.0 branches out
- MoveChunkSettings settings(maxChunkSizeBytes,
- balancerConfig->getSecondaryThrottle(),
- balancerConfig->waitForDelete());
- return _commandScheduler->requestMoveChunk(opCtx, migrateInfo, settings);
- }
-
MoveRangeRequestBase requestBase(migrateInfo.to);
requestBase.setWaitForDelete(balancerConfig->waitForDelete());
requestBase.setMin(migrateInfo.minKey);
@@ -1086,7 +1088,7 @@ SharedSemiFuture<void> Balancer::applyLegacyChunkSizeConstraintsOnClusterData(
NamespaceString::kLogicalSessionsNamespace,
0,
boost::none /*defragmentCollection*/,
- boost::none /*enableAutoSplitter*/);
+ false /*enableAutoSplitter*/);
} catch (const ExceptionFor<ErrorCodes::NamespaceNotSharded>&) {
// config.system.collections does not appear in config.collections; continue.
}
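
The reworked main loop above throttles migrations by sleeping only for the remainder of `kBalancerMigrationsThrottling` (or the failpoint-forced interval) since the last migration. A standalone sketch of that arithmetic follows; it is not the balancer's code, and the clamp to zero is an assumption made to keep the example self-contained (the patch passes the raw difference to `_sleepFor`).

```cpp
#include <algorithm>
#include <chrono>
#include <iostream>

using namespace std::chrono;

// Sleep only for the unexpired portion of the throttling window.
milliseconds nextSleep(milliseconds throttle,
                       steady_clock::time_point lastMigration,
                       steady_clock::time_point now) {
    const auto sinceLast = duration_cast<milliseconds>(now - lastMigration);
    return std::max(milliseconds{0}, throttle - sinceLast);
}

int main() {
    const auto now = steady_clock::now();
    // 400ms since the last migration with a 1s throttle leaves roughly 600ms to wait.
    std::cout << nextSleep(milliseconds{1000}, now - milliseconds{400}, now).count()
              << "ms\n";
}
```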
diff --git a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp
index bf22d67619e..8b50d3d002f 100644
--- a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp
+++ b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/commands.h"
#include "mongo/db/s/balancer/balancer_chunk_selection_policy_impl.h"
#include "mongo/db/s/balancer/cluster_statistics_impl.h"
@@ -133,7 +131,7 @@ TEST_F(BalancerChunkSelectionTest, TagRangesOverlap) {
// Set up a database and a sharded collection in the metadata.
const auto collUUID = UUID::gen();
- ChunkVersion version(2, 0, OID::gen(), Timestamp(42));
+ ChunkVersion version({OID::gen(), Timestamp(42)}, {2, 0});
setUpDatabase(kDbName, kShardId0);
setUpCollection(kNamespace, collUUID, version);
@@ -192,7 +190,7 @@ TEST_F(BalancerChunkSelectionTest, TagRangeMaxNotAlignedWithChunkMax) {
// Set up a database and a sharded collection in the metadata.
const auto collUUID = UUID::gen();
- ChunkVersion version(2, 0, OID::gen(), Timestamp(42));
+ ChunkVersion version({OID::gen(), Timestamp(42)}, {2, 0});
setUpDatabase(kDbName, kShardId0);
setUpCollection(kNamespace, collUUID, version);
@@ -251,7 +249,7 @@ TEST_F(BalancerChunkSelectionTest, ShardedTimeseriesCollectionsCanBeAutoSplitted
// Set up a database and a sharded collection in the metadata.
const auto collUUID = UUID::gen();
- ChunkVersion version(2, 0, OID::gen(), Timestamp(42));
+ ChunkVersion version({OID::gen(), Timestamp(42)}, {2, 0});
setUpDatabase(kDbName, kShardId0);
TypeCollectionTimeseriesFields tsFields;
@@ -302,7 +300,7 @@ TEST_F(BalancerChunkSelectionTest, ShardedTimeseriesCollectionsCanBeBalanced) {
// Set up a database and a sharded collection in the metadata.
const auto collUUID = UUID::gen();
- ChunkVersion version(2, 0, OID::gen(), Timestamp(42));
+ ChunkVersion version({OID::gen(), Timestamp(42)}, {2, 0});
setUpDatabase(kDbName, kShardId0);
TypeCollectionTimeseriesFields tsFields;
diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp
index e78ae862393..7ebe9dac42c 100644
--- a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp
+++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp
@@ -155,7 +155,7 @@ std::vector<RequestData> rebuildRequestsFromRecoveryInfo(
DBDirectClient dbClient(opCtx);
try {
FindCommandRequest findRequest{MigrationType::ConfigNS};
- dbClient.find(std::move(findRequest), ReadPreferenceSetting{}, documentProcessor);
+ dbClient.find(std::move(findRequest), documentProcessor);
} catch (const DBException& e) {
LOGV2_ERROR(5847215, "Failed to fetch requests to recover", "error"_attr = redact(e));
}
diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp b/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp
index 678e5f63f9f..72e86413aa9 100644
--- a/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp
+++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/client/remote_command_targeter_mock.h"
#include "mongo/db/s/balancer/balancer_commands_scheduler.h"
#include "mongo/db/s/balancer/balancer_commands_scheduler_impl.h"
@@ -65,7 +63,7 @@ public:
chunk.setMax(BSON("x" << min + 10));
chunk.setJumbo(false);
chunk.setShard(shardId);
- chunk.setVersion(ChunkVersion(1, 1, OID::gen(), Timestamp(10)));
+ chunk.setVersion(ChunkVersion({OID::gen(), Timestamp(10)}, {1, 1}));
return chunk;
}
@@ -76,7 +74,7 @@ public:
kUuid,
BSON("x" << min),
BSON("x" << min + 10),
- ChunkVersion(1, 1, OID::gen(), Timestamp(10)),
+ ChunkVersion({OID::gen(), Timestamp(10)}, {1, 1}),
MoveChunkRequest::ForceJumbo::kDoNotForce);
}
@@ -234,7 +232,7 @@ TEST_F(BalancerCommandsSchedulerTest, SuccessfulMergeChunkCommand) {
_scheduler.start(operationContext(), getMigrationRecoveryDefaultValues());
ChunkRange range(BSON("x" << 0), BSON("x" << 20));
- ChunkVersion version(1, 1, OID::gen(), Timestamp(10));
+ ChunkVersion version({OID::gen(), Timestamp(10)}, {1, 1});
auto futureResponse =
_scheduler.requestMergeChunks(operationContext(), kNss, kShardId0, range, version);
ASSERT_OK(futureResponse.getNoThrow());
@@ -246,7 +244,7 @@ TEST_F(BalancerCommandsSchedulerTest, MergeChunkNonexistentShard) {
auto remoteResponsesFuture = setRemoteResponses();
_scheduler.start(operationContext(), getMigrationRecoveryDefaultValues());
ChunkRange range(BSON("x" << 0), BSON("x" << 20));
- ChunkVersion version(1, 1, OID::gen(), Timestamp(10));
+ ChunkVersion version({OID::gen(), Timestamp(10)}, {1, 1});
auto futureResponse = _scheduler.requestMergeChunks(
operationContext(), kNss, ShardId("nonexistent"), range, version);
auto shardNotFoundError = Status{ErrorCodes::ShardNotFound, "Shard nonexistent not found"};
diff --git a/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp b/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp
index c42f7e86cd7..d1f431b4082 100644
--- a/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp
+++ b/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp
@@ -47,7 +47,7 @@ protected:
const ShardId kShardId1 = ShardId("shard1");
const ShardId kShardId2 = ShardId("shard2");
const ShardId kShardId3 = ShardId("shard3");
- const ChunkVersion kCollectionVersion = ChunkVersion(1, 1, OID::gen(), Timestamp(10));
+ const ChunkVersion kCollectionVersion = ChunkVersion({OID::gen(), Timestamp(10)}, {1, 1});
const KeyPattern kShardKeyPattern = KeyPattern(BSON("x" << 1));
const BSONObj kKeyAtMin = BSONObjBuilder().appendMinKey("x").obj();
const BSONObj kKeyAtZero = BSON("x" << 0);
@@ -494,7 +494,8 @@ TEST_F(BalancerDefragmentationPolicyTest, TestPhaseOneAllConsecutive) {
ChunkType chunk(
kUuid,
ChunkRange(minKey, maxKey),
- ChunkVersion(1, i, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()},
+ {1, uint32_t(i)}),
kShardId0);
chunkList.push_back(chunk);
}
@@ -504,7 +505,8 @@ TEST_F(BalancerDefragmentationPolicyTest, TestPhaseOneAllConsecutive) {
ChunkType chunk(
kUuid,
ChunkRange(minKey, maxKey),
- ChunkVersion(1, i, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()},
+ {1, uint32_t(i)}),
kShardId1);
chunkList.push_back(chunk);
}
@@ -543,7 +545,8 @@ TEST_F(BalancerDefragmentationPolicyTest, PhaseOneNotConsecutive) {
ChunkType chunk(
kUuid,
ChunkRange(minKey, maxKey),
- ChunkVersion(1, i, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()},
+ {1, uint32_t(i)}),
chosenShard);
chunkList.push_back(chunk);
}
@@ -620,13 +623,13 @@ TEST_F(BalancerDefragmentationPolicyTest, TestPhaseTwoChunkCanBeMovedAndMergedWi
ChunkType biggestChunk(
kUuid,
ChunkRange(kKeyAtMin, kKeyAtZero),
- ChunkVersion(1, 0, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 0}),
kShardId0);
biggestChunk.setEstimatedSizeBytes(2048);
ChunkType smallestChunk(
kUuid,
ChunkRange(kKeyAtZero, kKeyAtMax),
- ChunkVersion(1, 1, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 1}),
kShardId1);
smallestChunk.setEstimatedSizeBytes(1024);
@@ -682,42 +685,42 @@ TEST_F(BalancerDefragmentationPolicyTest,
ChunkType firstChunkOnShard0(
kUuid,
ChunkRange(kKeyAtMin, kKeyAtZero),
- ChunkVersion(1, 0, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 0}),
kShardId0);
firstChunkOnShard0.setEstimatedSizeBytes(1);
ChunkType firstChunkOnShard1(
kUuid,
ChunkRange(kKeyAtZero, kKeyAtTen),
- ChunkVersion(1, 1, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 1}),
kShardId1);
firstChunkOnShard1.setEstimatedSizeBytes(1);
ChunkType chunkOnShard2(
kUuid,
ChunkRange(kKeyAtTen, kKeyAtTwenty),
- ChunkVersion(1, 2, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 2}),
kShardId2);
chunkOnShard2.setEstimatedSizeBytes(1);
ChunkType chunkOnShard3(
kUuid,
ChunkRange(kKeyAtTwenty, kKeyAtThirty),
- ChunkVersion(1, 3, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 3}),
kShardId3);
chunkOnShard3.setEstimatedSizeBytes(1);
ChunkType secondChunkOnShard0(
kUuid,
ChunkRange(kKeyAtThirty, kKeyAtForty),
- ChunkVersion(1, 4, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 4}),
kShardId0);
secondChunkOnShard0.setEstimatedSizeBytes(1);
ChunkType secondChunkOnShard1(
kUuid,
ChunkRange(kKeyAtForty, kKeyAtMax),
- ChunkVersion(1, 5, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 5}),
kShardId1);
secondChunkOnShard1.setEstimatedSizeBytes(1);
diff --git a/src/mongo/db/s/balancer/balancer_policy_test.cpp b/src/mongo/db/s/balancer/balancer_policy_test.cpp
index fb98d610b00..be3532fee56 100644
--- a/src/mongo/db/s/balancer/balancer_policy_test.cpp
+++ b/src/mongo/db/s/balancer/balancer_policy_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/keypattern.h"
#include "mongo/db/s/balancer/balancer_policy.h"
#include "mongo/platform/random.h"
@@ -79,7 +76,7 @@ std::pair<ShardStatisticsVector, ShardToChunksMap> generateCluster(
int64_t currentChunk = 0;
- ChunkVersion chunkVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ ChunkVersion chunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
const UUID uuid = UUID::gen();
const KeyPattern shardKeyPattern(BSON("x" << 1));
diff --git a/src/mongo/db/s/balancer/cluster_chunks_resize_policy_test.cpp b/src/mongo/db/s/balancer/cluster_chunks_resize_policy_test.cpp
index 607e57dab44..94b6e874cbf 100644
--- a/src/mongo/db/s/balancer/cluster_chunks_resize_policy_test.cpp
+++ b/src/mongo/db/s/balancer/cluster_chunks_resize_policy_test.cpp
@@ -30,6 +30,7 @@
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/s/balancer/cluster_chunks_resize_policy_impl.h"
#include "mongo/db/s/config/config_server_test_fixture.h"
+
namespace mongo {
namespace {
@@ -37,7 +38,7 @@ class ClusterChunksResizePolicyTest : public ConfigServerTestFixture {
protected:
const NamespaceString kNss{"testDb.testColl"};
const UUID kUuid = UUID::gen();
- const ChunkVersion kCollectionVersion = ChunkVersion(1, 1, OID::gen(), Timestamp(10));
+ const ChunkVersion kCollectionVersion = ChunkVersion({OID::gen(), Timestamp(10)}, {1, 1});
const ShardId kShardId0 = ShardId("shard0");
const ShardId kShardId1 = ShardId("shard1");
diff --git a/src/mongo/db/s/balancer/type_migration.cpp b/src/mongo/db/s/balancer/type_migration.cpp
index 1aac063b940..a47fdff6197 100644
--- a/src/mongo/db/s/balancer/type_migration.cpp
+++ b/src/mongo/db/s/balancer/type_migration.cpp
@@ -113,8 +113,7 @@ StatusWith<MigrationType> MigrationType::fromBSON(const BSONObj& source) {
}
try {
- auto chunkVersionStatus =
- ChunkVersion::fromBSONPositionalOrNewerFormat(source[chunkVersion.name()]);
+ auto chunkVersionStatus = ChunkVersion::parse(source[chunkVersion.name()]);
migrationType._chunkVersion = chunkVersionStatus;
} catch (const DBException& ex) {
return ex.toStatus();
diff --git a/src/mongo/db/s/balancer/type_migration_test.cpp b/src/mongo/db/s/balancer/type_migration_test.cpp
index f605983fe2c..610e150c963 100644
--- a/src/mongo/db/s/balancer/type_migration_test.cpp
+++ b/src/mongo/db/s/balancer/type_migration_test.cpp
@@ -27,12 +27,9 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/jsobj.h"
#include "mongo/db/s/balancer/type_migration.h"
#include "mongo/s/catalog/type_chunk.h"
-
#include "mongo/unittest/unittest.h"
namespace mongo {
@@ -48,7 +45,7 @@ const ShardId kToShard("shard0001");
const bool kWaitForDelete{true};
TEST(MigrationTypeTest, FromAndToBSONWithoutOptionalFields) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::ns(), kNs);
@@ -68,7 +65,7 @@ TEST(MigrationTypeTest, FromAndToBSONWithoutOptionalFields) {
}
TEST(MigrationTypeTest, FromAndToBSONWitOptionalFields) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
const auto secondaryThrottle =
MigrationSecondaryThrottleOptions::createWithWriteConcern(WriteConcernOptions(
"majority", WriteConcernOptions::SyncMode::JOURNAL, Milliseconds(60000)));
@@ -94,7 +91,7 @@ TEST(MigrationTypeTest, FromAndToBSONWitOptionalFields) {
}
TEST(MigrationTypeTest, MissingRequiredNamespaceField) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::min(), kMin);
@@ -111,7 +108,7 @@ TEST(MigrationTypeTest, MissingRequiredNamespaceField) {
}
TEST(MigrationTypeTest, MissingRequiredMinField) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::ns(), kNs);
@@ -128,7 +125,7 @@ TEST(MigrationTypeTest, MissingRequiredMinField) {
}
TEST(MigrationTypeTest, MissingRequiredMaxField) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::ns(), kNs);
@@ -145,7 +142,7 @@ TEST(MigrationTypeTest, MissingRequiredMaxField) {
}
TEST(MigrationTypeTest, MissingRequiredFromShardField) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::ns(), kNs);
@@ -162,7 +159,7 @@ TEST(MigrationTypeTest, MissingRequiredFromShardField) {
}
TEST(MigrationTypeTest, MissingRequiredToShardField) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::ns(), kNs);
diff --git a/src/mongo/db/s/check_sharding_index_command.cpp b/src/mongo/db/s/check_sharding_index_command.cpp
index 1422dc7c4c8..004c23b2d31 100644
--- a/src/mongo/db/s/check_sharding_index_command.cpp
+++ b/src/mongo/db/s/check_sharding_index_command.cpp
@@ -27,7 +27,6 @@
* it in the license file.
*/
-
#include "mongo/platform/basic.h"
#include "mongo/db/auth/action_type.h"
@@ -40,7 +39,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace {
@@ -96,13 +94,15 @@ public:
return false;
}
+ std::string tmpErrMsg = "couldn't find valid index for shard key";
auto shardKeyIdx = findShardKeyPrefixedIndex(opCtx,
*collection,
collection->getIndexCatalog(),
keyPattern,
- /*requireSingleKey=*/true);
+ /*requireSingleKey=*/true,
+ &tmpErrMsg);
if (!shardKeyIdx) {
- errmsg = "couldn't find valid index for shard key";
+ errmsg = tmpErrMsg;
return false;
}
diff --git a/src/mongo/db/s/chunk_splitter.cpp b/src/mongo/db/s/chunk_splitter.cpp
index e0fb5839a09..043b0139b20 100644
--- a/src/mongo/db/s/chunk_splitter.cpp
+++ b/src/mongo/db/s/chunk_splitter.cpp
@@ -31,7 +31,6 @@
#include "mongo/db/s/chunk_splitter.h"
#include "mongo/client/dbclient_cursor.h"
-#include "mongo/client/query.h"
#include "mongo/db/client.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/namespace_string.h"
diff --git a/src/mongo/db/s/collection_metadata_filtering_test.cpp b/src/mongo/db/s/collection_metadata_filtering_test.cpp
index e2e3081b436..74dc6a9e655 100644
--- a/src/mongo/db/s/collection_metadata_filtering_test.cpp
+++ b/src/mongo/db/s/collection_metadata_filtering_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/catalog_raii.h"
#include "mongo/db/s/collection_sharding_runtime.h"
#include "mongo/db/s/operation_sharding_state.h"
@@ -79,7 +77,7 @@ protected:
boost::none,
true,
[&] {
- ChunkVersion version(1, 0, epoch, Timestamp(1, 1));
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
ChunkType chunk1(uuid,
{shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << -100)},
diff --git a/src/mongo/db/s/collection_metadata_test.cpp b/src/mongo/db/s/collection_metadata_test.cpp
index 8f789549796..4084fe8e9e2 100644
--- a/src/mongo/db/s/collection_metadata_test.cpp
+++ b/src/mongo/db/s/collection_metadata_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/base/status.h"
#include "mongo/db/range_arithmetic.h"
#include "mongo/db/s/collection_metadata.h"
@@ -62,7 +60,7 @@ CollectionMetadata makeCollectionMetadataImpl(
std::vector<ChunkType> allChunks;
auto nextMinKey = shardKeyPattern.globalMin();
- ChunkVersion version{1, 0, epoch, timestamp};
+ ChunkVersion version({epoch, timestamp}, {1, 0});
for (const auto& myNextChunk : thisShardsChunks) {
if (SimpleBSONObjComparator::kInstance.evaluate(nextMinKey < myNextChunk.first)) {
// Need to add a chunk to the other shard from nextMinKey to myNextChunk.first.
@@ -125,7 +123,7 @@ protected:
reshardingFields.setRecipientFields(std::move(recipientFields));
} else if (state == CoordinatorStateEnum::kBlockingWrites) {
TypeCollectionDonorFields donorFields{
- constructTemporaryReshardingNss(kNss.db(), existingUuid),
+ resharding::constructTemporaryReshardingNss(kNss.db(), existingUuid),
KeyPattern{BSON("newKey" << 1)},
{kThisShard, kOtherShard}};
reshardingFields.setDonorFields(std::move(donorFields));
diff --git a/src/mongo/db/s/collection_sharding_runtime_test.cpp b/src/mongo/db/s/collection_sharding_runtime_test.cpp
index dcee5b73ac0..c6985aa5742 100644
--- a/src/mongo/db/s/collection_sharding_runtime_test.cpp
+++ b/src/mongo/db/s/collection_sharding_runtime_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "boost/optional/optional_io.hpp"
#include "mongo/db/catalog/create_collection.h"
#include "mongo/db/catalog_raii.h"
@@ -63,7 +61,7 @@ protected:
const Timestamp timestamp(1, 1);
auto range = ChunkRange(BSON(kShardKey << MINKEY), BSON(kShardKey << MAXKEY));
auto chunk = ChunkType(
- uuid, std::move(range), ChunkVersion(1, 0, epoch, timestamp), ShardId("other"));
+ uuid, std::move(range), ChunkVersion({epoch, timestamp}, {1, 0}), ShardId("other"));
ChunkManager cm(ShardId("0"),
DatabaseVersion(UUID::gen(), timestamp),
makeStandaloneRoutingTableHistory(
@@ -218,8 +216,8 @@ TEST_F(CollectionShardingRuntimeTest, ReturnUnshardedMetadataInServerlessMode) {
ScopedSetShardRole scopedSetShardRole2{
opCtx,
NamespaceString::kLogicalSessionsNamespace,
- ChunkVersion(1, 0, OID::gen(), Timestamp(1, 1)), /* shardVersion */
- boost::none /* databaseVersion */
+ ChunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 0}), /* shardVersion */
+ boost::none /* databaseVersion */
};
CollectionShardingRuntime csrLogicalSession(
@@ -324,11 +322,11 @@ public:
const Timestamp& timestamp) {
auto range1 = ChunkRange(BSON(kShardKey << MINKEY), BSON(kShardKey << 5));
ChunkType chunk1(
- uuid, range1, ChunkVersion(1, 0, epoch, timestamp), kShardList[0].getName());
+ uuid, range1, ChunkVersion({epoch, timestamp}, {1, 0}), kShardList[0].getName());
auto range2 = ChunkRange(BSON(kShardKey << 5), BSON(kShardKey << MAXKEY));
ChunkType chunk2(
- uuid, range2, ChunkVersion(1, 1, epoch, timestamp), kShardList[0].getName());
+ uuid, range2, ChunkVersion({epoch, timestamp}, {1, 1}), kShardList[0].getName());
return {chunk1, chunk2};
}
diff --git a/src/mongo/db/s/collmod_coordinator.cpp b/src/mongo/db/s/collmod_coordinator.cpp
index ebf179c21c8..50e92b41571 100644
--- a/src/mongo/db/s/collmod_coordinator.cpp
+++ b/src/mongo/db/s/collmod_coordinator.cpp
@@ -77,10 +77,7 @@ bool hasTimeSeriesGranularityUpdate(const CollModRequest& request) {
CollModCoordinator::CollModCoordinator(ShardingDDLCoordinatorService* service,
const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _initialState{initialState.getOwned()},
- _doc{CollModCoordinatorDocument::parse(IDLParserErrorContext("CollModCoordinatorDocument"),
- _initialState)},
+ : RecoverableShardingDDLCoordinator(service, "CollModCoordinator", initialState),
_request{_doc.getCollModRequest()} {}
void CollModCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
@@ -96,54 +93,9 @@ void CollModCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
SimpleBSONObjComparator::kInstance.evaluate(selfReq == otherReq));
}
-boost::optional<BSONObj> CollModCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
-
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- cmdBob.appendElements(_request.toBSON());
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "CollModCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
-}
-
-void CollModCoordinator::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(6069401,
- 2,
- "CollMod coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = CollModCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = CollModCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
-}
+void CollModCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ cmdInfoBuilder->appendElements(_request.toBSON());
+};
void CollModCoordinator::_performNoopRetryableWriteOnParticipants(
OperationContext* opCtx, const std::shared_ptr<executor::TaskExecutor>& executor) {
@@ -154,9 +106,9 @@ void CollModCoordinator::_performNoopRetryableWriteOnParticipants(
return participants;
}();
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
sharding_ddl_util::performNoopRetryableWriteOnShards(
- opCtx, shardsAndConfigsvr, getCurrentSession(_doc), executor);
+ opCtx, shardsAndConfigsvr, getCurrentSession(), executor);
}
void CollModCoordinator::_saveCollectionInfoOnCoordinatorIfNecessary(OperationContext* opCtx) {
@@ -229,14 +181,15 @@ ExecutorFuture<void> CollModCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
getForwardableOpMetadata().setOn(opCtx);
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_saveCollectionInfoOnCoordinatorIfNecessary(opCtx);
if (_collInfo->isSharded) {
- _doc.setCollUUID(
- sharding_ddl_util::getCollectionUUID(opCtx, nss(), true /* allowViews */));
- sharding_ddl_util::stopMigrations(opCtx, nss(), _doc.getCollUUID());
+ _doc.setCollUUID(sharding_ddl_util::getCollectionUUID(
+ opCtx, _collInfo->nsForTargeting, true /* allowViews */));
+ sharding_ddl_util::stopMigrations(
+ opCtx, _collInfo->nsForTargeting, _doc.getCollUUID());
}
_saveShardingInfoOnCoordinatorIfNecessary(opCtx);
@@ -258,7 +211,7 @@ ExecutorFuture<void> CollModCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
getForwardableOpMetadata().setOn(opCtx);
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_saveCollectionInfoOnCoordinatorIfNecessary(opCtx);
_saveShardingInfoOnCoordinatorIfNecessary(opCtx);
@@ -285,7 +238,7 @@ ExecutorFuture<void> CollModCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
getForwardableOpMetadata().setOn(opCtx);
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_saveCollectionInfoOnCoordinatorIfNecessary(opCtx);
_saveShardingInfoOnCoordinatorIfNecessary(opCtx);
@@ -335,7 +288,8 @@ ExecutorFuture<void> CollModCoordinator::_runImpl(
CommandHelpers::appendSimpleCommandStatus(builder, ok, errmsg);
}
_result = builder.obj();
- sharding_ddl_util::resumeMigrations(opCtx, nss(), _doc.getCollUUID());
+ sharding_ddl_util::resumeMigrations(
+ opCtx, _collInfo->nsForTargeting, _doc.getCollUUID());
} else {
CollMod cmd(nss());
cmd.setCollModRequest(_request);
@@ -370,7 +324,8 @@ ExecutorFuture<void> CollModCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
getForwardableOpMetadata().setOn(opCtx);
- sharding_ddl_util::resumeMigrations(opCtx, nss(), _doc.getCollUUID());
+ sharding_ddl_util::resumeMigrations(
+ opCtx, _collInfo->nsForTargeting, _doc.getCollUUID());
}
}
return status;
diff --git a/src/mongo/db/s/collmod_coordinator.h b/src/mongo/db/s/collmod_coordinator.h
index b85b6b16d5a..4b65502f78d 100644
--- a/src/mongo/db/s/collmod_coordinator.h
+++ b/src/mongo/db/s/collmod_coordinator.h
@@ -35,7 +35,9 @@
namespace mongo {
-class CollModCoordinator final : public ShardingDDLCoordinator {
+class CollModCoordinator final
+ : public RecoverableShardingDDLCoordinator<CollModCoordinatorDocument,
+ CollModCoordinatorPhaseEnum> {
public:
using StateDoc = CollModCoordinatorDocument;
using Phase = CollModCoordinatorPhaseEnum;
@@ -44,9 +46,7 @@ public:
void checkIfOptionsConflict(const BSONObj& doc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
/**
* Waits for the termination of the parent DDLCoordinator (so all the resources are released)
* and then returns the result.
std::vector<ShardId> shardsOwningChunks;
};
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return CollModCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
void _performNoopRetryableWriteOnParticipants(
OperationContext* opCtx, const std::shared_ptr<executor::TaskExecutor>& executor);
@@ -107,10 +88,6 @@ private:
void _saveShardingInfoOnCoordinatorIfNecessary(OperationContext* opCtx);
- BSONObj _initialState;
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("CollModCoordinator::_docMutex");
- CollModCoordinatorDocument _doc;
-
const mongo::CollModRequest _request;
boost::optional<BSONObj> _result;
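Taken together, the collmod_coordinator.{h,cpp} hunks above delete the per-coordinator state-document plumbing (_doc, _docMutex, _enterPhase, _executePhase, reportForCurrentOp) and inherit it from the templated RecoverableShardingDDLCoordinator base. A minimal sketch of what a derived coordinator is left providing after this refactor; only the overrides visible in the hunks are shown, and the MyCoordinator* names are placeholders:

    class MyCoordinator final
        : public RecoverableShardingDDLCoordinator<MyCoordinatorDocument,
                                                   MyCoordinatorPhaseEnum> {
    public:
        using StateDoc = MyCoordinatorDocument;
        using Phase = MyCoordinatorPhaseEnum;

        MyCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState)
            : RecoverableShardingDDLCoordinator(service, "MyCoordinator", initialState) {}

        void checkIfOptionsConflict(const BSONObj& doc) const override;

        // currentOp reporting now only contributes command-specific fields; the base
        // class supplies type/desc/op/ns/currentPhase.
        void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;

    private:
        // Lets the base class log phase transitions with the right serializer.
        StringData serializePhase(const Phase& phase) const override;

        ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
                                      const CancellationToken& token) noexcept override;
    };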
diff --git a/src/mongo/db/s/collmod_coordinator_pre60_compatible.cpp b/src/mongo/db/s/collmod_coordinator_pre60_compatible.cpp
deleted file mode 100644
index 37005996f3a..00000000000
--- a/src/mongo/db/s/collmod_coordinator_pre60_compatible.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-/**
- * Copyright (C) 2021-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-
-#include "mongo/db/s/collmod_coordinator_pre60_compatible.h"
-
-#include "mongo/db/catalog/collection_catalog.h"
-#include "mongo/db/catalog/database_holder.h"
-#include "mongo/db/coll_mod_gen.h"
-#include "mongo/db/db_raii.h"
-#include "mongo/db/ops/insert.h"
-#include "mongo/db/s/sharded_collmod_gen.h"
-#include "mongo/db/s/sharding_ddl_util.h"
-#include "mongo/db/s/sharding_state.h"
-#include "mongo/db/timeseries/catalog_helper.h"
-#include "mongo/db/timeseries/timeseries_collmod.h"
-#include "mongo/idl/idl_parser.h"
-#include "mongo/logv2/log.h"
-#include "mongo/s/async_requests_sender.h"
-#include "mongo/s/cluster_commands_helpers.h"
-#include "mongo/s/grid.h"
-
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
-
-namespace mongo {
-
-namespace {
-
-bool isShardedColl(OperationContext* opCtx, const NamespaceString& nss) {
- try {
- auto coll = Grid::get(opCtx)->catalogClient()->getCollection(opCtx, nss);
- return true;
- } catch (const ExceptionFor<ErrorCodes::NamespaceNotFound>&) {
- // The collection is not sharded or doesn't exist.
- return false;
- }
-}
-
-bool hasTimeSeriesGranularityUpdate(const CollModRequest& request) {
- return request.getTimeseries() && request.getTimeseries()->getGranularity();
-}
-
-} // namespace
-
-CollModCoordinatorPre60Compatible::CollModCoordinatorPre60Compatible(
- ShardingDDLCoordinatorService* service, const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState) {
- _initialState = initialState.getOwned();
- _doc = CollModCoordinatorDocument::parse(IDLParserErrorContext("CollModCoordinatorDocument"),
- _initialState);
-}
-
-void CollModCoordinatorPre60Compatible::checkIfOptionsConflict(const BSONObj& doc) const {
- const auto otherDoc =
- CollModCoordinatorDocument::parse(IDLParserErrorContext("CollModCoordinatorDocument"), doc);
-
- const auto& selfReq = _doc.getCollModRequest().toBSON();
- const auto& otherReq = otherDoc.getCollModRequest().toBSON();
-
- uassert(ErrorCodes::ConflictingOperationInProgress,
- str::stream() << "Another collMod for namespace " << nss()
- << " is being executed with different parameters: " << selfReq,
- SimpleBSONObjComparator::kInstance.evaluate(selfReq == otherReq));
-}
-
-boost::optional<BSONObj> CollModCoordinatorPre60Compatible::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
-
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- cmdBob.appendElements(_doc.getCollModRequest().toBSON());
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "CollModCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
-}
-
-void CollModCoordinatorPre60Compatible::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(6482601,
- 2,
- "CollMod coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = CollModCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = CollModCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
-}
-
-void CollModCoordinatorPre60Compatible::_performNoopRetryableWriteOnParticipants(
- OperationContext* opCtx, const std::shared_ptr<executor::TaskExecutor>& executor) {
- auto shardsAndConfigsvr = [&] {
- const auto shardRegistry = Grid::get(opCtx)->shardRegistry();
- auto participants = shardRegistry->getAllShardIds(opCtx);
- participants.emplace_back(shardRegistry->getConfigShard()->getId());
- return participants;
- }();
-
- _doc = _updateSession(opCtx, _doc);
- sharding_ddl_util::performNoopRetryableWriteOnShards(
- opCtx, shardsAndConfigsvr, getCurrentSession(_doc), executor);
-}
-
-ExecutorFuture<void> CollModCoordinatorPre60Compatible::_runImpl(
- std::shared_ptr<executor::ScopedTaskExecutor> executor,
- const CancellationToken& token) noexcept {
- return ExecutorFuture<void>(**executor)
- .then(_executePhase(
- Phase::kUpdateShards,
- [this, executor = executor, anchor = shared_from_this()] {
- auto opCtxHolder = cc().makeOperationContext();
- auto* opCtx = opCtxHolder.get();
- getForwardableOpMetadata().setOn(opCtx);
-
- const auto isTimeSeries = timeseries::getTimeseriesOptions(
- opCtx, nss(), !nss().isTimeseriesBucketsCollection());
- const auto collNss = isTimeSeries && !nss().isTimeseriesBucketsCollection()
- ? nss().makeTimeseriesBucketsNamespace()
- : nss();
- const auto isSharded = isShardedColl(opCtx, collNss);
-
- if (isSharded) {
- // Updating granularity on sharded time-series collections is not allowed.
- if (isTimeSeries) {
- uassert(
- ErrorCodes::NotImplemented,
- str::stream()
- << "Cannot update granularity of a sharded time-series collection.",
- !hasTimeSeriesGranularityUpdate(_doc.getCollModRequest()));
- }
- _doc.setCollUUID(
- sharding_ddl_util::getCollectionUUID(opCtx, nss(), true /* allowViews */));
-
- sharding_ddl_util::stopMigrations(opCtx, nss(), _doc.getCollUUID());
-
- if (!_firstExecution) {
- _performNoopRetryableWriteOnParticipants(opCtx, **executor);
- }
-
- _doc = _updateSession(opCtx, _doc);
- const OperationSessionInfo osi = getCurrentSession(_doc);
-
- const auto chunkManager = uassertStatusOK(
- Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(
- opCtx, collNss));
- std::unique_ptr<CollatorInterface> collator;
- const auto expCtx =
- make_intrusive<ExpressionContext>(opCtx, std::move(collator), collNss);
- std::set<ShardId> participants;
- chunkManager.getShardIdsForQuery(
- expCtx, {} /* query */, {} /* collation */, &participants);
-
- ShardsvrCollModParticipant request(nss(), _doc.getCollModRequest());
- const auto cmdObj =
- CommandHelpers::appendMajorityWriteConcern(request.toBSON({}));
- const auto& responses = sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx,
- nss().db(),
- cmdObj.addFields(osi.toBSON()),
- {std::make_move_iterator(participants.begin()),
- std::make_move_iterator(participants.end())},
- **executor);
- BSONObjBuilder builder;
- std::string errmsg;
- auto ok = appendRawResponses(opCtx, &errmsg, &builder, responses).responseOK;
- if (!errmsg.empty()) {
- CommandHelpers::appendSimpleCommandStatus(builder, ok, errmsg);
- }
- _result = builder.obj();
- sharding_ddl_util::resumeMigrations(opCtx, nss(), _doc.getCollUUID());
- } else {
- CollMod cmd(nss());
- cmd.setCollModRequest(_doc.getCollModRequest());
- BSONObjBuilder collModResBuilder;
- uassertStatusOK(timeseries::processCollModCommandWithTimeSeriesTranslation(
- opCtx, nss(), cmd, true, &collModResBuilder));
- auto collModRes = collModResBuilder.obj();
-
- const auto dbInfo = uassertStatusOK(
- Grid::get(opCtx)->catalogCache()->getDatabase(opCtx, nss().db()));
- const auto shard = uassertStatusOK(
- Grid::get(opCtx)->shardRegistry()->getShard(opCtx, dbInfo->getPrimary()));
- BSONObjBuilder builder;
- builder.appendElements(collModRes);
- BSONObjBuilder subBuilder(builder.subobjStart("raw"));
- subBuilder.append(shard->getConnString().toString(), collModRes);
- subBuilder.doneFast();
- _result = builder.obj();
- }
- }))
- .onError([this, anchor = shared_from_this()](const Status& status) {
- if (!status.isA<ErrorCategory::NotPrimaryError>() &&
- !status.isA<ErrorCategory::ShutdownError>()) {
- LOGV2_ERROR(6482602,
- "Error running collMod",
- "namespace"_attr = nss(),
- "error"_attr = redact(status));
- // If we have the collection UUID set, this error happened in a sharded collection,
- // we should restore the migrations.
- if (_doc.getCollUUID()) {
- auto opCtxHolder = cc().makeOperationContext();
- auto* opCtx = opCtxHolder.get();
- getForwardableOpMetadata().setOn(opCtx);
-
- sharding_ddl_util::resumeMigrations(opCtx, nss(), _doc.getCollUUID());
- }
- }
- return status;
- });
-}
-
-} // namespace mongo
diff --git a/src/mongo/db/s/collmod_coordinator_pre60_compatible.h b/src/mongo/db/s/collmod_coordinator_pre60_compatible.h
deleted file mode 100644
index a8de0c67f53..00000000000
--- a/src/mongo/db/s/collmod_coordinator_pre60_compatible.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/**
- * Copyright (C) 2021-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/db/s/collmod_coordinator_document_gen.h"
-#include "mongo/db/s/sharding_ddl_coordinator.h"
-#include "mongo/s/request_types/sharded_ddl_commands_gen.h"
-#include "mongo/stdx/mutex.h"
-
-namespace mongo {
-
-class CollModCoordinatorPre60Compatible final : public ShardingDDLCoordinator {
-public:
- using StateDoc = CollModCoordinatorDocument;
- using Phase = CollModCoordinatorPhaseEnum;
-
- CollModCoordinatorPre60Compatible(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState);
-
- void checkIfOptionsConflict(const BSONObj& doc) const override;
-
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
-
- /**
- * Waits for the termination of the parent DDLCoordinator (so all the resources are liberated)
- * and then return the result.
- */
- BSONObj getResult(OperationContext* opCtx) {
- getCompletionFuture().get(opCtx);
- invariant(_result.is_initialized());
- return *_result;
- }
-
-private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- stdx::lock_guard l{_docMutex};
- return _doc.getShardingDDLCoordinatorMetadata();
- }
-
- ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
- const CancellationToken& token) noexcept override;
-
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
- void _performNoopRetryableWriteOnParticipants(
- OperationContext* opCtx, const std::shared_ptr<executor::TaskExecutor>& executor);
-
- BSONObj _initialState;
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("CollModCoordinatorPre60Compatible::_docMutex");
- CollModCoordinatorDocument _doc;
-
- boost::optional<BSONObj> _result;
-};
-
-} // namespace mongo
diff --git a/src/mongo/db/s/commit_chunk_migration.idl b/src/mongo/db/s/commit_chunk_migration.idl
new file mode 100644
index 00000000000..6484623cd5c
--- /dev/null
+++ b/src/mongo/db/s/commit_chunk_migration.idl
@@ -0,0 +1,85 @@
+
+# Copyright (C) 2019-present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# <http://www.mongodb.com/licensing/server-side-public-license>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+#
+
+
+global:
+ cpp_namespace: "mongo"
+
+imports:
+ - "mongo/idl/basic_types.idl"
+ - "mongo/s/sharding_types.idl"
+ - "mongo/s/chunk_version.idl"
+
+structs:
+ ConfigSvrCommitChunkMigrationResponse:
+ description: "Response of the _configsvrCommitChunkMigration command."
+ strict: false
+ fields:
+ shardVersion:
+ type: ChunkVersion
+ description: "Collection version at the end of the migration."
+
+ MigratedChunkType:
+ description: "ChunkType describing a migrated chunk"
+ strict: false
+ fields:
+ lastmod : ChunkVersion
+ min: object
+ max: object
+
+commands:
+ _configsvrCommitChunkMigration:
+ command_name: _configsvrCommitChunkMigration
+ cpp_name: CommitChunkMigrationRequest
+ description: "internal _configsvrCommitChunkMigration command for config server"
+ namespace: type
+ api_version: ""
+ type: namespacestring
+ strict: false
+ reply_type: ConfigSvrCommitChunkMigrationResponse
+ fields:
+ fromShard:
+ type: shard_id
+ description: "from shard name"
+
+ toShard:
+ type: shard_id
+ description: "to shard name"
+
+ migratedChunk:
+ type: MigratedChunkType
+ description: "ChunkType describing a migrated chunk"
+
+ fromShardCollectionVersion:
+ type: ChunkVersion
+ description: "{ shardVersionField: <version> }"
+
+ validAfter:
+ type: timestamp
+ description: "The time after which this chunk is at the new shard"
\ No newline at end of file
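The new IDL file replaces the hand-rolled CommitChunkMigrationRequest parser (the deleted include of commit_chunk_migration_request_type.h appears further down in this patch). For reference, the wire shape the generated parser accepts is roughly the following; the namespace, shard names, key bounds and version placeholders are illustrative only:

    // {
    //     _configsvrCommitChunkMigration: "test.coll",
    //     fromShard: "shard0",
    //     toShard: "shard1",
    //     migratedChunk: { lastmod: <ChunkVersion>, min: { x: 0 }, max: { x: 10 } },
    //     fromShardCollectionVersion: <ChunkVersion>,
    //     validAfter: <Timestamp>
    // }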
diff --git a/src/mongo/db/s/compact_structured_encryption_data_coordinator.cpp b/src/mongo/db/s/compact_structured_encryption_data_coordinator.cpp
index 04048f7946b..69c67d89dcb 100644
--- a/src/mongo/db/s/compact_structured_encryption_data_coordinator.cpp
+++ b/src/mongo/db/s/compact_structured_encryption_data_coordinator.cpp
@@ -187,94 +187,35 @@ void doDropOperation(const CompactStructuredEncryptionDataState& state) {
boost::optional<BSONObj> CompactStructuredEncryptionDataCoordinator::reportForCurrentOp(
MongoProcessInterface::CurrentOpConnectionsMode connMode,
MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder bob;
-
- CompactStructuredEncryptionDataPhaseEnum currPhase;
- std::string nss;
- std::string escNss;
- std::string eccNss;
- std::string ecoNss;
- std::string ecocNss;
- std::string ecocRenameUuid;
- std::string ecocUiid;
- std::string ecocRenameNss;
- {
- stdx::lock_guard l{_docMutex};
- currPhase = _doc.getPhase();
- nss = _doc.getId().getNss().ns();
- escNss = _doc.getEscNss().ns();
- eccNss = _doc.getEccNss().ns();
- ecoNss = _doc.getEcocNss().ns();
- ecocNss = _doc.getEcocNss().ns();
- ecocRenameUuid =
- _doc.getEcocRenameUuid() ? _doc.getEcocRenameUuid().value().toString() : "none";
- ecocUiid = _doc.getEcocUuid() ? _doc.getEcocUuid().value().toString() : "none";
- ecocRenameNss = _doc.getEcocRenameNss().ns();
- }
-
- bob.append("type", "op");
- bob.append("desc", "CompactStructuredEncryptionDataCoordinator");
- bob.append("op", "command");
- bob.append("nss", nss);
- bob.append("escNss", escNss);
- bob.append("eccNss", eccNss);
- bob.append("ecocNss", ecocNss);
- bob.append("ecocUuid", ecocUiid);
- bob.append("ecocRenameNss", ecocRenameNss);
- bob.append("ecocRenameUuid", ecocRenameUuid);
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
+ auto bob = basicReportBuilder();
+
+ stdx::lock_guard lg{_docMutex};
+ bob.append("escNss", _doc.getEscNss().ns());
+ bob.append("eccNss", _doc.getEccNss().ns());
+ bob.append("ecocNss", _doc.getEcocNss().ns());
+ bob.append("ecocUuid", _doc.getEcocUuid() ? _doc.getEcocUuid().value().toString() : "none");
+ bob.append("ecocRenameNss", _doc.getEcocRenameNss().ns());
+ bob.append("ecocRenameUuid",
+ _doc.getEcocRenameUuid() ? _doc.getEcocRenameUuid().value().toString() : "none");
return bob.obj();
}
-void CompactStructuredEncryptionDataCoordinator::_enterPhase(Phase newPhase) {
- StateDoc doc(_doc);
- doc.setPhase(newPhase);
-
- LOGV2_DEBUG(6350490,
- 2,
- "Transitioning phase for CompactStructuredEncryptionDataCoordinator",
- "nss"_attr = _doc.getId().getNss().ns(),
- "escNss"_attr = _doc.getEscNss().ns(),
- "eccNss"_attr = _doc.getEccNss().ns(),
- "ecocNss"_attr = _doc.getEcocNss().ns(),
- "ecocUuid"_attr = _doc.getEcocUuid(),
- "ecocRenameNss"_attr = _doc.getEcocRenameNss().ns(),
- "ecocRenameUuid"_attr = _doc.getEcocRenameUuid(),
- "skipCompact"_attr = _doc.getSkipCompact(),
- "compactionTokens"_attr = _doc.getCompactionTokens(),
- "oldPhase"_attr = CompactStructuredEncryptionDataPhase_serializer(_doc.getPhase()),
- "newPhase"_attr = CompactStructuredEncryptionDataPhase_serializer(newPhase));
-
- if (_doc.getPhase() == Phase::kUnset) {
- doc = _insertStateDocument(std::move(doc));
- } else {
- auto opCtx = cc().makeOperationContext();
- doc = _updateStateDocument(opCtx.get(), std::move(doc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(doc);
- }
-}
-
ExecutorFuture<void> CompactStructuredEncryptionDataCoordinator::_runImpl(
std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept {
return ExecutorFuture<void>(**executor)
.then(_executePhase(Phase::kRenameEcocForCompact,
- [this, anchor = shared_from_this()](const auto& state) {
- doRenameOperation(state, &_skipCompact, &_ecocRenameUuid);
+ [this, anchor = shared_from_this()]() {
+ doRenameOperation(_doc, &_skipCompact, &_ecocRenameUuid);
stdx::unique_lock ul{_docMutex};
_doc.setSkipCompact(_skipCompact);
_doc.setEcocRenameUuid(_ecocRenameUuid);
}))
- .then(_executePhase(Phase::kCompactStructuredEncryptionData,
- [this, anchor = shared_from_this()](const auto& state) {
- _response = doCompactOperation(state);
- }))
- .then(_executePhase(Phase::kDropTempCollection, doDropOperation));
+ .then(_executePhase(
+ Phase::kCompactStructuredEncryptionData,
+ [this, anchor = shared_from_this()]() { _response = doCompactOperation(_doc); }))
+ .then(_executePhase(Phase::kDropTempCollection,
+ [this, anchor = shared_from_this()] { doDropOperation(_doc); }));
}
} // namespace mongo
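This coordinator keeps its reportForCurrentOp override, but after the hunk above it starts from the base class's basicReportBuilder() and only appends the encryption-specific fields, and its _executePhase callbacks become nullary lambdas that read the coordinator-owned _doc instead of receiving the state document as an argument. A condensed sketch of the reporting pattern, trimmed from the hunk (only escNss is shown; the other appends follow the same shape):

    boost::optional<BSONObj> CompactStructuredEncryptionDataCoordinator::reportForCurrentOp(
        MongoProcessInterface::CurrentOpConnectionsMode connMode,
        MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
        auto bob = basicReportBuilder();  // type/desc/op/ns/currentPhase come from the base
        stdx::lock_guard lg{_docMutex};
        bob.append("escNss", _doc.getEscNss().ns());  // coordinator-specific extras only
        return bob.obj();
    }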
diff --git a/src/mongo/db/s/compact_structured_encryption_data_coordinator.h b/src/mongo/db/s/compact_structured_encryption_data_coordinator.h
index 4b8ffd33441..b030e19910a 100644
--- a/src/mongo/db/s/compact_structured_encryption_data_coordinator.h
+++ b/src/mongo/db/s/compact_structured_encryption_data_coordinator.h
@@ -40,7 +40,9 @@
namespace mongo {
-class CompactStructuredEncryptionDataCoordinator final : public ShardingDDLCoordinator {
+class CompactStructuredEncryptionDataCoordinator final
+ : public RecoverableShardingDDLCoordinator<CompactStructuredEncryptionDataState,
+ CompactStructuredEncryptionDataPhaseEnum> {
public:
static constexpr auto kStateContext = "CompactStructuredEncryptionDataState"_sd;
using StateDoc = CompactStructuredEncryptionDataState;
@@ -48,7 +50,8 @@ public:
CompactStructuredEncryptionDataCoordinator(ShardingDDLCoordinatorService* service,
const BSONObj& doc)
- : ShardingDDLCoordinator(service, doc), _doc(StateDoc::parse({kStateContext}, doc)) {}
+ : RecoverableShardingDDLCoordinator(
+ service, "CompactStructuredEncryptionDataCoordinator", doc) {}
boost::optional<BSONObj> reportForCurrentOp(
MongoProcessInterface::CurrentOpConnectionsMode connMode,
@@ -63,36 +66,14 @@ public:
void checkIfOptionsConflict(const BSONObj& doc) const final {}
private:
- void _enterPhase(Phase newPhase);
-
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
- if (currPhase > newPhase) {
- return;
- }
- if (currPhase < newPhase) {
- _enterPhase(newPhase);
- }
-
- return func(_doc);
- };
- }
-
-private:
- ShardingDDLCoordinatorMetadata const& metadata() const final {
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return CompactStructuredEncryptionDataPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept final;
private:
- mutable Mutex _docMutex =
- MONGO_MAKE_LATCH("CompactStructuredEncryptionDataCoordinator::_docMutex");
- StateDoc _doc;
-
boost::optional<CompactStructuredEncryptionDataCommandReply> _response;
bool _skipCompact{false};
boost::optional<UUID> _ecocRenameUuid;
diff --git a/src/mongo/db/s/config/config_server_test_fixture.cpp b/src/mongo/db/s/config/config_server_test_fixture.cpp
index 198371cf17a..d697528a86d 100644
--- a/src/mongo/db/s/config/config_server_test_fixture.cpp
+++ b/src/mongo/db/s/config/config_server_test_fixture.cpp
@@ -70,7 +70,6 @@
#include "mongo/s/config_server_catalog_cache_loader.h"
#include "mongo/s/database_version.h"
#include "mongo/s/query/cluster_cursor_manager.h"
-#include "mongo/s/request_types/set_shard_version_request.h"
#include "mongo/s/shard_id.h"
#include "mongo/s/write_ops/batched_command_response.h"
#include "mongo/util/clock_source_mock.h"
@@ -452,30 +451,6 @@ std::vector<KeysCollectionDocument> ConfigServerTestFixture::getKeys(OperationCo
return keys;
}
-void ConfigServerTestFixture::expectSetShardVersion(
- const HostAndPort& expectedHost,
- const ShardType& expectedShard,
- const NamespaceString& expectedNs,
- boost::optional<ChunkVersion> expectedChunkVersion) {
- onCommand([&](const RemoteCommandRequest& request) {
- ASSERT_EQ(expectedHost, request.target);
- ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(),
- rpc::TrackingMetadata::removeTrackingData(request.metadata));
-
- SetShardVersionRequest ssv =
- assertGet(SetShardVersionRequest::parseFromBSON(request.cmdObj));
-
- ASSERT(ssv.isAuthoritative());
- ASSERT_EQ(expectedNs.toString(), ssv.getNS().ns());
-
- if (expectedChunkVersion) {
- ASSERT_EQ(*expectedChunkVersion, ssv.getNSVersion());
- }
-
- return BSON("ok" << true);
- });
-}
-
void ConfigServerTestFixture::setupOpObservers() {
auto opObserverRegistry =
checked_cast<OpObserverRegistry*>(getServiceContext()->getOpObserver());
diff --git a/src/mongo/db/s/config/config_server_test_fixture.h b/src/mongo/db/s/config/config_server_test_fixture.h
index 05ed2b55a67..bd2a41b41a0 100644
--- a/src/mongo/db/s/config/config_server_test_fixture.h
+++ b/src/mongo/db/s/config/config_server_test_fixture.h
@@ -166,17 +166,6 @@ protected:
StatusWith<std::vector<BSONObj>> getIndexes(OperationContext* opCtx, const NamespaceString& ns);
/**
- * Expects a setShardVersion command to be executed on the specified shard.
- *
- * The expectedChunkVersion is optional, because in some cases it may not be possible to know
- * the OID of a ChunkVersion generated by some internal code. (See SERVER-29451).
- */
- void expectSetShardVersion(const HostAndPort& expectedHost,
- const ShardType& expectedShard,
- const NamespaceString& expectedNs,
- boost::optional<ChunkVersion> expectedChunkVersion);
-
- /**
* Returns the stored raw pointer to the addShard TaskExecutor's NetworkInterface.
*/
executor::NetworkInterfaceMock* networkForAddShard() const;
diff --git a/src/mongo/db/s/config/configsvr_collmod_command.cpp b/src/mongo/db/s/config/configsvr_collmod_command.cpp
index e4bda1b9995..6d224756002 100644
--- a/src/mongo/db/s/config/configsvr_collmod_command.cpp
+++ b/src/mongo/db/s/config/configsvr_collmod_command.cpp
@@ -66,6 +66,10 @@ public:
return Command::AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
diff --git a/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp b/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp
index 9dcff9c96d0..a50f499662f 100644
--- a/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp
+++ b/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp
@@ -31,12 +31,14 @@
#include "mongo/platform/basic.h"
#include "mongo/base/status_with.h"
+#include "mongo/bson/util/bson_extract.h"
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/commands.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/s/chunk_move_write_concern_options.h"
+#include "mongo/db/s/commit_chunk_migration_gen.h"
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/rpc/get_status_from_command_result.h"
@@ -44,7 +46,6 @@
#include "mongo/s/chunk_version.h"
#include "mongo/s/client/shard_registry.h"
#include "mongo/s/grid.h"
-#include "mongo/s/request_types/commit_chunk_migration_request_type.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
@@ -79,9 +80,23 @@ namespace {
* }
*
*/
-class ConfigSvrCommitChunkMigrationCommand : public BasicCommand {
+
+
+ChunkType toChunkType(const MigratedChunkType& migratedChunk) {
+
+ ChunkType chunk;
+ chunk.setMin(migratedChunk.getMin());
+ chunk.setMax(migratedChunk.getMax());
+ chunk.setVersion(migratedChunk.getLastmod());
+ return chunk;
+}
+
+
+class ConfigSvrCommitChunkMigrationCommand
+ : public TypedCommand<ConfigSvrCommitChunkMigrationCommand> {
public:
- ConfigSvrCommitChunkMigrationCommand() : BasicCommand("_configsvrCommitChunkMigration") {}
+ using Request = CommitChunkMigrationRequest;
+ using Response = ConfigSvrCommitChunkMigrationResponse;
bool skipApiVersionCheck() const override {
// Internal command (server to server).
@@ -100,51 +115,57 @@ public:
return true;
}
- virtual bool supportsWriteConcern(const BSONObj& cmd) const override {
- return true;
- }
+ class Invocation : public InvocationBase {
+ public:
+ using InvocationBase::InvocationBase;
+
+ ConfigSvrCommitChunkMigrationResponse typedRun(OperationContext* opCtx) {
+
+ uassert(ErrorCodes::IllegalOperation,
+ "_configsvrClearJumboFlag can only be run on config servers",
+ "_configsvrCommitChunkMigration can only be run on config servers",
+
+ // Set the operation context read concern level to local for reads into the config
+ // database.
+ repl::ReadConcernArgs::get(opCtx) =
+ repl::ReadConcernArgs(repl::ReadConcernLevel::kLocalReadConcern);
+
+ const NamespaceString nss = ns();
+ auto migratedChunk = toChunkType(request().getMigratedChunk());
- Status checkAuthForCommand(Client* client,
- const std::string& dbname,
- const BSONObj& cmdObj) const override {
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forClusterResource(), ActionType::internal)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
+ StatusWith<BSONObj> chunkVersionResponse =
+ ShardingCatalogManager::get(opCtx)->commitChunkMigration(
+ opCtx,
+ nss,
+ migratedChunk,
+ request().getFromShardCollectionVersion().epoch(),
+ request().getFromShardCollectionVersion().getTimestamp(),
+ request().getFromShard(),
+ request().getToShard(),
+ request().getValidAfter());
+
+ auto chunkVersionObj = uassertStatusOK(chunkVersionResponse);
+
+ return Response{ChunkVersion::parse(chunkVersionObj[ChunkVersion::kShardVersionField])};
}
- return Status::OK();
- }
- std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const override {
- return CommandHelpers::parseNsFullyQualified(cmdObj);
- }
+ private:
+ bool supportsWriteConcern() const override {
+ return true;
+ }
- bool run(OperationContext* opCtx,
- const std::string& dbName,
- const BSONObj& cmdObj,
- BSONObjBuilder& result) override {
-
- // Set the operation context read concern level to local for reads into the config database.
- repl::ReadConcernArgs::get(opCtx) =
- repl::ReadConcernArgs(repl::ReadConcernLevel::kLocalReadConcern);
-
- const NamespaceString nss = NamespaceString(parseNs(dbName, cmdObj));
-
- auto commitRequest =
- uassertStatusOK(CommitChunkMigrationRequest::createFromCommand(nss, cmdObj));
-
- StatusWith<BSONObj> response = ShardingCatalogManager::get(opCtx)->commitChunkMigration(
- opCtx,
- nss,
- commitRequest.getMigratedChunk(),
- commitRequest.getCollectionEpoch(),
- commitRequest.getCollectionTimestamp(),
- commitRequest.getFromShard(),
- commitRequest.getToShard(),
- commitRequest.getValidAfter());
- uassertStatusOK(response.getStatus());
- result.appendElements(response.getValue());
- return true;
- }
+ NamespaceString ns() const override {
+ return request().getCommandParameter();
+ }
+
+ void doCheckAuthorization(OperationContext* opCtx) const override {
+ uassert(ErrorCodes::Unauthorized,
+ "Unauthorized",
+ AuthorizationSession::get(opCtx->getClient())
+ ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(),
+ ActionType::internal));
+ }
+ };
} configsvrCommitChunkMigrationCommand;
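The hunks above convert the command from a BasicCommand that hand-parses cmdObj into a TypedCommand backed by the new IDL request. A minimal sketch of the skeleton this conversion leaves behind, using only the pieces visible in the diff (MyRequest and MyResponse stand in for the IDL-generated types):

    class MyConfigsvrCommand : public TypedCommand<MyConfigsvrCommand> {
    public:
        using Request = MyRequest;    // IDL-generated request type
        using Response = MyResponse;  // IDL-generated reply type

        class Invocation : public InvocationBase {
        public:
            using InvocationBase::InvocationBase;

            Response typedRun(OperationContext* opCtx) {
                // request() exposes the parsed IDL fields with typed getters.
                return Response{/* ... */};
            }

        private:
            bool supportsWriteConcern() const override {
                return true;
            }

            NamespaceString ns() const override {
                // For namespace: type commands the namespace is the command parameter.
                return request().getCommandParameter();
            }

            void doCheckAuthorization(OperationContext* opCtx) const override {
                uassert(ErrorCodes::Unauthorized,
                        "Unauthorized",
                        AuthorizationSession::get(opCtx->getClient())
                            ->isAuthorizedForActionsOnResource(
                                ResourcePattern::forClusterResource(), ActionType::internal));
            }
        };
    } myConfigsvrCommand;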
diff --git a/src/mongo/db/s/config/configsvr_configure_collection_balancing.cpp b/src/mongo/db/s/config/configsvr_configure_collection_balancing.cpp
index 136af191f6d..8769cbe9b53 100644
--- a/src/mongo/db/s/config/configsvr_configure_collection_balancing.cpp
+++ b/src/mongo/db/s/config/configsvr_configure_collection_balancing.cpp
@@ -66,11 +66,6 @@ public:
str::stream() << Request::kCommandName << " can only be run on config servers",
serverGlobalParams.clusterRole == ClusterRole::ConfigServer);
- uassert(8423309,
- str::stream() << Request::kCommandName << " command not supported",
- mongo::feature_flags::gPerCollBalancingSettings.isEnabled(
- serverGlobalParams.featureCompatibility));
-
const NamespaceString& nss = ns();
uassert(ErrorCodes::InvalidNamespace,
diff --git a/src/mongo/db/s/config/configsvr_merge_chunks_command.cpp b/src/mongo/db/s/config/configsvr_merge_chunks_command.cpp
index ea2823dcdf0..db155fa6bea 100644
--- a/src/mongo/db/s/config/configsvr_merge_chunks_command.cpp
+++ b/src/mongo/db/s/config/configsvr_merge_chunks_command.cpp
@@ -96,8 +96,8 @@ public:
request().getChunkRange(),
request().getShard(),
request().getValidAfter()));
- return ConfigSvrMergeResponse{ChunkVersion::fromBSONPositionalOrNewerFormat(
- shardAndCollVers[ChunkVersion::kShardVersionField])};
+ return ConfigSvrMergeResponse{
+ ChunkVersion::parse(shardAndCollVers[ChunkVersion::kShardVersionField])};
}
private:
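The same rename shows up in this hunk and, later in the patch, on the serialization side: parsing a shard version out of a reply moves from ChunkVersion::fromBSONPositionalOrNewerFormat to ChunkVersion::parse, and writing one under a field name uses serializeToBSON. A two-line sketch of the pair as used in this patch (obj and bb are placeholders for the reply object and builder at hand):

    auto shardVersion = ChunkVersion::parse(obj[ChunkVersion::kShardVersionField]);
    shardVersion.serializeToBSON(ChunkType::lastmod(), &bb);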
diff --git a/src/mongo/db/s/config/configsvr_move_chunk_command.cpp b/src/mongo/db/s/config/configsvr_move_chunk_command.cpp
index 41d1679b4c9..cfa02c94711 100644
--- a/src/mongo/db/s/config/configsvr_move_chunk_command.cpp
+++ b/src/mongo/db/s/config/configsvr_move_chunk_command.cpp
@@ -96,20 +96,10 @@ public:
repl::ReadConcernArgs::get(opCtx) =
repl::ReadConcernArgs(repl::ReadConcernLevel::kLocalReadConcern);
- auto request = uassertStatusOK(
- BalanceChunkRequest::parseFromConfigCommand(cmdObj, false /* requireUUID */));
+ auto request = uassertStatusOK(BalanceChunkRequest::parseFromConfigCommand(cmdObj));
const auto& nss = request.getNss();
- // In case of mixed binaries including v5.0, the collection UUID field may not be attached
- // to the chunk.
- if (!request.getChunk().hasCollectionUUID_UNSAFE()) {
- // TODO (SERVER-60792): Remove the following logic after v6.0 branches out.
- const auto& collection = Grid::get(opCtx)->catalogClient()->getCollection(
- opCtx, nss, repl::ReadConcernLevel::kLocalReadConcern);
- request.setCollectionUUID(collection.getUuid()); // Set collection UUID on chunk member
- }
-
if (request.hasToShardId()) {
uassertStatusOK(Balancer::get(opCtx)->moveSingleChunk(opCtx,
nss,
diff --git a/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp b/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp
index c6ceb8a4ca2..da6ec5ed2b9 100644
--- a/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp
+++ b/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp
@@ -149,6 +149,10 @@ public:
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
return AllowedOnSecondary::kNever;
}
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
} configsvrRemoveChunksCmd;
} // namespace
diff --git a/src/mongo/db/s/config/configsvr_remove_tags_command.cpp b/src/mongo/db/s/config/configsvr_remove_tags_command.cpp
index f880d9be4bf..7333b0036dc 100644
--- a/src/mongo/db/s/config/configsvr_remove_tags_command.cpp
+++ b/src/mongo/db/s/config/configsvr_remove_tags_command.cpp
@@ -144,6 +144,10 @@ public:
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
return AllowedOnSecondary::kNever;
}
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
} configsvrRemoveTagsCmd;
} // namespace
diff --git a/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp b/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp
index 438a7d3227a..df59c5135ea 100644
--- a/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp
+++ b/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp
@@ -68,6 +68,10 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
diff --git a/src/mongo/db/s/config/configsvr_reshard_collection_cmd.cpp b/src/mongo/db/s/config/configsvr_reshard_collection_cmd.cpp
index 7f284e2c642..1a094c7db5f 100644
--- a/src/mongo/db/s/config/configsvr_reshard_collection_cmd.cpp
+++ b/src/mongo/db/s/config/configsvr_reshard_collection_cmd.cpp
@@ -63,8 +63,9 @@ getExistingInstanceToJoin(OperationContext* opCtx,
const NamespaceString& nss,
const BSONObj& newShardKey) {
auto instances =
- getReshardingStateMachines<ReshardingCoordinatorService,
- ReshardingCoordinatorService::ReshardingCoordinator>(opCtx, nss);
+ resharding::getReshardingStateMachines<ReshardingCoordinatorService,
+ ReshardingCoordinatorService::ReshardingCoordinator>(
+ opCtx, nss);
for (const auto& instance : instances) {
if (SimpleBSONObjComparator::kInstance.evaluate(
instance->getMetadata().getReshardingKey().toBSON() == newShardKey)) {
@@ -139,7 +140,7 @@ public:
"Must specify only one of _presetReshardedChunks or numInitialChunks",
!(bool(request().getNumInitialChunks())));
- validateReshardedChunks(
+ resharding::validateReshardedChunks(
*presetChunks, opCtx, ShardKeyPattern(request().getKey()).getKeyPattern());
}
@@ -183,11 +184,12 @@ public:
return boost::none;
}
- auto tempReshardingNss = constructTemporaryReshardingNss(nss.db(), cm.getUUID());
+ auto tempReshardingNss =
+ resharding::constructTemporaryReshardingNss(nss.db(), cm.getUUID());
if (auto zones = request().getZones()) {
- checkForOverlappingZones(*zones);
+ resharding::checkForOverlappingZones(*zones);
}
auto coordinatorDoc =
diff --git a/src/mongo/db/s/config/configsvr_set_cluster_parameter_command.cpp b/src/mongo/db/s/config/configsvr_set_cluster_parameter_command.cpp
index 31a20120586..3b2a6c883df 100644
--- a/src/mongo/db/s/config/configsvr_set_cluster_parameter_command.cpp
+++ b/src/mongo/db/s/config/configsvr_set_cluster_parameter_command.cpp
@@ -62,12 +62,6 @@ public:
serverGlobalParams.clusterRole == ClusterRole::ConfigServer);
const auto coordinatorCompletionFuture = [&]() -> SharedSemiFuture<void> {
- FixedFCVRegion fcvRegion(opCtx);
- uassert(ErrorCodes::IllegalOperation,
- "featureFlagClusterWideConfig not enabled",
- gFeatureFlagClusterWideConfig.isEnabled(
- serverGlobalParams.featureCompatibility));
-
// Validate parameter before creating coordinator.
{
BSONObj cmdParamObj = request().getCommandParameter();
diff --git a/src/mongo/db/s/config/initial_split_policy.cpp b/src/mongo/db/s/config/initial_split_policy.cpp
index 1be2dd486fb..0b2ab1b0474 100644
--- a/src/mongo/db/s/config/initial_split_policy.cpp
+++ b/src/mongo/db/s/config/initial_split_policy.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/config/initial_split_policy.h"
#include "mongo/client/read_preference.h"
@@ -50,7 +47,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace {
@@ -73,17 +69,11 @@ void appendChunk(const SplitPolicyParams& params,
const BSONObj& min,
const BSONObj& max,
ChunkVersion* version,
- const Timestamp& creationTimestamp,
const ShardId& shardId,
std::vector<ChunkType>* chunks) {
- chunks->emplace_back(
- params.collectionUUID,
- ChunkRange(min, max),
- ChunkVersion(
- version->majorVersion(), version->minorVersion(), version->epoch(), creationTimestamp),
- shardId);
+ chunks->emplace_back(params.collectionUUID, ChunkRange(min, max), *version, shardId);
auto& chunk = chunks->back();
- chunk.setHistory({ChunkHistory(creationTimestamp, shardId)});
+ chunk.setHistory({ChunkHistory(version->getTimestamp(), shardId)});
version->incMinor();
}
@@ -238,7 +228,7 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::generateShardColle
finalSplitPoints.push_back(splitPoint);
}
- ChunkVersion version(1, 0, OID::gen(), validAfter);
+ ChunkVersion version({OID::gen(), validAfter}, {1, 0});
const auto& keyPattern(shardKeyPattern.getKeyPattern());
std::vector<ChunkType> chunks;
@@ -254,7 +244,7 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::generateShardColle
? params.primaryShardId
: allShardIds[(i / numContiguousChunksPerShard) % allShardIds.size()];
- appendChunk(params, min, max, &version, validAfter, shardId, &chunks);
+ appendChunk(params, min, max, &version, shardId, &chunks);
}
return {std::move(chunks)};
@@ -327,14 +317,13 @@ InitialSplitPolicy::ShardCollectionConfig SingleChunkOnPrimarySplitPolicy::creat
const auto currentTime = VectorClock::get(opCtx)->getTime();
const auto validAfter = currentTime.clusterTime().asTimestamp();
- ChunkVersion version(1, 0, OID::gen(), validAfter);
+ ChunkVersion version({OID::gen(), validAfter}, {1, 0});
const auto& keyPattern = shardKeyPattern.getKeyPattern();
std::vector<ChunkType> chunks;
appendChunk(params,
keyPattern.globalMin(),
keyPattern.globalMax(),
&version,
- validAfter,
params.primaryShardId,
&chunks);
@@ -421,19 +410,14 @@ InitialSplitPolicy::ShardCollectionConfig AbstractTagsBasedSplitPolicy::createFi
return shardIds[indx++ % shardIds.size()];
};
- ChunkVersion version(1, 0, OID::gen(), validAfter);
+ ChunkVersion version({OID::gen(), validAfter}, {1, 0});
auto lastChunkMax = keyPattern.globalMin();
std::vector<ChunkType> chunks;
for (const auto& tag : _tags) {
// Create a chunk for the hole [lastChunkMax, tag.getMinKey)
if (tag.getMinKey().woCompare(lastChunkMax) > 0) {
- appendChunk(params,
- lastChunkMax,
- tag.getMinKey(),
- &version,
- validAfter,
- nextShardIdForHole(),
- &chunks);
+ appendChunk(
+ params, lastChunkMax, tag.getMinKey(), &version, nextShardIdForHole(), &chunks);
}
// Create chunk for the actual tag - [tag.getMinKey, tag.getMaxKey)
const auto it = tagToShards.find(tag.getTag());
@@ -470,7 +454,7 @@ InitialSplitPolicy::ShardCollectionConfig AbstractTagsBasedSplitPolicy::createFi
const BSONObj max = (splitPointIdx == splitInfo.splitPoints.size())
? tag.getMaxKey()
: splitInfo.splitPoints[splitPointIdx];
- appendChunk(params, min, max, &version, validAfter, targetShard, &chunks);
+ appendChunk(params, min, max, &version, targetShard, &chunks);
}
}
lastChunkMax = tag.getMaxKey();
@@ -478,13 +462,8 @@ InitialSplitPolicy::ShardCollectionConfig AbstractTagsBasedSplitPolicy::createFi
// Create a chunk for the hole [lastChunkMax, MaxKey]
if (lastChunkMax.woCompare(keyPattern.globalMax()) < 0) {
- appendChunk(params,
- lastChunkMax,
- keyPattern.globalMax(),
- &version,
- validAfter,
- nextShardIdForHole(),
- &chunks);
+ appendChunk(
+ params, lastChunkMax, keyPattern.globalMax(), &version, nextShardIdForHole(), &chunks);
}
return {std::move(chunks)};
@@ -765,13 +744,13 @@ InitialSplitPolicy::ShardCollectionConfig ReshardingSplitPolicy::createFirstChun
const auto currentTime = VectorClock::get(opCtx)->getTime();
const auto validAfter = currentTime.clusterTime().asTimestamp();
- ChunkVersion version(1, 0, OID::gen(), validAfter);
+ ChunkVersion version({OID::gen(), validAfter}, {1, 0});
splitPoints.insert(keyPattern.globalMax());
for (const auto& splitPoint : splitPoints) {
auto bestShard = selectBestShard(
chunkDistribution, zoneInfo, zoneToShardMap, {lastChunkMax, splitPoint});
- appendChunk(params, lastChunkMax, splitPoint, &version, validAfter, bestShard, &chunks);
+ appendChunk(params, lastChunkMax, splitPoint, &version, bestShard, &chunks);
lastChunkMax = splitPoint;
chunkDistribution[bestShard]++;
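The initial_split_policy.cpp hunks drop appendChunk's separate creationTimestamp parameter: the timestamp now travels inside the ChunkVersion, so the helper reads it back through getTimestamp() when recording chunk history. The resulting helper, assembled from the hunk at the top of this file's diff:

    void appendChunk(const SplitPolicyParams& params,
                     const BSONObj& min,
                     const BSONObj& max,
                     ChunkVersion* version,
                     const ShardId& shardId,
                     std::vector<ChunkType>* chunks) {
        chunks->emplace_back(params.collectionUUID, ChunkRange(min, max), *version, shardId);
        auto& chunk = chunks->back();
        // The creation timestamp is no longer passed in; it is the one carried by *version.
        chunk.setHistory({ChunkHistory(version->getTimestamp(), shardId)});
        version->incMinor();
    }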
diff --git a/src/mongo/db/s/config/initial_split_policy_test.cpp b/src/mongo/db/s/config/initial_split_policy_test.cpp
index 2eea0b6905f..9fc9a5576d0 100644
--- a/src/mongo/db/s/config/initial_split_policy_test.cpp
+++ b/src/mongo/db/s/config/initial_split_policy_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/bson/json.h"
#include "mongo/db/s/config/config_server_test_fixture.h"
#include "mongo/db/s/config/initial_split_policy.h"
@@ -40,7 +37,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -208,7 +204,7 @@ public:
std::vector<ChunkType> chunks;
for (unsigned long i = 0; i < chunkRanges.size(); ++i) {
- ChunkVersion version(1, 0, OID::gen(), Timestamp(1, 1));
+ ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 0});
ChunkType chunk(_uuid, chunkRanges[i], version, shardIds[i]);
chunk.setHistory({ChunkHistory(timeStamp, shardIds[i])});
chunks.push_back(chunk);
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp
index 0a45a9d3a6d..bfef69bcb9f 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp
@@ -175,8 +175,6 @@ protected:
}
void expectClusterParametersRequest(const HostAndPort& target) {
- if (!gFeatureFlagClusterWideConfig.isEnabled(serverGlobalParams.featureCompatibility))
- return;
auto clusterParameterDocs = uassertStatusOK(getConfigShard()->exhaustiveFindOnConfig(
operationContext(),
ReadPreferenceSetting(ReadPreference::PrimaryOnly),
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_bump_collection_version_and_change_metadata_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_bump_collection_version_and_change_metadata_test.cpp
index a4abd0ff45b..fbb502f933b 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_bump_collection_version_and_change_metadata_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_bump_collection_version_and_change_metadata_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/concurrency/exception_util.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/logical_session_cache_noop.h"
@@ -43,7 +41,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -121,17 +118,17 @@ TEST_F(ShardingCatalogManagerBumpCollectionVersionAndChangeMetadataTest,
const auto collUUID = UUID::gen();
const auto shard0Chunk0 = generateChunkType(collUUID,
- ChunkVersion(10, 1, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {10, 1}),
kShard0.getName(),
BSON("a" << 1),
BSON("a" << 10));
const auto shard0Chunk1 = generateChunkType(collUUID,
- ChunkVersion(11, 2, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {11, 2}),
kShard0.getName(),
BSON("a" << 11),
BSON("a" << 20));
const auto shard1Chunk0 = generateChunkType(collUUID,
- ChunkVersion(8, 1, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {8, 1}),
kShard1.getName(),
BSON("a" << 21),
BSON("a" << 100));
@@ -157,7 +154,7 @@ TEST_F(ShardingCatalogManagerBumpCollectionVersionAndChangeMetadataTest, NoChunk
const auto collUUID = UUID::gen();
const auto shard0Chunk0 = generateChunkType(collUUID,
- ChunkVersion(10, 1, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {10, 1}),
kShard0.getName(),
BSON("a" << 1),
BSON("a" << 10));
@@ -182,12 +179,12 @@ TEST_F(ShardingCatalogManagerBumpCollectionVersionAndChangeMetadataTest,
const auto collUUID = UUID::gen();
const auto shard0Chunk0 = generateChunkType(collUUID,
- ChunkVersion(10, 1, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {10, 1}),
kShard0.getName(),
BSON("a" << 1),
BSON("a" << 10));
const auto shard1Chunk0 = generateChunkType(collUUID,
- ChunkVersion(11, 2, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {11, 2}),
kShard1.getName(),
BSON("a" << 11),
BSON("a" << 20));
@@ -244,12 +241,12 @@ TEST_F(ShardingCatalogManagerBumpCollectionVersionAndChangeMetadataTest,
const auto collUUID = UUID::gen();
const auto shard0Chunk0 = generateChunkType(collUUID,
- ChunkVersion(10, 1, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {10, 1}),
kShard0.getName(),
BSON("a" << 1),
BSON("a" << 10));
const auto shard1Chunk0 = generateChunkType(collUUID,
- ChunkVersion(11, 2, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {11, 2}),
kShard1.getName(),
BSON("a" << 11),
BSON("a" << 20));
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
index 0cf64cc5288..f461f1ae0a5 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/base/status_with.h"
@@ -67,7 +64,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace {
@@ -87,7 +83,7 @@ void appendShortVersion(BufBuilder* out, const ChunkType& chunk) {
bb.append(ChunkType::min(), chunk.getMin());
bb.append(ChunkType::max(), chunk.getMax());
if (chunk.isVersionSet()) {
- chunk.getVersion().appendLegacyWithField(&bb, ChunkType::lastmod());
+ chunk.getVersion().serializeToBSON(ChunkType::lastmod(), &bb);
}
bb.done();
}
@@ -268,7 +264,8 @@ ChunkVersion getShardVersion(OperationContext* opCtx,
if (swDonorShardVersion.getStatus().code() == 50577) {
// The query to find 'nss' chunks belonging to the donor shard didn't return any chunks,
// meaning the last chunk for fromShard was donated. Gracefully handle the error.
- return ChunkVersion(0, 0, collectionVersion.epoch(), collectionVersion.getTimestamp());
+ return ChunkVersion({collectionVersion.epoch(), collectionVersion.getTimestamp()},
+ {0, 0});
} else {
// Bubble up any other error
uassertStatusOK(swDonorShardVersion);
@@ -391,10 +388,9 @@ void ShardingCatalogManager::bumpMajorVersionOneChunkPerShard(
TxnNumber txnNumber,
const std::vector<ShardId>& shardIds) {
auto curCollectionVersion = uassertStatusOK(getCollectionVersion(opCtx, nss));
- ChunkVersion targetChunkVersion(curCollectionVersion.majorVersion() + 1,
- 0,
- curCollectionVersion.epoch(),
- curCollectionVersion.getTimestamp());
+ ChunkVersion targetChunkVersion(
+ {curCollectionVersion.epoch(), curCollectionVersion.getTimestamp()},
+ {curCollectionVersion.majorVersion() + 1, 0});
auto const configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
auto findCollResponse = uassertStatusOK(
@@ -684,7 +680,7 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkSplit(
BSONObjBuilder b(logDetail.subobjStart("before"));
b.append(ChunkType::min(), range.getMin());
b.append(ChunkType::max(), range.getMax());
- collVersion.appendLegacyWithField(&b, ChunkType::lastmod());
+ collVersion.serializeToBSON(ChunkType::lastmod(), &b);
}
if (splitChunkResult.newChunks->size() == 2) {
@@ -960,8 +956,8 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunksMerge(
b.append(chunkToMerge.toConfigBSON());
}
}
- initialVersion.appendLegacyWithField(&logDetail, "prevShardVersion");
- mergeVersion.appendLegacyWithField(&logDetail, "mergedVersion");
+ initialVersion.serializeToBSON("prevShardVersion", &logDetail);
+ mergeVersion.serializeToBSON("mergedVersion", &logDetail);
logDetail.append("owningShard", shardId);
ShardingLogging::get(opCtx)->logChange(
@@ -1127,10 +1123,9 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
newMigratedChunk->setMin(migratedChunk.getMin());
newMigratedChunk->setMax(migratedChunk.getMax());
newMigratedChunk->setShard(toShard);
- newMigratedChunk->setVersion(ChunkVersion(currentCollectionVersion.majorVersion() + 1,
- minVersionIncrement++,
- currentCollectionVersion.epoch(),
- currentCollectionVersion.getTimestamp()));
+ newMigratedChunk->setVersion(
+ ChunkVersion({currentCollectionVersion.epoch(), currentCollectionVersion.getTimestamp()},
+ {currentCollectionVersion.majorVersion() + 1, minVersionIncrement++}));
// Copy the complete history.
auto newHistory = currentChunk.getHistory();
@@ -1186,10 +1181,9 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
ChunkType leftSplitChunk = currentChunk;
leftSplitChunk.setName(OID::gen());
leftSplitChunk.setMax(movedChunkMin);
- leftSplitChunk.setVersion(ChunkVersion(movedChunkVersion.majorVersion(),
- minVersionIncrement++,
- movedChunkVersion.epoch(),
- movedChunkVersion.getTimestamp()));
+ leftSplitChunk.setVersion(
+ ChunkVersion({movedChunkVersion.epoch(), movedChunkVersion.getTimestamp()},
+ {movedChunkVersion.majorVersion(), minVersionIncrement++}));
newSplitChunks->emplace_back(std::move(leftSplitChunk));
}
@@ -1199,10 +1193,9 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
ChunkType rightSplitChunk = currentChunk;
rightSplitChunk.setName(OID::gen());
rightSplitChunk.setMin(movedChunkMax);
- rightSplitChunk.setVersion(ChunkVersion(movedChunkVersion.majorVersion(),
- minVersionIncrement++,
- movedChunkVersion.epoch(),
- movedChunkVersion.getTimestamp()));
+ rightSplitChunk.setVersion(
+ ChunkVersion({movedChunkVersion.epoch(), movedChunkVersion.getTimestamp()},
+ {movedChunkVersion.majorVersion(), minVersionIncrement++}));
newSplitChunks->emplace_back(std::move(rightSplitChunk));
}
}
@@ -1218,10 +1211,9 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
newControlChunk = std::make_shared<ChunkType>(origControlChunk);
// Setting control chunk's minor version to 1 on the donor shard.
- newControlChunk->setVersion(ChunkVersion(currentCollectionVersion.majorVersion() + 1,
- minVersionIncrement++,
- currentCollectionVersion.epoch(),
- currentCollectionVersion.getTimestamp()));
+ newControlChunk->setVersion(ChunkVersion(
+ {currentCollectionVersion.epoch(), currentCollectionVersion.getTimestamp()},
+ {currentCollectionVersion.majorVersion() + 1, minVersionIncrement++}));
}
_commitChunkMigrationInTransaction(
@@ -1232,7 +1224,7 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
// We migrated the last chunk from the donor shard.
newMigratedChunk->getVersion().serializeToBSON(kCollectionVersionField, &response);
const ChunkVersion donorShardVersion(
- 0, 0, currentCollectionVersion.epoch(), currentCollectionVersion.getTimestamp());
+ {currentCollectionVersion.epoch(), currentCollectionVersion.getTimestamp()}, {0, 0});
donorShardVersion.serializeToBSON(ChunkVersion::kShardVersionField, &response);
} else {
newControlChunk->getVersion().serializeToBSON(kCollectionVersionField, &response);
@@ -1349,8 +1341,8 @@ void ShardingCatalogManager::upgradeChunksHistory(OperationContext* opCtx,
}();
// Bump the major version in order to be guaranteed to trigger refresh on every shard
- ChunkVersion newCollectionVersion(
- collVersion.majorVersion() + 1, 0, collVersion.epoch(), collVersion.getTimestamp());
+ ChunkVersion newCollectionVersion({collVersion.epoch(), collVersion.getTimestamp()},
+ {collVersion.majorVersion() + 1, 0});
std::set<ShardId> changedShardIds;
for (const auto& chunk : allChunksVector) {
auto upgradeChunk = uassertStatusOK(
@@ -1491,10 +1483,9 @@ void ShardingCatalogManager::clearJumboFlag(OperationContext* opCtx,
<< chunk.toString() << ").",
currentCollectionVersion.epoch() == collectionEpoch);
- ChunkVersion newVersion(currentCollectionVersion.majorVersion() + 1,
- 0,
- currentCollectionVersion.epoch(),
- currentCollectionVersion.getTimestamp());
+ ChunkVersion newVersion(
+ {currentCollectionVersion.epoch(), currentCollectionVersion.getTimestamp()},
+ {currentCollectionVersion.majorVersion() + 1, 0});
BSONObj chunkQuery(BSON(ChunkType::min(chunk.getMin())
<< ChunkType::max(chunk.getMax()) << ChunkType::collectionUUID
@@ -1653,8 +1644,8 @@ void ShardingCatalogManager::ensureChunkVersionIsGreaterThan(OperationContext* o
// Generate a new version for the chunk by incrementing the collectionVersion's major
// version.
auto newChunk = matchingChunk;
- newChunk.setVersion(ChunkVersion(
- highestChunk.getVersion().majorVersion() + 1, 0, coll.getEpoch(), coll.getTimestamp()));
+ newChunk.setVersion(ChunkVersion({coll.getEpoch(), coll.getTimestamp()},
+ {highestChunk.getVersion().majorVersion() + 1, 0}));
// Update the chunk, if it still exists, to have the bumped version.
earlyReturnBeforeDoingWriteGuard.dismiss();
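Alongside the constructor change, this file also renames ChunkVersion's BSON helpers. A sketch of the before/after calls for a ChunkVersion named version, with argument order taken from the hunks above and the test hunks below rather than from the headers:

    BSONObjBuilder b;
    // Old serialization helper, removed here:
    //   version.appendLegacyWithField(&b, ChunkType::lastmod());
    // New helper takes the field name first and the builder second:
    version.serializeToBSON(ChunkType::lastmod(), &b);
    // Parsing side, as updated in the test files further down:
    //   old: ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
    //   new: ChunkVersion::parse(versions["shardVersion"]);
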
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_clear_jumbo_flag_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_clear_jumbo_flag_test.cpp
index 762961eaac3..9f883997a3d 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_clear_jumbo_flag_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_clear_jumbo_flag_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/client/read_preference.h"
@@ -72,7 +70,7 @@ protected:
ChunkType chunk;
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- chunk.setVersion({12, 7, epoch, timestamp});
+ chunk.setVersion(ChunkVersion({epoch, timestamp}, {12, 7}));
chunk.setShard(_shardName);
chunk.setMin(jumboChunk().getMin());
chunk.setMax(jumboChunk().getMax());
@@ -81,7 +79,7 @@ protected:
ChunkType otherChunk;
otherChunk.setName(OID::gen());
otherChunk.setCollectionUUID(collUuid);
- otherChunk.setVersion({14, 7, epoch, timestamp});
+ otherChunk.setVersion(ChunkVersion({epoch, timestamp}, {14, 7}));
otherChunk.setShard(_shardName);
otherChunk.setMin(nonJumboChunk().getMin());
otherChunk.setMax(nonJumboChunk().getMax());
@@ -107,7 +105,7 @@ TEST_F(ClearJumboFlagTest, ClearJumboShouldBumpVersion) {
operationContext(), collUuid, jumboChunk().getMin(), collEpoch, collTimestamp));
ASSERT_FALSE(chunkDoc.getJumbo());
auto chunkVersion = chunkDoc.getVersion();
- ASSERT_EQ(ChunkVersion(15, 0, collEpoch, collTimestamp), chunkVersion);
+ ASSERT_EQ(ChunkVersion({collEpoch, collTimestamp}, {15, 0}), chunkVersion);
};
test(_nss2, Timestamp(42));
@@ -125,7 +123,7 @@ TEST_F(ClearJumboFlagTest, ClearJumboShouldNotBumpVersionIfChunkNotJumbo) {
auto chunkDoc = uassertStatusOK(getChunkDoc(
operationContext(), collUuid, nonJumboChunk().getMin(), collEpoch, collTimestamp));
ASSERT_FALSE(chunkDoc.getJumbo());
- ASSERT_EQ(ChunkVersion(14, 7, collEpoch, collTimestamp), chunkDoc.getVersion());
+ ASSERT_EQ(ChunkVersion({collEpoch, collTimestamp}, {14, 7}), chunkDoc.getVersion());
};
test(_nss2, Timestamp(42));
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp
index 235954c5d5d..fc8a55a9635 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/client/read_preference.h"
@@ -49,7 +47,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -104,7 +101,7 @@ TEST_F(CommitChunkMigrate, ChunksUpdatedCorrectly) {
ChunkType migratedChunk, controlChunk;
{
- ChunkVersion origVersion(12, 7, collEpoch, collTimestamp);
+ ChunkVersion origVersion({collEpoch, collTimestamp}, {12, 7});
migratedChunk.setName(OID::gen());
migratedChunk.setCollectionUUID(collUUID);
@@ -140,15 +137,14 @@ TEST_F(CommitChunkMigrate, ChunksUpdatedCorrectly) {
validAfter));
// Verify the versions returned match expected values.
- auto mver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
- ASSERT_EQ(ChunkVersion(migratedChunk.getVersion().majorVersion() + 1,
- 1,
- migratedChunk.getVersion().epoch(),
- migratedChunk.getVersion().getTimestamp()),
+ auto mver = ChunkVersion::parse(versions["shardVersion"]);
+ ASSERT_EQ(ChunkVersion(
+ {migratedChunk.getVersion().epoch(), migratedChunk.getVersion().getTimestamp()},
+ {migratedChunk.getVersion().majorVersion() + 1, 1}),
mver);
// Verify that a collection version is returned
- auto cver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["collectionVersion"]);
+ auto cver = ChunkVersion::parse(versions["collectionVersion"]);
ASSERT_TRUE(mver.isOlderOrEqualThan(cver));
// Verify the chunks ended up in the right shards.
@@ -188,8 +184,8 @@ TEST_F(CommitChunkMigrate, ChunksUpdatedCorrectlyWithoutControlChunk) {
setupShards({shard0, shard1});
- int origMajorVersion = 15;
- auto const origVersion = ChunkVersion(origMajorVersion, 4, collEpoch, collTimestamp);
+ uint32_t origMajorVersion = 15;
+ auto const origVersion = ChunkVersion({collEpoch, collTimestamp}, {origMajorVersion, 4});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -222,12 +218,12 @@ TEST_F(CommitChunkMigrate, ChunksUpdatedCorrectlyWithoutControlChunk) {
// Verify the version returned matches expected value.
BSONObj versions = resultBSON.getValue();
- auto mver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
- ASSERT_EQ(ChunkVersion(0, 0, origVersion.epoch(), origVersion.getTimestamp()), mver);
+ auto mver = ChunkVersion::parse(versions["shardVersion"]);
+ ASSERT_EQ(ChunkVersion({origVersion.epoch(), origVersion.getTimestamp()}, {0, 0}), mver);
// Verify that a collection version is returned
- auto cver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["collectionVersion"]);
- ASSERT_EQ(ChunkVersion(origMajorVersion + 1, 0, collEpoch, collTimestamp), cver);
+ auto cver = ChunkVersion::parse(versions["collectionVersion"]);
+ ASSERT_EQ(ChunkVersion({collEpoch, collTimestamp}, {origMajorVersion + 1, 0}), cver);
// Verify the chunk ended up in the right shard.
auto chunkDoc0 =
@@ -253,8 +249,8 @@ TEST_F(CommitChunkMigrate, CheckCorrectOpsCommandNoCtlTrimHistory) {
setupShards({shard0, shard1});
- int origMajorVersion = 15;
- auto const origVersion = ChunkVersion(origMajorVersion, 4, collEpoch, collTimestamp);
+ uint32_t origMajorVersion = 15;
+ auto const origVersion = ChunkVersion({collEpoch, collTimestamp}, {origMajorVersion, 4});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -288,8 +284,8 @@ TEST_F(CommitChunkMigrate, CheckCorrectOpsCommandNoCtlTrimHistory) {
// Verify the version returned matches expected value.
BSONObj versions = resultBSON.getValue();
- auto mver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
- ASSERT_EQ(ChunkVersion(0, 0, origVersion.epoch(), origVersion.getTimestamp()), mver);
+ auto mver = ChunkVersion::parse(versions["shardVersion"]);
+ ASSERT_EQ(ChunkVersion({origVersion.epoch(), origVersion.getTimestamp()}, {0, 0}), mver);
// Verify the chunk ended up in the right shard.
auto chunkDoc0 =
@@ -314,9 +310,8 @@ TEST_F(CommitChunkMigrate, RejectOutOfOrderHistory) {
setupShards({shard0, shard1});
- int origMajorVersion = 15;
- auto const origVersion =
- ChunkVersion(origMajorVersion, 4, OID::gen(), Timestamp(42) /* timestamp */);
+ uint32_t origMajorVersion = 15;
+ auto const origVersion = ChunkVersion({OID::gen(), Timestamp(42)}, {origMajorVersion, 4});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -362,9 +357,8 @@ TEST_F(CommitChunkMigrate, RejectWrongCollectionEpoch0) {
setupShards({shard0, shard1});
- int origMajorVersion = 12;
- auto const origVersion =
- ChunkVersion(origMajorVersion, 7, OID::gen(), Timestamp(42) /* timestamp */);
+ uint32_t origMajorVersion = 12;
+ auto const origVersion = ChunkVersion({OID::gen(), Timestamp(42)}, {origMajorVersion, 7});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -418,11 +412,9 @@ TEST_F(CommitChunkMigrate, RejectWrongCollectionEpoch1) {
setupShards({shard0, shard1});
- int origMajorVersion = 12;
- auto const origVersion =
- ChunkVersion(origMajorVersion, 7, OID::gen(), Timestamp(42) /* timestamp */);
- auto const otherVersion =
- ChunkVersion(origMajorVersion, 7, OID::gen(), Timestamp(42) /* timestamp */);
+ uint32_t origMajorVersion = 12;
+ auto const origVersion = ChunkVersion({OID::gen(), Timestamp(42)}, {origMajorVersion, 7});
+ auto const otherVersion = ChunkVersion({OID::gen(), Timestamp(42)}, {origMajorVersion, 7});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -479,8 +471,8 @@ TEST_F(CommitChunkMigrate, CommitWithLastChunkOnShardShouldNotAffectOtherChunks)
setupShards({shard0, shard1});
- int origMajorVersion = 12;
- auto const origVersion = ChunkVersion(origMajorVersion, 7, collEpoch, collTimestamp);
+ uint32_t origMajorVersion = 12;
+ auto const origVersion = ChunkVersion({collEpoch, collTimestamp}, {origMajorVersion, 7});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -525,8 +517,8 @@ TEST_F(CommitChunkMigrate, CommitWithLastChunkOnShardShouldNotAffectOtherChunks)
// Verify the versions returned match expected values.
BSONObj versions = resultBSON.getValue();
- auto mver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
- ASSERT_EQ(ChunkVersion(0, 0, origVersion.epoch(), origVersion.getTimestamp()), mver);
+ auto mver = ChunkVersion::parse(versions["shardVersion"]);
+ ASSERT_EQ(ChunkVersion({origVersion.epoch(), origVersion.getTimestamp()}, {0, 0}), mver);
// Verify the chunks ended up in the right shards.
auto chunkDoc0 =
@@ -560,7 +552,7 @@ TEST_F(CommitChunkMigrate, RejectMissingChunkVersion) {
setupShards({shard0, shard1});
- ChunkVersion origVersion(12, 7, OID::gen(), Timestamp(42) /* timestamp */);
+ ChunkVersion origVersion({OID::gen(), Timestamp(42)}, {12, 7});
// Create migrate chunk with no chunk version set.
ChunkType migratedChunk;
@@ -610,7 +602,7 @@ TEST_F(CommitChunkMigrate, RejectOlderChunkVersion) {
setupShards({shard0, shard1});
auto epoch = OID::gen();
- ChunkVersion origVersion(12, 7, epoch, Timestamp(42) /* timestamp */);
+ ChunkVersion origVersion({epoch, Timestamp(42)}, {12, 7});
ChunkType migratedChunk;
migratedChunk.setName(OID::gen());
@@ -621,7 +613,7 @@ TEST_F(CommitChunkMigrate, RejectOlderChunkVersion) {
migratedChunk.setMin(BSON("a" << 1));
migratedChunk.setMax(BSON("a" << 10));
- ChunkVersion currentChunkVersion(14, 7, epoch, Timestamp(42) /* timestamp */);
+ ChunkVersion currentChunkVersion({epoch, Timestamp(42)}, {14, 7});
ChunkType currentChunk;
currentChunk.setName(OID::gen());
@@ -662,7 +654,7 @@ TEST_F(CommitChunkMigrate, RejectMismatchedEpoch) {
setupShards({shard0, shard1});
- ChunkVersion origVersion(12, 7, OID::gen(), Timestamp(42) /* timestamp */);
+ ChunkVersion origVersion({OID::gen(), Timestamp(42)}, {12, 7});
ChunkType migratedChunk;
migratedChunk.setName(OID::gen());
@@ -673,7 +665,7 @@ TEST_F(CommitChunkMigrate, RejectMismatchedEpoch) {
migratedChunk.setMin(BSON("a" << 1));
migratedChunk.setMax(BSON("a" << 10));
- ChunkVersion currentChunkVersion(12, 7, OID::gen(), Timestamp(42) /* timestamp */);
+ ChunkVersion currentChunkVersion({OID::gen(), Timestamp(42)}, {12, 7});
ChunkType currentChunk;
currentChunk.setName(OID::gen());
@@ -730,7 +722,7 @@ public:
void setupCollectionWithNChunks(int numberOfChunks) {
invariant(numberOfChunks > 0);
- int currentMajorVersion = 1;
+ uint32_t currentMajorVersion = 1;
int historyTimestampSecond = 100;
std::vector<ChunkHistory> history;
@@ -745,7 +737,7 @@ public:
const auto max = chunksMin.at(i + 1); // Max key of the chunk being created
const auto shardId = _shardIds.at(i % 2); // Shard owning the chunk
ChunkVersion version =
- ChunkVersion(currentMajorVersion++, 0, _collEpoch, _collTimestamp);
+ ChunkVersion({_collEpoch, _collTimestamp}, {currentMajorVersion++, 0});
history.insert(history.begin(),
{ChunkHistory(Timestamp(historyTimestampSecond++, 0), shardId)});
ChunkType chunk = createChunk(_collUUID, min, max, version, shardId, history);
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_ensure_chunk_version_is_greater_than_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_ensure_chunk_version_is_greater_than_test.cpp
index 20e8b2ecc6a..8921d0c2e8b 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_ensure_chunk_version_is_greater_than_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_ensure_chunk_version_is_greater_than_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/config/config_server_test_fixture.h"
#include "mongo/db/s/config/sharding_catalog_manager.h"
@@ -95,7 +93,7 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest, IfNoCollectionFoundReturnsSuccess) {
const auto requestedChunkType =
generateChunkType(_nss,
_collUuid,
- ChunkVersion(10, 2, OID::gen(), Timestamp(1, 1)),
+ ChunkVersion({OID::gen(), Timestamp(1, 1)}, {10, 2}),
ShardId(_shardName),
BSON("a" << 1),
BSON("a" << 10));
@@ -112,12 +110,13 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest, IfNoChunkWithMatchingMinKeyFoundRetu
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
ChunkType existingChunkType = requestedChunkType;
// Min key is different.
@@ -140,12 +139,13 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest, IfNoChunkWithMatchingMaxKeyFoundRetu
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
ChunkType existingChunkType = requestedChunkType;
// Max key is different.
@@ -169,20 +169,22 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest,
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
const auto existingChunkType = requestedChunkType;
- const auto highestChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(20, 3, collEpoch, collTimestamp),
- ShardId("shard0001"),
- BSON("a" << 11),
- BSON("a" << 20));
+ const auto highestChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {20, 3}),
+ ShardId("shard0001"),
+ BSON("a" << 11),
+ BSON("a" << 20));
setupCollection(_nss, _keyPattern, {existingChunkType, highestChunkType});
ShardingCatalogManager::get(operationContext())
@@ -195,8 +197,8 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest,
assertChunkVersionWasBumpedTo(
existingChunkType,
getChunkDoc(operationContext(), existingChunkType.getMin(), collEpoch, collTimestamp),
- ChunkVersion(
- highestChunkType.getVersion().majorVersion() + 1, 0, collEpoch, collTimestamp));
+ ChunkVersion({collEpoch, collTimestamp},
+ {highestChunkType.getVersion().majorVersion() + 1, 0}));
}
TEST_F(EnsureChunkVersionIsGreaterThanTest,
@@ -204,20 +206,22 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest,
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
const auto existingChunkType = requestedChunkType;
- const auto highestChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(20, 3, collEpoch, collTimestamp),
- ShardId("shard0001"),
- BSON("a" << 11),
- BSON("a" << 20));
+ const auto highestChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {20, 3}),
+ ShardId("shard0001"),
+ BSON("a" << 11),
+ BSON("a" << 20));
setupCollection(_nss, _keyPattern, {existingChunkType, highestChunkType});
ShardingCatalogManager::get(operationContext())
@@ -230,8 +234,8 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest,
assertChunkVersionWasBumpedTo(
existingChunkType,
getChunkDoc(operationContext(), existingChunkType.getMin(), collEpoch, collTimestamp),
- ChunkVersion(
- highestChunkType.getVersion().majorVersion() + 1, 0, collEpoch, collTimestamp));
+ ChunkVersion({collEpoch, collTimestamp},
+ {highestChunkType.getVersion().majorVersion() + 1, 0}));
}
TEST_F(
@@ -240,15 +244,16 @@ TEST_F(
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
ChunkType existingChunkType = requestedChunkType;
- existingChunkType.setVersion(ChunkVersion(11, 1, collEpoch, collTimestamp));
+ existingChunkType.setVersion(ChunkVersion({collEpoch, collTimestamp}, {11, 1}));
setupCollection(_nss, _keyPattern, {existingChunkType});
ShardingCatalogManager::get(operationContext())
@@ -269,15 +274,16 @@ TEST_F(
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
ChunkType existingChunkType = requestedChunkType;
- existingChunkType.setVersion(ChunkVersion(11, 1, collEpoch, collTimestamp));
+ existingChunkType.setVersion(ChunkVersion({collEpoch, collTimestamp}, {11, 1}));
setupCollection(_nss, _keyPattern, {existingChunkType});
ShardingCatalogManager::get(operationContext())
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp
index 3b5951cd82e..9d7e68c9a93 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/client/read_preference.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/logical_session_cache_noop.h"
@@ -85,7 +83,7 @@ TEST_F(MergeChunkTest, MergeExistingChunksCorrectlyShouldSucceed) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(_shardId);
@@ -119,17 +117,16 @@ TEST_F(MergeChunkTest, MergeExistingChunksCorrectlyShouldSucceed) {
_shardId,
validAfter));
- auto collVersion = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["collectionVersion"]);
- auto shardVersion = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
+ auto collVersion = ChunkVersion::parse(versions["collectionVersion"]);
+ auto shardVersion = ChunkVersion::parse(versions["shardVersion"]);
ASSERT_TRUE(origVersion.isOlderThan(shardVersion));
ASSERT_EQ(collVersion, shardVersion);
// Check for increment on mergedChunk's minor version
- auto expectedShardVersion = ChunkVersion(origVersion.majorVersion(),
- origVersion.minorVersion() + 1,
- origVersion.epoch(),
- origVersion.getTimestamp());
+ auto expectedShardVersion =
+ ChunkVersion({origVersion.epoch(), origVersion.getTimestamp()},
+ {origVersion.majorVersion(), origVersion.minorVersion() + 1});
ASSERT_EQ(expectedShardVersion, shardVersion);
@@ -170,7 +167,7 @@ TEST_F(MergeChunkTest, MergeSeveralChunksCorrectlyShouldSucceed) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(_shardId);
@@ -251,7 +248,7 @@ TEST_F(MergeChunkTest, NewMergeShouldClaimHighestVersion) {
otherChunk.setName(OID::gen());
otherChunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 2, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 2});
chunk.setVersion(origVersion);
chunk.setShard(_shardId);
@@ -273,7 +270,7 @@ TEST_F(MergeChunkTest, NewMergeShouldClaimHighestVersion) {
ChunkRange rangeToBeMerged(chunk.getMin(), chunk2.getMax());
// Set up other chunk with competing version
- auto competingVersion = ChunkVersion(2, 1, collEpoch, collTimestamp);
+ auto competingVersion = ChunkVersion({collEpoch, collTimestamp}, {2, 1});
otherChunk.setVersion(competingVersion);
otherChunk.setShard(_shardId);
otherChunk.setMin(BSON("a" << 10));
@@ -334,7 +331,7 @@ TEST_F(MergeChunkTest, MergeLeavesOtherChunksAlone) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 2, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 2});
chunk.setVersion(origVersion);
chunk.setShard(shardId);
@@ -415,7 +412,7 @@ TEST_F(MergeChunkTest, NonExistingNamespace) {
ChunkType chunk;
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
// Construct chunk to be merged
@@ -457,7 +454,7 @@ TEST_F(MergeChunkTest, NonMatchingUUIDsOfChunkAndRequestErrors) {
ChunkType chunk;
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(_shardId);
@@ -503,7 +500,7 @@ TEST_F(MergeChunkTest, MergeAlreadyHappenedSucceeds) {
ChunkRange rangeToBeMerged(chunkMin, chunkMax);
// Store a chunk that matches the range that will be requested
- auto mergedVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto mergedVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
mergedVersion.incMinor();
ChunkType mergedChunk;
mergedChunk.setVersion(mergedVersion);
@@ -559,7 +556,7 @@ TEST_F(MergeChunkTest, MergingChunksWithDollarPrefixShouldSucceed) {
chunk1.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk1.setVersion(origVersion);
chunk1.setShard(_shardId);
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp
index 32544cacc7b..b54338947b1 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include <string>
#include <vector>
@@ -58,7 +55,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace {
@@ -227,15 +223,15 @@ TEST_F(RemoveShardTest, RemoveShardStillDrainingChunksRemaining) {
const auto timestamp = Timestamp(1);
ChunkType chunk1(uuid,
ChunkRange(BSON("_id" << 0), BSON("_id" << 20)),
- ChunkVersion(1, 1, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 1}),
shard1.getName());
ChunkType chunk2(uuid,
ChunkRange(BSON("_id" << 21), BSON("_id" << 50)),
- ChunkVersion(1, 2, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 2}),
shard1.getName());
ChunkType chunk3(uuid,
ChunkRange(BSON("_id" << 51), BSON("_id" << 1000)),
- ChunkVersion(1, 3, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 3}),
shard1.getName());
chunk3.setJumbo(true);
@@ -314,15 +310,15 @@ TEST_F(RemoveShardTest, RemoveShardCompletion) {
Timestamp timestamp = Timestamp(1);
ChunkType chunk1(uuid,
ChunkRange(BSON("_id" << 0), BSON("_id" << 20)),
- ChunkVersion(1, 1, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 1}),
shard1.getName());
ChunkType chunk2(uuid,
ChunkRange(BSON("_id" << 21), BSON("_id" << 50)),
- ChunkVersion(1, 2, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 2}),
shard1.getName());
ChunkType chunk3(uuid,
ChunkRange(BSON("_id" << 51), BSON("_id" << 1000)),
- ChunkVersion(1, 3, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 3}),
shard1.getName());
std::vector<ChunkType> chunks{chunk1, chunk2, chunk3};
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
index 7de1d4c3efe..947ec9fb3c2 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
@@ -1201,9 +1201,6 @@ void ShardingCatalogManager::_pushClusterParametersToNewShard(
void ShardingCatalogManager::_standardizeClusterParameters(OperationContext* opCtx,
RemoteCommandTargeter* targeter) {
- if (!gFeatureFlagClusterWideConfig.isEnabled(serverGlobalParams.featureCompatibility))
- return;
-
auto clusterParameterDocs =
uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
opCtx,
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp
index 1cc5f1c677d..9b9e48cfe0b 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp
@@ -80,7 +80,7 @@ TEST_F(SplitChunkTest, SplitExistingChunkCorrectlyShouldSucceed) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -105,16 +105,16 @@ TEST_F(SplitChunkTest, SplitExistingChunkCorrectlyShouldSucceed) {
splitPoints,
"shard0000",
false /* fromChunkSplitter*/));
- auto collVersion =
- ChunkVersion::fromBSONPositionalOrNewerFormat(versions["collectionVersion"]);
- auto shardVersion = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
+ auto collVersion = ChunkVersion::parse(versions["collectionVersion"]);
+ auto shardVersion = ChunkVersion::parse(versions["shardVersion"]);
ASSERT_TRUE(origVersion.isOlderThan(shardVersion));
ASSERT_EQ(collVersion, shardVersion);
// Check for increment on mergedChunk's minor version
- auto expectedShardVersion = ChunkVersion(
- origVersion.majorVersion(), origVersion.minorVersion() + 2, collEpoch, collTimestamp);
+ auto expectedShardVersion =
+ ChunkVersion({collEpoch, collTimestamp},
+ {origVersion.majorVersion(), origVersion.minorVersion() + 2});
ASSERT_EQ(expectedShardVersion, shardVersion);
ASSERT_EQ(shardVersion, collVersion);
@@ -164,7 +164,7 @@ TEST_F(SplitChunkTest, MultipleSplitsOnExistingChunkShouldSucceed) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -256,7 +256,7 @@ TEST_F(SplitChunkTest, NewSplitShouldClaimHighestVersion) {
chunk2.setCollectionUUID(collUuid);
// set up first chunk
- auto origVersion = ChunkVersion(1, 2, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 2});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -270,7 +270,7 @@ TEST_F(SplitChunkTest, NewSplitShouldClaimHighestVersion) {
splitPoints.push_back(chunkSplitPoint);
// set up second chunk (chunk2)
- auto competingVersion = ChunkVersion(2, 1, collEpoch, collTimestamp);
+ auto competingVersion = ChunkVersion({collEpoch, collTimestamp}, {2, 1});
chunk2.setVersion(competingVersion);
chunk2.setShard(ShardId(_shardName));
chunk2.setMin(BSON("a" << 10));
@@ -324,7 +324,7 @@ TEST_F(SplitChunkTest, PreConditionFailErrors) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -362,7 +362,7 @@ TEST_F(SplitChunkTest, NonExisingNamespaceErrors) {
ChunkType chunk;
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -398,7 +398,7 @@ TEST_F(SplitChunkTest, NonMatchingEpochsOfChunkAndRequestErrors) {
ChunkType chunk;
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -434,7 +434,7 @@ TEST_F(SplitChunkTest, SplitPointsOutOfOrderShouldFail) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -470,7 +470,7 @@ TEST_F(SplitChunkTest, SplitPointsOutOfRangeAtMinShouldFail) {
ChunkType chunk;
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -507,7 +507,7 @@ TEST_F(SplitChunkTest, SplitPointsOutOfRangeAtMaxShouldFail) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -543,7 +543,7 @@ TEST_F(SplitChunkTest, SplitPointsWithDollarPrefixShouldFail) {
ChunkType chunk;
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -588,7 +588,7 @@ TEST_F(SplitChunkTest, CantCommitSplitFromChunkSplitterDuringDefragmentation) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto version = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto version = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(version);
chunk.setShard(ShardId(_shardName));
diff --git a/src/mongo/db/s/create_collection_coordinator.cpp b/src/mongo/db/s/create_collection_coordinator.cpp
index a5b499cfe81..ccbad667d35 100644
--- a/src/mongo/db/s/create_collection_coordinator.cpp
+++ b/src/mongo/db/s/create_collection_coordinator.cpp
@@ -359,39 +359,8 @@ void broadcastDropCollection(OperationContext* opCtx,
} // namespace
-CreateCollectionCoordinator::CreateCollectionCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(CreateCollectionCoordinatorDocument::parse(
- IDLParserErrorContext("CreateCollectionCoordinatorDocument"), initialState)),
- _request(_doc.getCreateCollectionRequest()),
- _critSecReason(BSON("command"
- << "createCollection"
- << "ns" << nss().toString())) {}
-
-boost::optional<BSONObj> CreateCollectionCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.appendElements(_request.toBSON());
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "CreateCollectionCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
+void CreateCollectionCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ cmdInfoBuilder->appendElements(_request.toBSON());
}
void CreateCollectionCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
@@ -435,9 +404,9 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
// Additionally we want to perform a majority write on the CSRS to ensure that
// all the subsequent reads will see all the writes performed from a previous
// execution of this coordinator.
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
// Log the start of the event only if we're not recovering.
@@ -461,7 +430,7 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
->releaseRecoverableCriticalSection(
opCtx,
nss(),
- _getCriticalSectionReason(),
+ _critSecReason,
ShardingCatalogClient::kMajorityWriteConcern);
_result = createCollectionResponseOpt;
@@ -474,10 +443,7 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
// presence of a stepdown.
RecoverableCriticalSectionService::get(opCtx)
->acquireRecoverableCriticalSectionBlockWrites(
- opCtx,
- nss(),
- _getCriticalSectionReason(),
- ShardingCatalogClient::kMajorityWriteConcern);
+ opCtx, nss(), _critSecReason, ShardingCatalogClient::kMajorityWriteConcern);
if (!_firstExecution) {
auto uuid = sharding_ddl_util::getCollectionUUID(opCtx, nss());
@@ -489,12 +455,11 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
"Removing partial changes from previous run",
"namespace"_attr = nss());
- _doc = _updateSession(opCtx, _doc);
- cleanupPartialChunksFromPreviousAttempt(
- opCtx, *uuid, getCurrentSession(_doc));
+ _updateSession(opCtx);
+ cleanupPartialChunksFromPreviousAttempt(opCtx, *uuid, getCurrentSession());
- _doc = _updateSession(opCtx, _doc);
- broadcastDropCollection(opCtx, nss(), **executor, getCurrentSession(_doc));
+ _updateSession(opCtx);
+ broadcastDropCollection(opCtx, nss(), **executor, getCurrentSession());
}
}
@@ -517,28 +482,18 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
->promoteRecoverableCriticalSectionToBlockAlsoReads(
opCtx,
nss(),
- _getCriticalSectionReason(),
+ _critSecReason,
ShardingCatalogClient::kMajorityWriteConcern);
- _doc = _updateSession(opCtx, _doc);
- try {
- _createCollectionOnNonPrimaryShards(opCtx, getCurrentSession(_doc));
- } catch (const ExceptionFor<ErrorCodes::NotARetryableWriteCommand>&) {
- // Older 5.0 binaries don't support running the
- // _shardsvrCreateCollectionParticipant command as a retryable write yet. In
- // that case, retry without attaching session info.
- _createCollectionOnNonPrimaryShards(opCtx, boost::none);
- }
+ _updateSession(opCtx);
+ _createCollectionOnNonPrimaryShards(opCtx, getCurrentSession());
_commit(opCtx);
}
// End of the critical section, from now on, read and writes are permitted.
RecoverableCriticalSectionService::get(opCtx)->releaseRecoverableCriticalSection(
- opCtx,
- nss(),
- _getCriticalSectionReason(),
- ShardingCatalogClient::kMajorityWriteConcern);
+ opCtx, nss(), _critSecReason, ShardingCatalogClient::kMajorityWriteConcern);
// Slow path. Create chunks (which might incur in an index scan) and commit must be
// done outside of the critical section to prevent writes from stalling in unsharded
@@ -566,10 +521,7 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
RecoverableCriticalSectionService::get(opCtx)->releaseRecoverableCriticalSection(
- opCtx,
- nss(),
- _getCriticalSectionReason(),
- ShardingCatalogClient::kMajorityWriteConcern);
+ opCtx, nss(), _critSecReason, ShardingCatalogClient::kMajorityWriteConcern);
}
return status;
});
@@ -751,7 +703,7 @@ void CreateCollectionCoordinator::_createChunks(OperationContext* opCtx) {
}
void CreateCollectionCoordinator::_createCollectionOnNonPrimaryShards(
- OperationContext* opCtx, const boost::optional<OperationSessionInfo>& osi) {
+ OperationContext* opCtx, const OperationSessionInfo& osi) {
LOGV2_DEBUG(5277905,
2,
"Create collection _createCollectionOnNonPrimaryShards",
@@ -778,10 +730,9 @@ void CreateCollectionCoordinator::_createCollectionOnNonPrimaryShards(
createCollectionParticipantRequest.setIdIndex(idIndex);
createCollectionParticipantRequest.setIndexes(indexes);
- requests.emplace_back(
- chunkShardId,
- CommandHelpers::appendMajorityWriteConcern(
- createCollectionParticipantRequest.toBSON(osi ? osi->toBSON() : BSONObj())));
+ requests.emplace_back(chunkShardId,
+ CommandHelpers::appendMajorityWriteConcern(
+ createCollectionParticipantRequest.toBSON(osi.toBSON())));
initializedShards.emplace(chunkShardId);
}
@@ -817,8 +768,8 @@ void CreateCollectionCoordinator::_commit(OperationContext* opCtx) {
LOGV2_DEBUG(5277906, 2, "Create collection _commit", "namespace"_attr = nss());
// Upsert Chunks.
- _doc = _updateSession(opCtx, _doc);
- insertChunks(opCtx, _initialChunks->chunks, getCurrentSession(_doc));
+ _updateSession(opCtx);
+ insertChunks(opCtx, _initialChunks->chunks, getCurrentSession());
CollectionType coll(nss(),
_initialChunks->collVersion().epoch(),
@@ -841,9 +792,9 @@ void CreateCollectionCoordinator::_commit(OperationContext* opCtx) {
coll.setUnique(*_request.getUnique());
}
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
try {
- insertCollectionEntry(opCtx, nss(), coll, getCurrentSession(_doc));
+ insertCollectionEntry(opCtx, nss(), coll, getCurrentSession());
notifyChangeStreamsOnShardCollection(opCtx, nss(), *_collectionUUID, _request.toBSON());
@@ -927,57 +878,4 @@ void CreateCollectionCoordinator::_logEndCreateCollection(OperationContext* opCt
opCtx, "shardCollection.end", nss().ns(), collectionDetail.obj());
}
-// Phase change API.
-
-void CreateCollectionCoordinator::_enterPhase(Phase newPhase) {
- CoordDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(5565600,
- 2,
- "Create collection coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = CreateCollectionCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = CreateCollectionCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
-}
-
-const BSONObj CreateCollectionCoordinatorDocumentPre60Compatible::kPre60IncompatibleFields =
- BSON(CreateCollectionRequest::kCollectionUUIDFieldName
- << 1 << CreateCollectionRequest::kImplicitlyCreateIndexFieldName << 1
- << CreateCollectionRequest::kEnforceUniquenessCheckFieldName << 1);
-
-void CreateCollectionCoordinatorDocumentPre60Compatible::serialize(BSONObjBuilder* builder) const {
- BSONObjBuilder internalBuilder;
- CreateCollectionCoordinatorDocument::serialize(&internalBuilder);
- internalBuilder.asTempObj().filterFieldsUndotted(builder, kPre60IncompatibleFields, false);
-}
-
-BSONObj CreateCollectionCoordinatorDocumentPre60Compatible::toBSON() const {
- BSONObjBuilder builder;
- serialize(&builder);
- return builder.obj();
-}
-
-CreateCollectionCoordinatorPre60Compatible::CreateCollectionCoordinatorPre60Compatible(
- ShardingDDLCoordinatorService* service, const BSONObj& initialState)
- : CreateCollectionCoordinator(service, initialState),
- _critSecReason(
- BSON("command"
- << "createCollection"
- << "ns" << nss().toString() << "request"
- << _request.toBSON().filterFieldsUndotted(
- CreateCollectionCoordinatorDocumentPre60Compatible::kPre60IncompatibleFields,
- false))) {}
-
} // namespace mongo
diff --git a/src/mongo/db/s/create_collection_coordinator.h b/src/mongo/db/s/create_collection_coordinator.h
index 565972afcb1..a1f8bbea4e8 100644
--- a/src/mongo/db/s/create_collection_coordinator.h
+++ b/src/mongo/db/s/create_collection_coordinator.h
@@ -39,21 +39,26 @@
namespace mongo {
-class CreateCollectionCoordinator : public ShardingDDLCoordinator {
+class CreateCollectionCoordinator
+ : public RecoverableShardingDDLCoordinator<CreateCollectionCoordinatorDocument,
+ CreateCollectionCoordinatorPhaseEnum> {
public:
using CoordDoc = CreateCollectionCoordinatorDocument;
using Phase = CreateCollectionCoordinatorPhaseEnum;
- CreateCollectionCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState);
+ CreateCollectionCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState)
+ : RecoverableShardingDDLCoordinator(service, "CreateCollectionCoordinator", initialState),
+ _request(_doc.getCreateCollectionRequest()),
+ _critSecReason(BSON("command"
+ << "createCollection"
+ << "ns" << nss().toString())) {}
+
~CreateCollectionCoordinator() = default;
void checkIfOptionsConflict(const BSONObj& coorDoc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
/**
* Waits for the termination of the parent DDLCoordinator (so all the resources are liberated)
@@ -66,38 +71,16 @@ public:
}
protected:
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("CreateCollectionCoordinator::_docMutex");
- CoordDoc _doc;
-
const mongo::CreateCollectionRequest _request;
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return CreateCollectionCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- };
-
- void _enterPhase(Phase newState);
-
/**
* Performs all required checks before holding the critical sections.
*/
@@ -128,7 +111,7 @@ private:
* participant shards.
*/
void _createCollectionOnNonPrimaryShards(OperationContext* opCtx,
- const boost::optional<OperationSessionInfo>& osi);
+ const OperationSessionInfo& osi);
/**
* Does the following writes:
@@ -147,16 +130,6 @@ private:
*/
void _logEndCreateCollection(OperationContext* opCtx);
- /**
- * Returns the BSONObj used as critical section reason
- *
- * TODO SERVER-64720 remove this function, directly access _critSecReason
- *
- */
- virtual const BSONObj& _getCriticalSectionReason() const {
- return _critSecReason;
- };
-
const BSONObj _critSecReason;
// The shard key of the collection, static for the duration of the coordinator and reflects the
@@ -177,32 +150,4 @@ private:
boost::optional<bool> _collectionEmpty;
};
-class CreateCollectionCoordinatorDocumentPre60Compatible final
- : public CreateCollectionCoordinatorDocument {
- // TODO SERVER-64720 remove once 6.0 becomes last LTS
-public:
- using CreateCollectionCoordinatorDocument::CreateCollectionCoordinatorDocument;
-
- static const BSONObj kPre60IncompatibleFields;
- void serialize(BSONObjBuilder* builder) const;
- BSONObj toBSON() const;
-};
-
-class CreateCollectionCoordinatorPre60Compatible final : public CreateCollectionCoordinator {
- // TODO SERVER-64720 remove once 6.0 becomes last LTS
-public:
- using CreateCollectionCoordinator::CreateCollectionCoordinator;
- using CoordDoc = CreateCollectionCoordinatorDocumentPre60Compatible;
-
- CreateCollectionCoordinatorPre60Compatible(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState);
-
- virtual const BSONObj& _getCriticalSectionReason() const override {
- return _critSecReason;
- };
-
-private:
- const BSONObj _critSecReason;
-};
-
} // namespace mongo
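The coordinator refactor in the two files above drops the per-class state-document and phase bookkeeping (_doc, _docMutex, _enterPhase, _executePhase, reportForCurrentOp), which is presumably now owned by the RecoverableShardingDDLCoordinator base class introduced here. The visible effect at call sites, copied from the hunks above, is that the session helpers no longer thread the coordinator document through explicitly:

    // Before this patch:
    //   _doc = _updateSession(opCtx, _doc);
    //   insertChunks(opCtx, _initialChunks->chunks, getCurrentSession(_doc));
    // After this patch:
    //   _updateSession(opCtx);
    //   insertChunks(opCtx, _initialChunks->chunks, getCurrentSession());
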
diff --git a/src/mongo/db/s/create_collection_coordinator_test.cpp b/src/mongo/db/s/create_collection_coordinator_test.cpp
deleted file mode 100644
index 772ac8933a1..00000000000
--- a/src/mongo/db/s/create_collection_coordinator_test.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/unittest/unittest.h"
-
-#include "mongo/db/s/create_collection_coordinator.h"
-
-namespace mongo {
-namespace {
-
-static const auto kShardKey = BSON("x" << 1);
-static const NamespaceString kNs{"db.test"};
-
-TEST(CreateCollectionCoordinator, pre60CompatibleGetters) {
- const auto kUUID = UUID::gen();
-
- auto req = [&] {
- CreateCollectionRequest creq;
- creq.setShardKey(kShardKey.getOwned());
- creq.setCollectionUUID(kUUID);
- creq.setImplicitlyCreateIndex(false);
- creq.setEnforceUniquenessCheck(false);
- return creq;
- };
-
- auto pre60CompatDoc = [&] {
- auto doc = CreateCollectionCoordinatorDocumentPre60Compatible();
- doc.setShardingDDLCoordinatorMetadata(
- {{kNs, DDLCoordinatorTypeEnum::kCreateCollectionPre60Compatible}});
- doc.setCreateCollectionRequest(req());
- return doc;
- }();
-
- auto latestDoc = [&] {
- auto doc = CreateCollectionCoordinatorDocument();
- doc.setShardingDDLCoordinatorMetadata({{kNs, DDLCoordinatorTypeEnum::kCreateCollection}});
- doc.setCreateCollectionRequest(req());
- return doc;
- }();
-
- ASSERT(pre60CompatDoc.getShardKey());
- ASSERT(latestDoc.getShardKey());
- ASSERT_BSONOBJ_EQ(*pre60CompatDoc.getShardKey(), *latestDoc.getShardKey());
- ASSERT(pre60CompatDoc.getCollectionUUID());
- ASSERT(latestDoc.getCollectionUUID());
- ASSERT_EQ(*pre60CompatDoc.getCollectionUUID(), *latestDoc.getCollectionUUID());
- ASSERT_EQ(pre60CompatDoc.getImplicitlyCreateIndex(), latestDoc.getImplicitlyCreateIndex());
- ASSERT_EQ(pre60CompatDoc.getEnforceUniquenessCheck(), latestDoc.getEnforceUniquenessCheck());
-}
-
-TEST(CreateCollectionCoordinator, pre60CompatibleSerialization) {
- auto req = [&] {
- CreateCollectionRequest creq;
- creq.setShardKey(kShardKey.getOwned());
- creq.setCollectionUUID(UUID::gen());
- creq.setImplicitlyCreateIndex(false);
- creq.setEnforceUniquenessCheck(false);
- return creq;
- };
-
- auto pre60CompatDoc = [&] {
- auto doc = CreateCollectionCoordinatorDocumentPre60Compatible();
- doc.setShardingDDLCoordinatorMetadata(
- {{kNs, DDLCoordinatorTypeEnum::kCreateCollectionPre60Compatible}});
- doc.setCreateCollectionRequest(req());
- return doc;
- }();
-
- BSONObjBuilder builder;
- pre60CompatDoc.serialize(&builder);
- auto serialized = builder.asTempObj();
-
- ASSERT_BSONOBJ_EQ(
- BSONObj{},
- serialized.extractFieldsUndotted(
- CreateCollectionCoordinatorDocumentPre60Compatible::kPre60IncompatibleFields));
-}
-
-TEST(CreateCollectionCoordinator, pre60CompatibleToBSON) {
-
- auto req = [&] {
- CreateCollectionRequest creq;
- creq.setShardKey(kShardKey.getOwned());
- creq.setCollectionUUID(UUID::gen());
- creq.setImplicitlyCreateIndex(false);
- creq.setEnforceUniquenessCheck(false);
- return creq;
- };
-
- auto pre60CompatDoc = [&] {
- auto doc = CreateCollectionCoordinatorDocumentPre60Compatible();
- doc.setShardingDDLCoordinatorMetadata(
- {{kNs, DDLCoordinatorTypeEnum::kCreateCollectionPre60Compatible}});
- doc.setCreateCollectionRequest(req());
- return doc;
- }();
-
- auto serialized = pre60CompatDoc.toBSON();
-
- ASSERT_BSONOBJ_EQ(
- BSONObj{},
- serialized.extractFieldsUndotted(
- CreateCollectionCoordinatorDocumentPre60Compatible::kPre60IncompatibleFields));
-}
-
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/db/s/database_sharding_state.cpp b/src/mongo/db/s/database_sharding_state.cpp
index ad18b8b9526..776b23857d0 100644
--- a/src/mongo/db/s/database_sharding_state.cpp
+++ b/src/mongo/db/s/database_sharding_state.cpp
@@ -185,9 +185,12 @@ void DatabaseShardingState::checkDbVersion(OperationContext* opCtx, DSSLock&) co
auto criticalSectionSignal = _critSec.getSignal(
opCtx->lockState()->isWriteLocked() ? ShardingMigrationCriticalSection::kWrite
: ShardingMigrationCriticalSection::kRead);
+ const std::string reason =
+ _critSec.getReason() ? _critSec.getReason()->toString() : "unknown";
uassert(
StaleDbRoutingVersion(_dbName, *clientDbVersion, boost::none, criticalSectionSignal),
- str::stream() << "movePrimary commit in progress for " << _dbName,
+ str::stream() << "The critical section for " << _dbName
+ << " is acquired with reason: " << reason,
!criticalSectionSignal);
}
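
The checkDbVersion hunk above swaps the fixed "movePrimary commit in progress" message for one that reports whatever reason the critical section was acquired with, falling back to "unknown" when no reason is stored. A minimal standalone sketch of that optional-with-fallback pattern, using std::optional<std::string> in place of the server's optional BSONObj reason:

#include <iostream>
#include <optional>
#include <string>

// Hypothetical stand-in for the critical section's optional reason document.
std::string describeReason(const std::optional<std::string>& reason) {
    // Same shape as the hunk above: use the stored reason if present,
    // otherwise report "unknown".
    return reason ? *reason : std::string("unknown");
}

int main() {
    std::cout << describeReason(std::nullopt) << "\n";               // prints: unknown
    std::cout << describeReason(std::string("movePrimary")) << "\n";  // prints: movePrimary
    return 0;
}
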
diff --git a/src/mongo/db/s/drop_collection_coordinator.cpp b/src/mongo/db/s/drop_collection_coordinator.cpp
index 7dabcae0f16..fa1e2f4b84e 100644
--- a/src/mongo/db/s/drop_collection_coordinator.cpp
+++ b/src/mongo/db/s/drop_collection_coordinator.cpp
@@ -47,37 +47,6 @@
namespace mongo {
-DropCollectionCoordinator::DropCollectionCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(DropCollectionCoordinatorDocument::parse(
- IDLParserErrorContext("DropCollectionCoordinatorDocument"), initialState)) {}
-
-boost::optional<BSONObj> DropCollectionCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
-
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "DropCollectionCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
-}
-
DropReply DropCollectionCoordinator::dropCollectionLocally(OperationContext* opCtx,
const NamespaceString& nss) {
{
@@ -101,29 +70,6 @@ DropReply DropCollectionCoordinator::dropCollectionLocally(OperationContext* opC
return result;
}
-void DropCollectionCoordinator::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(5390501,
- 2,
- "Drop collection coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = DropCollectionCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = DropCollectionCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
-}
-
ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept {
@@ -161,7 +107,7 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
// Persist the collection info before sticking to using its uuid. This ensures this
// node is still the RS primary, so it was also the primary at the moment we read
// the collection metadata.
- _doc = _updateStateDocument(opCtx, StateDoc(_doc));
+ _updateStateDocument(opCtx, StateDoc(_doc));
if (_doc.getCollInfo()) {
sharding_ddl_util::stopMigrations(opCtx, nss(), _doc.getCollInfo()->getUuid());
@@ -178,9 +124,9 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
// Perform a noop write on the participants in order to advance the txnNumber
// for this coordinator's lsid so that requests with older txnNumbers can no
// longer execute.
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
const auto collIsSharded = bool(_doc.getCollInfo());
@@ -199,12 +145,11 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
}
// Remove tags even if the collection is not sharded or didn't exist
- _doc = _updateSession(opCtx, _doc);
- sharding_ddl_util::removeTagsMetadataFromConfig(
- opCtx, nss(), getCurrentSession(_doc));
+ _updateSession(opCtx);
+ sharding_ddl_util::removeTagsMetadataFromConfig(opCtx, nss(), getCurrentSession());
// Get an lsid and an incremented txnNumber. Ensures we are the primary
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
const auto primaryShardId = ShardingState::get(opCtx)->shardId();
@@ -217,13 +162,13 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
participants.end());
sharding_ddl_util::sendDropCollectionParticipantCommandToShards(
- opCtx, nss(), participants, **executor, getCurrentSession(_doc));
+ opCtx, nss(), participants, **executor, getCurrentSession());
// The sharded collection must be dropped on the primary shard after it has been
// dropped on all of the other shards to ensure it can only be re-created as
// unsharded with a higher optime than all of the drops.
sharding_ddl_util::sendDropCollectionParticipantCommandToShards(
- opCtx, nss(), {primaryShardId}, **executor, getCurrentSession(_doc));
+ opCtx, nss(), {primaryShardId}, **executor, getCurrentSession());
ShardingLogging::get(opCtx)->logChange(opCtx, "dropCollection", nss().ns());
LOGV2(5390503, "Collection dropped", "namespace"_attr = nss());
diff --git a/src/mongo/db/s/drop_collection_coordinator.h b/src/mongo/db/s/drop_collection_coordinator.h
index 140013e41e1..46b37d2a415 100644
--- a/src/mongo/db/s/drop_collection_coordinator.h
+++ b/src/mongo/db/s/drop_collection_coordinator.h
@@ -35,20 +35,20 @@
#include "mongo/db/s/sharding_ddl_coordinator.h"
namespace mongo {
-class DropCollectionCoordinator final : public ShardingDDLCoordinator {
+class DropCollectionCoordinator final
+ : public RecoverableShardingDDLCoordinator<DropCollectionCoordinatorDocument,
+ DropCollectionCoordinatorPhaseEnum> {
public:
using StateDoc = DropCollectionCoordinatorDocument;
using Phase = DropCollectionCoordinatorPhaseEnum;
- DropCollectionCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState);
+ DropCollectionCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState)
+ : RecoverableShardingDDLCoordinator(service, "DropCollectionCoordinator", initialState) {}
+
~DropCollectionCoordinator() = default;
void checkIfOptionsConflict(const BSONObj& doc) const override {}
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
-
/**
* Locally drops a collection, cleans its CollectionShardingRuntime metadata and refreshes the
* catalog cache.
@@ -56,34 +56,12 @@ public:
static DropReply dropCollectionLocally(OperationContext* opCtx, const NamespaceString& nss);
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return DropCollectionCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
-
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("DropCollectionCoordinator::_docMutex");
- DropCollectionCoordinatorDocument _doc;
};
} // namespace mongo
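
The removed _executePhase and _enterPhase members above implement a resumable phase pattern: a phase is skipped if the persisted document already records a later phase, and the transition is persisted before the body runs for the first time. The diff suggests this machinery now lives in RecoverableShardingDDLCoordinator; the sketch below only illustrates the pattern, with an in-memory document standing in for the _insertStateDocument/_updateStateDocument persistence.

#include <iostream>
#include <utility>

enum class Phase { kUnset = 0, kFreezeCollection = 1, kDropCollection = 2 };

struct StateDoc {
    Phase phase = Phase::kUnset;
};

class PhasedCoordinator {
public:
    // Returns a callable that runs 'func' only if the coordinator has not
    // already advanced past 'newPhase', persisting the transition first.
    template <typename Func>
    auto executePhase(Phase newPhase, Func&& func) {
        return [this, newPhase, func = std::forward<Func>(func)] {
            if (_doc.phase > newPhase) {
                return;  // Already past this phase (e.g. after a restart).
            }
            if (_doc.phase < newPhase) {
                enterPhase(newPhase);  // Record the transition before first execution.
            }
            func();
        };
    }

private:
    void enterPhase(Phase newPhase) {
        // The real coordinator persists the updated state document here;
        // this sketch only updates it in memory.
        _doc.phase = newPhase;
        std::cout << "entered phase " << static_cast<int>(newPhase) << "\n";
    }

    StateDoc _doc;
};

int main() {
    PhasedCoordinator coordinator;
    coordinator.executePhase(Phase::kFreezeCollection, [] { std::cout << "freeze\n"; })();
    coordinator.executePhase(Phase::kDropCollection, [] { std::cout << "drop\n"; })();
    // Re-running an earlier phase is a no-op once a later one has been reached.
    coordinator.executePhase(Phase::kFreezeCollection, [] { std::cout << "freeze again\n"; })();
    return 0;
}

When the coordinator is re-run from the start, phases whose transition was already persisted are skipped and execution resumes at the first unfinished one.
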
diff --git a/src/mongo/db/s/drop_database_coordinator.cpp b/src/mongo/db/s/drop_database_coordinator.cpp
index b49e36d302c..54b8ef1108e 100644
--- a/src/mongo/db/s/drop_database_coordinator.cpp
+++ b/src/mongo/db/s/drop_database_coordinator.cpp
@@ -122,11 +122,11 @@ void DropDatabaseCoordinator::_dropShardedCollection(
sharding_ddl_util::removeCollAndChunksMetadataFromConfig(
opCtx, coll, ShardingCatalogClient::kMajorityWriteConcern);
- _doc = _updateSession(opCtx, _doc);
- sharding_ddl_util::removeTagsMetadataFromConfig(opCtx, nss, getCurrentSession(_doc));
+ _updateSession(opCtx);
+ sharding_ddl_util::removeTagsMetadataFromConfig(opCtx, nss, getCurrentSession());
const auto primaryShardId = ShardingState::get(opCtx)->shardId();
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
// We need to send the drop to all the shards because both movePrimary and
// moveChunk leave garbage behind for sharded collections.
@@ -135,67 +135,13 @@ void DropDatabaseCoordinator::_dropShardedCollection(
participants.erase(std::remove(participants.begin(), participants.end(), primaryShardId),
participants.end());
sharding_ddl_util::sendDropCollectionParticipantCommandToShards(
- opCtx, nss, participants, **executor, getCurrentSession(_doc));
+ opCtx, nss, participants, **executor, getCurrentSession());
// The sharded collection must be dropped on the primary shard after it has been dropped on all
// of the other shards to ensure it can only be re-created as unsharded with a higher optime
// than all of the drops.
sharding_ddl_util::sendDropCollectionParticipantCommandToShards(
- opCtx, nss, {primaryShardId}, **executor, getCurrentSession(_doc));
-}
-
-DropDatabaseCoordinator::DropDatabaseCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(DropDatabaseCoordinatorDocument::parse(
- IDLParserErrorContext("DropDatabaseCoordinatorDocument"), initialState)),
- _dbName(nss().db()) {}
-
-boost::optional<BSONObj> DropDatabaseCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "DropDatabaseCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
-}
-
-void DropDatabaseCoordinator::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(5494501,
- 2,
- "Drop database coordinator phase transition",
- "db"_attr = _dbName,
- "newPhase"_attr = DropDatabaseCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = DropDatabaseCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
+ opCtx, nss, {primaryShardId}, **executor, getCurrentSession());
}
void DropDatabaseCoordinator::_clearDatabaseInfoOnPrimary(OperationContext* opCtx) {
@@ -238,9 +184,9 @@ ExecutorFuture<void> DropDatabaseCoordinator::_runImpl(
// Perform a noop write on the participants in order to advance the txnNumber
// for this coordinator's lsid so that requests with older txnNumbers can no
// longer execute.
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
ShardingLogging::get(opCtx)->logChange(opCtx, "dropDatabase.start", _dbName);
@@ -284,7 +230,7 @@ ExecutorFuture<void> DropDatabaseCoordinator::_runImpl(
auto newStateDoc = _doc;
newStateDoc.setCollInfo(coll);
- _doc = _updateStateDocument(opCtx, std::move(newStateDoc));
+ _updateStateDocument(opCtx, std::move(newStateDoc));
_dropShardedCollection(opCtx, coll, executor);
}
diff --git a/src/mongo/db/s/drop_database_coordinator.h b/src/mongo/db/s/drop_database_coordinator.h
index 47d63310a19..f70ea2981cb 100644
--- a/src/mongo/db/s/drop_database_coordinator.h
+++ b/src/mongo/db/s/drop_database_coordinator.h
@@ -34,48 +34,29 @@
namespace mongo {
-class DropDatabaseCoordinator final : public ShardingDDLCoordinator {
+class DropDatabaseCoordinator final
+ : public RecoverableShardingDDLCoordinator<DropDatabaseCoordinatorDocument,
+ DropDatabaseCoordinatorPhaseEnum> {
+
public:
using StateDoc = DropDatabaseCoordinatorDocument;
using Phase = DropDatabaseCoordinatorPhaseEnum;
- DropDatabaseCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState);
+ DropDatabaseCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState)
+ : RecoverableShardingDDLCoordinator(service, "DropDatabaseCoordinator", initialState),
+ _dbName(nss().db()) {}
~DropDatabaseCoordinator() = default;
void checkIfOptionsConflict(const BSONObj& doc) const override {}
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
-
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- stdx::lock_guard l{_docMutex};
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return DropDatabaseCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
void _dropShardedCollection(OperationContext* opCtx,
const CollectionType& coll,
std::shared_ptr<executor::ScopedTaskExecutor> executor);
@@ -84,10 +65,6 @@ private:
void _clearDatabaseInfoOnSecondaries(OperationContext* opCtx);
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("DropDatabaseCoordinator::_docMutex");
- DropDatabaseCoordinatorDocument _doc;
-
-
StringData _dbName;
};
diff --git a/src/mongo/db/s/flush_resharding_state_change_command.cpp b/src/mongo/db/s/flush_resharding_state_change_command.cpp
index 85f0c42cff0..95439564643 100644
--- a/src/mongo/db/s/flush_resharding_state_change_command.cpp
+++ b/src/mongo/db/s/flush_resharding_state_change_command.cpp
@@ -131,7 +131,7 @@ public:
.getAsync([](auto) {});
// Ensure the command isn't run on a stale primary.
- doNoopWrite(opCtx, "_flushReshardingStateChange no-op", ns());
+ resharding::doNoopWrite(opCtx, "_flushReshardingStateChange no-op", ns());
}
};
} _flushReshardingStateChange;
diff --git a/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp b/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
index 9ea3f94af97..f85f73c0ef4 100644
--- a/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
+++ b/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
@@ -117,7 +117,8 @@ public:
boost::optional<SharedSemiFuture<void>> criticalSectionSignal;
{
- AutoGetCollection autoColl(opCtx, ns(), MODE_IS);
+ AutoGetCollection autoColl(
+ opCtx, ns(), MODE_IS, AutoGetCollectionViewMode::kViewsPermitted);
// If the primary is in the critical section, secondaries must wait for the commit
// to finish on the primary in case a secondary's caller has an afterClusterTime
diff --git a/src/mongo/db/s/metadata_manager_test.cpp b/src/mongo/db/s/metadata_manager_test.cpp
index 2f39ef09147..a842e4cfe03 100644
--- a/src/mongo/db/s/metadata_manager_test.cpp
+++ b/src/mongo/db/s/metadata_manager_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include <boost/optional.hpp>
#include "mongo/bson/bsonobjbuilder.h"
@@ -93,7 +91,7 @@ protected:
boost::none,
boost::none /* chunkSizeBytes */,
true,
- {ChunkType{uuid, range, ChunkVersion(1, 0, epoch, Timestamp(1, 1)), kOtherShard}});
+ {ChunkType{uuid, range, ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}), kOtherShard}});
return CollectionMetadata(ChunkManager(kThisShard,
DatabaseVersion(UUID::gen(), Timestamp(1, 1)),
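
Several test files in this diff move from the positional ChunkVersion(1, 0, epoch, Timestamp(1, 1)) constructor to ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}), grouping the epoch/timestamp pair separately from the (major, minor) pair. The purely illustrative types below mimic that call shape; the type and field names are invented for the sketch and are not the server's.

#include <cstdint>
#include <iostream>
#include <string>

// Illustrative stand-ins only; the real ChunkVersion lives in the sharding code.
struct EpochAndTimestamp {
    std::string epoch;
    uint64_t timestamp;
};

struct MajorMinor {
    uint32_t major;
    uint32_t minor;
};

struct VersionLike {
    VersionLike(EpochAndTimestamp generation, MajorMinor placement)
        : generation(generation), placement(placement) {}
    EpochAndTimestamp generation;
    MajorMinor placement;
};

int main() {
    // Same call shape as ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}) above.
    VersionLike v({"someEpochOid", 1}, {1, 0});
    std::cout << v.generation.epoch << " " << v.placement.major << "."
              << v.placement.minor << "\n";
    return 0;
}
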
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
index 462baee5069..1a76fd9a5a5 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
@@ -300,12 +300,12 @@ Status MigrationChunkClonerSourceLegacy::startClone(OperationContext* opCtx,
opCtx->recoveryUnit()->setPrepareConflictBehavior(
PrepareConflictBehavior::kIgnoreConflicts);
- auto storeCurrentLocsStatus = _storeCurrentLocs(opCtx);
- if (storeCurrentLocsStatus == ErrorCodes::ChunkTooBig && _forceJumbo) {
+ auto storeCurrentRecordIdStatus = _storeCurrentRecordId(opCtx);
+ if (storeCurrentRecordIdStatus == ErrorCodes::ChunkTooBig && _forceJumbo) {
stdx::lock_guard<Latch> sl(_mutex);
_jumboChunkCloneState.emplace();
- } else if (!storeCurrentLocsStatus.isOK()) {
- return storeCurrentLocsStatus;
+ } else if (!storeCurrentRecordIdStatus.isOK()) {
+ return storeCurrentRecordIdStatus;
}
}
@@ -381,7 +381,7 @@ StatusWith<BSONObj> MigrationChunkClonerSourceLegacy::commitClone(OperationConte
}
} else {
invariant(PlanExecutor::IS_EOF == _jumboChunkCloneState->clonerState);
- invariant(_cloneLocs.empty());
+ invariant(_cloneRecordIds.empty());
}
}
@@ -680,17 +680,16 @@ void MigrationChunkClonerSourceLegacy::_nextCloneBatchFromIndexScan(OperationCon
_jumboChunkCloneState->clonerExec->detachFromOperationContext();
}
-void MigrationChunkClonerSourceLegacy::_nextCloneBatchFromCloneLocs(OperationContext* opCtx,
- const CollectionPtr& collection,
- BSONArrayBuilder* arrBuilder) {
+void MigrationChunkClonerSourceLegacy::_nextCloneBatchFromCloneRecordIds(
+ OperationContext* opCtx, const CollectionPtr& collection, BSONArrayBuilder* arrBuilder) {
ElapsedTracker tracker(opCtx->getServiceContext()->getFastClockSource(),
internalQueryExecYieldIterations.load(),
Milliseconds(internalQueryExecYieldPeriodMS.load()));
stdx::unique_lock<Latch> lk(_mutex);
- auto iter = _cloneLocs.begin();
+ auto iter = _cloneRecordIds.begin();
- for (; iter != _cloneLocs.end(); ++iter) {
+ for (; iter != _cloneRecordIds.end(); ++iter) {
// We must always make progress in this method by at least one document because an empty
// return indicates there is no more initial clone data.
if (arrBuilder->arrSize() && tracker.intervalHasElapsed()) {
@@ -718,7 +717,7 @@ void MigrationChunkClonerSourceLegacy::_nextCloneBatchFromCloneLocs(OperationCon
lk.lock();
}
- _cloneLocs.erase(_cloneLocs.begin(), iter);
+ _cloneRecordIds.erase(_cloneRecordIds.begin(), iter);
}
uint64_t MigrationChunkClonerSourceLegacy::getCloneBatchBufferAllocationSize() {
@@ -727,7 +726,7 @@ uint64_t MigrationChunkClonerSourceLegacy::getCloneBatchBufferAllocationSize() {
return static_cast<uint64_t>(BSONObjMaxUserSize);
return std::min(static_cast<uint64_t>(BSONObjMaxUserSize),
- _averageObjectSizeForCloneLocs * _cloneLocs.size());
+ _averageObjectSizeForCloneRecordIds * _cloneRecordIds.size());
}
Status MigrationChunkClonerSourceLegacy::nextCloneBatch(OperationContext* opCtx,
@@ -735,8 +734,8 @@ Status MigrationChunkClonerSourceLegacy::nextCloneBatch(OperationContext* opCtx,
BSONArrayBuilder* arrBuilder) {
dassert(opCtx->lockState()->isCollectionLockedForMode(nss(), MODE_IS));
- // If this chunk is too large to store records in _cloneLocs and the command args specify to
- // attempt to move it, scan the collection directly.
+ // If this chunk is too large to store records in _cloneRecordIds and the command args specify
+ // to attempt to move it, scan the collection directly.
if (_jumboChunkCloneState && _forceJumbo) {
try {
_nextCloneBatchFromIndexScan(opCtx, collection, arrBuilder);
@@ -746,12 +745,11 @@ Status MigrationChunkClonerSourceLegacy::nextCloneBatch(OperationContext* opCtx,
}
}
- _nextCloneBatchFromCloneLocs(opCtx, collection, arrBuilder);
+ _nextCloneBatchFromCloneRecordIds(opCtx, collection, arrBuilder);
return Status::OK();
}
Status MigrationChunkClonerSourceLegacy::nextModsBatch(OperationContext* opCtx,
- Database* db,
BSONObjBuilder* builder) {
dassert(opCtx->lockState()->isCollectionLockedForMode(nss(), MODE_IS));
@@ -761,7 +759,7 @@ Status MigrationChunkClonerSourceLegacy::nextModsBatch(OperationContext* opCtx,
{
// All clone data must have been drained before starting to fetch the incremental changes.
stdx::unique_lock<Latch> lk(_mutex);
- invariant(_cloneLocs.empty());
+ invariant(_cloneRecordIds.empty());
// The "snapshot" for delete and update list must be taken under a single lock. This is to
// ensure that we will preserve the causal order of writes. Always consume the delete
@@ -784,8 +782,8 @@ Status MigrationChunkClonerSourceLegacy::nextModsBatch(OperationContext* opCtx,
if (deleteList.empty()) {
BSONArrayBuilder arrUpd(builder->subarrayStart("reload"));
- auto findByIdWrapper = [opCtx, db, ns](BSONObj idDoc, BSONObj* fullDoc) {
- return Helpers::findById(opCtx, db, ns, idDoc, *fullDoc);
+ auto findByIdWrapper = [opCtx, ns](BSONObj idDoc, BSONObj* fullDoc) {
+ return Helpers::findById(opCtx, ns, idDoc, *fullDoc);
};
totalDocSize = xferMods(&arrUpd, &updateList, totalDocSize, findByIdWrapper);
arrUpd.done();
@@ -874,7 +872,7 @@ MigrationChunkClonerSourceLegacy::_getIndexScanExecutor(
if (!shardKeyIdx) {
return {ErrorCodes::IndexNotFound,
str::stream() << "can't find index with prefix " << _shardKeyPattern.toBSON()
- << " in storeCurrentLocs for " << nss().ns()};
+ << " in storeCurrentRecordId for " << nss().ns()};
}
// Assume both min and max are non-empty; append MinKeys to make them fit the chosen index
@@ -896,7 +894,7 @@ MigrationChunkClonerSourceLegacy::_getIndexScanExecutor(
scanOption);
}
-Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opCtx) {
+Status MigrationChunkClonerSourceLegacy::_storeCurrentRecordId(OperationContext* opCtx) {
AutoGetCollection collection(opCtx, nss(), MODE_IS);
if (!collection) {
return {ErrorCodes::NamespaceNotFound,
@@ -948,14 +946,14 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
if (!isLargeChunk) {
stdx::lock_guard<Latch> lk(_mutex);
- _cloneLocs.insert(recordId);
+ _cloneRecordIds.insert(recordId);
}
if (++recCount > maxRecsWhenFull) {
isLargeChunk = true;
if (_forceJumbo) {
- _cloneLocs.clear();
+ _cloneRecordIds.clear();
break;
}
}
@@ -975,7 +973,7 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
const auto idIdx = collection->getIndexCatalog()->findIdIndex(opCtx)->getEntry();
if (!idIdx) {
return {ErrorCodes::IndexNotFound,
- str::stream() << "can't find index '_id' in storeCurrentLocs for "
+ str::stream() << "can't find index '_id' in storeCurrentRecordId for "
<< nss().ns()};
}
averageObjectIdSize = idIdx->accessMethod()->getSpaceUsedBytes(opCtx) / totalRecs;
@@ -992,7 +990,7 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
}
stdx::lock_guard<Latch> lk(_mutex);
- _averageObjectSizeForCloneLocs = collectionAverageObjectSize + defaultObjectIdSize;
+ _averageObjectSizeForCloneRecordIds = collectionAverageObjectSize + defaultObjectIdSize;
_averageObjectIdSize = std::max(averageObjectIdSize, defaultObjectIdSize);
return Status::OK();
}
@@ -1057,9 +1055,9 @@ Status MigrationChunkClonerSourceLegacy::_checkRecipientCloningStatus(OperationC
stdx::lock_guard<Latch> sl(_mutex);
- const std::size_t cloneLocsRemaining = _cloneLocs.size();
+ const std::size_t cloneRecordIdsRemaining = _cloneRecordIds.size();
int64_t untransferredModsSizeBytes = _untransferredDeletesCounter * _averageObjectIdSize +
- _untransferredUpsertsCounter * _averageObjectSizeForCloneLocs;
+ _untransferredUpsertsCounter * _averageObjectSizeForCloneRecordIds;
if (_forceJumbo && _jumboChunkCloneState) {
LOGV2(21992,
@@ -1079,13 +1077,13 @@ Status MigrationChunkClonerSourceLegacy::_checkRecipientCloningStatus(OperationC
"moveChunk data transfer progress",
"response"_attr = redact(res),
"memoryUsedBytes"_attr = _memoryUsed,
- "docsRemainingToClone"_attr = cloneLocsRemaining,
+ "docsRemainingToClone"_attr = cloneRecordIdsRemaining,
"untransferredModsSizeBytes"_attr = untransferredModsSizeBytes);
}
if (res["state"].String() == "steady" && sessionCatalogSourceInCatchupPhase &&
estimateUntransferredSessionsSize == 0) {
- if (cloneLocsRemaining != 0 ||
+ if (cloneRecordIdsRemaining != 0 ||
(_jumboChunkCloneState && _forceJumbo &&
PlanExecutor::IS_EOF != _jumboChunkCloneState->clonerState)) {
return {ErrorCodes::OperationIncomplete,
@@ -1124,7 +1122,8 @@ Status MigrationChunkClonerSourceLegacy::_checkRecipientCloningStatus(OperationC
"moveChunk data transfer within threshold to allow write blocking",
"_untransferredUpsertsCounter"_attr = _untransferredUpsertsCounter,
"_untransferredDeletesCounter"_attr = _untransferredDeletesCounter,
- "_averageObjectSizeForCloneLocs"_attr = _averageObjectSizeForCloneLocs,
+ "_averageObjectSizeForCloneRecordIds"_attr =
+ _averageObjectSizeForCloneRecordIds,
"_averageObjectIdSize"_attr = _averageObjectIdSize,
"untransferredModsSizeBytes"_attr = untransferredModsSizeBytes,
"untransferredSessionDataInBytes"_attr = estimateUntransferredSessionsSize,
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
index 8c15fa7a0cb..1912c947ad9 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
@@ -174,7 +174,7 @@ public:
*
* NOTE: Must be called with the collection lock held in at least IS mode.
*/
- Status nextModsBatch(OperationContext* opCtx, Database* db, BSONObjBuilder* builder);
+ Status nextModsBatch(OperationContext* opCtx, BSONObjBuilder* builder);
/**
* Appends to 'arrBuilder' oplog entries which wrote to the currently migrated chunk and contain
@@ -248,17 +248,17 @@ private:
const CollectionPtr& collection,
BSONArrayBuilder* arrBuilder);
- void _nextCloneBatchFromCloneLocs(OperationContext* opCtx,
- const CollectionPtr& collection,
- BSONArrayBuilder* arrBuilder);
+ void _nextCloneBatchFromCloneRecordIds(OperationContext* opCtx,
+ const CollectionPtr& collection,
+ BSONArrayBuilder* arrBuilder);
/**
- * Get the disklocs that belong to the chunk migrated and sort them in _cloneLocs (to avoid
- * seeking disk later).
+ * Get the recordIds that belong to the chunk migrated and sort them in _cloneRecordIds (to
+ * avoid seeking disk later).
*
* Returns OK or any error status otherwise.
*/
- Status _storeCurrentLocs(OperationContext* opCtx);
+ Status _storeCurrentRecordId(OperationContext* opCtx);
/**
* Adds the OpTime to the list of OpTimes for oplog entries that we should consider migrating as
@@ -349,11 +349,11 @@ private:
State _state{kNew};
// List of record ids that need to be transferred (initial clone)
- std::set<RecordId> _cloneLocs;
+ std::set<RecordId> _cloneRecordIds;
// The estimated average object size during the clone phase. Used for buffer size
// pre-allocation (initial clone).
- uint64_t _averageObjectSizeForCloneLocs{0};
+ uint64_t _averageObjectSizeForCloneRecordIds{0};
// The estimated average object _id size during the clone phase.
uint64_t _averageObjectIdSize{0};
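
Renames aside, the cloner's record-id batching keeps its old discipline: walk the pending set, always ship at least one document per call, stop once a budget is hit, then erase the consumed prefix. A self-contained sketch of that drain-a-prefix loop, with ints standing in for RecordIds and a simple element budget instead of the ElapsedTracker used above:

#include <iostream>
#include <set>
#include <vector>

// Drains up to 'budget' elements per call, but always at least one when any
// remain, mirroring the "must make progress" rule in the cloner above.
std::vector<int> nextBatch(std::set<int>& pending, std::size_t budget) {
    std::vector<int> batch;
    auto iter = pending.begin();
    for (; iter != pending.end(); ++iter) {
        if (!batch.empty() && batch.size() >= budget) {
            break;  // Budget reached and we already made progress.
        }
        batch.push_back(*iter);
    }
    // Erase the consumed prefix, like _cloneRecordIds.erase(begin, iter).
    pending.erase(pending.begin(), iter);
    return batch;
}

int main() {
    std::set<int> recordIds{1, 2, 3, 4, 5};
    while (!recordIds.empty()) {
        auto batch = nextBatch(recordIds, 2);
        std::cout << "batch of " << batch.size() << "\n";  // 2, 2, then 1
    }
    return 0;
}
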
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp
index d4c7593370c..8be0acd90df 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp
@@ -103,11 +103,6 @@ public:
_autoColl = boost::none;
}
- Database* getDb() const {
- invariant(_autoColl);
- return _autoColl->getDb();
- }
-
const CollectionPtr& getColl() const {
invariant(_autoColl);
return _autoColl->getCollection();
@@ -235,7 +230,7 @@ public:
AutoGetActiveCloner autoCloner(opCtx, migrationSessionId, true);
- uassertStatusOK(autoCloner.getCloner()->nextModsBatch(opCtx, autoCloner.getDb(), &result));
+ uassertStatusOK(autoCloner.getCloner()->nextModsBatch(opCtx, &result));
return true;
}
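
The removal of AutoGetActiveCloner::getDb() above follows from Helpers::findById no longer taking a Database*: both nextModsBatch and willOverrideLocalId (later in this diff) now look up documents by _id with just the namespace. A hypothetical in-memory stand-in for that lookup shape; none of these names are the server's API.

#include <iostream>
#include <map>
#include <string>

// Returns true and fills 'doc' if the namespace holds a document with this id,
// taking only the namespace string rather than a database handle.
bool findById(const std::map<std::string, std::map<int, std::string>>& storage,
              const std::string& ns,
              int id,
              std::string* doc) {
    auto collIt = storage.find(ns);
    if (collIt == storage.end()) {
        return false;
    }
    auto docIt = collIt->second.find(id);
    if (docIt == collIt->second.end()) {
        return false;
    }
    *doc = docIt->second;
    return true;
}

int main() {
    std::map<std::string, std::map<int, std::string>> storage{
        {"db.test", {{1, "{_id: 1}"}}}};
    std::string localDoc;
    // Mirrors the willOverrideLocalId check: does a local document with this
    // _id already exist in the namespace?
    if (findById(storage, "db.test", 1, &localDoc)) {
        std::cout << "would override: " << localDoc << "\n";
    }
    return 0;
}
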
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
index 91e1b4a21bc..dc1eb4579e5 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
@@ -179,7 +179,7 @@ protected:
true,
{ChunkType{uuid,
ChunkRange{BSON(kShardKey << MINKEY), BSON(kShardKey << MAXKEY)},
- ChunkVersion(1, 0, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 0}),
ShardId("dummyShardId")}});
AutoGetDb autoDb(operationContext(), kNss.db(), MODE_IX);
@@ -355,7 +355,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, CorrectDocumentsFetched) {
{
BSONObjBuilder modsBuilder;
- ASSERT_OK(cloner.nextModsBatch(operationContext(), autoColl.getDb(), &modsBuilder));
+ ASSERT_OK(cloner.nextModsBatch(operationContext(), &modsBuilder));
const auto modsObj = modsBuilder.obj();
ASSERT_EQ(2U, modsObj["reload"].Array().size());
@@ -455,7 +455,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, RemoveDuplicateDocuments) {
AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
{
BSONObjBuilder modsBuilder;
- ASSERT_OK(cloner.nextModsBatch(operationContext(), autoColl.getDb(), &modsBuilder));
+ ASSERT_OK(cloner.nextModsBatch(operationContext(), &modsBuilder));
const auto modsObj = modsBuilder.obj();
ASSERT_EQ(1U, modsObj["reload"].Array().size());
@@ -522,7 +522,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, OneLargeDocumentTransferMods) {
AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
{
BSONObjBuilder modsBuilder;
- ASSERT_OK(cloner.nextModsBatch(operationContext(), autoColl.getDb(), &modsBuilder));
+ ASSERT_OK(cloner.nextModsBatch(operationContext(), &modsBuilder));
const auto modsObj = modsBuilder.obj();
ASSERT_EQ(1, modsObj["reload"].Array().size());
@@ -600,7 +600,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, ManySmallDocumentsTransferMods) {
AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
{
BSONObjBuilder modsBuilder;
- ASSERT_OK(cloner.nextModsBatch(operationContext(), autoColl.getDb(), &modsBuilder));
+ ASSERT_OK(cloner.nextModsBatch(operationContext(), &modsBuilder));
const auto modsObj = modsBuilder.obj();
ASSERT_EQ(modsObj["reload"].Array().size(), numDocuments);
}
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index 09a39686779..fa303126008 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -201,11 +201,10 @@ bool willOverrideLocalId(OperationContext* opCtx,
BSONObj min,
BSONObj max,
BSONObj shardKeyPattern,
- Database* db,
BSONObj remoteDoc,
BSONObj* localDoc) {
*localDoc = BSONObj();
- if (Helpers::findById(opCtx, db, nss.ns(), remoteDoc, *localDoc)) {
+ if (Helpers::findById(opCtx, nss.ns(), remoteDoc, *localDoc)) {
return !isInRange(*localDoc, min, max, shardKeyPattern);
}
@@ -819,7 +818,7 @@ MigrationDestinationManager::IndexesAndIdIndex MigrationDestinationManager::getC
Milliseconds(-1)));
for (auto&& spec : indexes.docs) {
- if (spec["clustered"]) {
+ if (spec[IndexDescriptor::kClusteredFieldName]) {
// The 'clustered' index is implicitly created upon clustered collection creation.
} else {
donorIndexSpecs.push_back(spec);
@@ -1774,7 +1773,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const
// Do not apply delete if doc does not belong to the chunk being migrated
BSONObj fullObj;
- if (Helpers::findById(opCtx, autoColl.getDb(), _nss.ns(), id, fullObj)) {
+ if (Helpers::findById(opCtx, _nss.ns(), id, fullObj)) {
if (!isInRange(fullObj, _min, _max, _shardKeyPattern)) {
if (MONGO_unlikely(failMigrationReceivedOutOfRangeOperation.shouldFail())) {
MONGO_UNREACHABLE;
@@ -1823,14 +1822,8 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const
}
BSONObj localDoc;
- if (willOverrideLocalId(opCtx,
- _nss,
- _min,
- _max,
- _shardKeyPattern,
- autoColl.getDb(),
- updatedDoc,
- &localDoc)) {
+ if (willOverrideLocalId(
+ opCtx, _nss, _min, _max, _shardKeyPattern, updatedDoc, &localDoc)) {
// Exception will abort migration cleanly
LOGV2_ERROR_OPTIONS(
16977,
diff --git a/src/mongo/db/s/migration_destination_manager_legacy_commands.cpp b/src/mongo/db/s/migration_destination_manager_legacy_commands.cpp
index 4b0d94e98bc..7ee89b9f26c 100644
--- a/src/mongo/db/s/migration_destination_manager_legacy_commands.cpp
+++ b/src/mongo/db/s/migration_destination_manager_legacy_commands.cpp
@@ -91,6 +91,14 @@ public:
out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool shouldCheckoutSession() const final {
+ return false;
+ }
+
bool errmsgRun(OperationContext* opCtx,
const std::string& dbname,
const BSONObj& cmdObj,
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index 1a6909e629f..a0fc3e650ee 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -39,6 +39,7 @@
#include "mongo/db/read_concern.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/s/auto_split_vector.h"
+#include "mongo/db/s/commit_chunk_migration_gen.h"
#include "mongo/db/s/migration_chunk_cloner_source_legacy.h"
#include "mongo/db/s/migration_coordinator.h"
#include "mongo/db/s/migration_util.h"
@@ -59,8 +60,6 @@
#include "mongo/s/catalog_cache_loader.h"
#include "mongo/s/grid.h"
#include "mongo/s/pm2423_feature_flags_gen.h"
-#include "mongo/s/request_types/commit_chunk_migration_request_type.h"
-#include "mongo/s/request_types/set_shard_version_request.h"
#include "mongo/s/shard_key_pattern.h"
#include "mongo/util/duration.h"
#include "mongo/util/elapsed_tracker.h"
@@ -93,12 +92,10 @@ void refreshRecipientRoutingTable(OperationContext* opCtx,
const NamespaceString& nss,
const HostAndPort& toShardHost,
const ChunkVersion& newCollVersion) {
- SetShardVersionRequest ssv(nss, newCollVersion, false);
-
const executor::RemoteCommandRequest request(
toShardHost,
NamespaceString::kAdminDb.toString(),
- ssv.toBSON(),
+ BSON("_flushRoutingTableCacheUpdates" << nss.ns()),
ReadPreferenceSetting{ReadPreference::PrimaryOnly}.toContainingBSON(),
opCtx,
executor::RemoteCommandRequest::kNoTimeout);
@@ -560,20 +557,18 @@ void MigrationSourceManager::commitChunkMetadataOnConfig() {
{
const auto metadata = _getCurrentMetadataAndCheckEpoch();
- ChunkType migratedChunkType;
- migratedChunkType.setMin(*_args.getMin());
- migratedChunkType.setMax(*_args.getMax());
- migratedChunkType.setVersion(*_chunkVersion);
+ auto migratedChunk = MigratedChunkType(*_chunkVersion, *_args.getMin(), *_args.getMax());
const auto currentTime = VectorClock::get(_opCtx)->getTime();
- CommitChunkMigrationRequest::appendAsCommand(&builder,
- nss(),
- _args.getFromShard(),
- _args.getToShard(),
- migratedChunkType,
- metadata.getCollVersion(),
- currentTime.clusterTime().asTimestamp());
+ CommitChunkMigrationRequest request(nss(),
+ _args.getFromShard(),
+ _args.getToShard(),
+ migratedChunk,
+ metadata.getCollVersion(),
+ currentTime.clusterTime().asTimestamp());
+
+ request.serialize({}, &builder);
builder.append(kWriteConcernField, kMajorityWriteConcern.toBSON());
}
diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp
index bd8600c1518..b877424f86a 100644
--- a/src/mongo/db/s/migration_util.cpp
+++ b/src/mongo/db/s/migration_util.cpp
@@ -37,7 +37,6 @@
#include "mongo/base/error_codes.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
-#include "mongo/client/query.h"
#include "mongo/db/catalog/collection_catalog_helper.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/commands.h"
@@ -520,14 +519,27 @@ void resubmitRangeDeletionsOnStepUp(ServiceContext* serviceContext) {
FindCommandRequest findCommand(NamespaceString::kRangeDeletionNamespace);
findCommand.setFilter(BSON(RangeDeletionTask::kProcessingFieldName << true));
auto cursor = client.find(std::move(findCommand));
- if (cursor->more()) {
- return migrationutil::submitRangeDeletionTask(
+
+ auto retFuture = ExecutorFuture<void>(getMigrationUtilExecutor(serviceContext));
+
+ int rangeDeletionsMarkedAsProcessing = 0;
+ while (cursor->more()) {
+ retFuture = migrationutil::submitRangeDeletionTask(
opCtx.get(),
RangeDeletionTask::parse(IDLParserErrorContext("rangeDeletionRecovery"),
cursor->next()));
- } else {
- return ExecutorFuture<void>(getMigrationUtilExecutor(serviceContext));
+ rangeDeletionsMarkedAsProcessing++;
}
+
+ if (rangeDeletionsMarkedAsProcessing > 1) {
+ LOGV2_WARNING(
+ 6695800,
+ "Rescheduling several range deletions marked as processing. Orphans count "
+ "may be off while they are not drained",
+ "numRangeDeletionsMarkedAsProcessing"_attr = rangeDeletionsMarkedAsProcessing);
+ }
+
+ return retFuture;
})
.then([serviceContext] {
ThreadClient tc("ResubmitRangeDeletions", serviceContext);
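
The migration_util.cpp hunk above changes step-up recovery to resubmit every range deletion marked as processing rather than only the first one found, counting them and warning when more than one is rescheduled; only the most recently assigned future is kept in retFuture. A small sketch of that loop, with a vector standing in for the cursor and a callback standing in for submitRangeDeletionTask:

#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Task {
    std::string range;
};

// Resubmits every task marked as processing and warns when more than one is
// rescheduled, as in the hunk above. Only the last submission's result is
// returned, mirroring the retFuture reassignment.
int resubmitProcessingTasks(const std::vector<Task>& cursor,
                            const std::function<int(const Task&)>& submit) {
    int lastResult = 0;
    int markedAsProcessing = 0;
    for (const auto& task : cursor) {
        lastResult = submit(task);
        ++markedAsProcessing;
    }
    if (markedAsProcessing > 1) {
        std::cout << "warning: rescheduling " << markedAsProcessing
                  << " range deletions marked as processing\n";
    }
    return lastResult;
}

int main() {
    std::vector<Task> pending{{"[0, 10)"}, {"[10, 20)"}};
    resubmitProcessingTasks(pending, [](const Task& t) {
        std::cout << "submitting deletion for " << t.range << "\n";
        return 1;
    });
    return 0;
}
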
diff --git a/src/mongo/db/s/migration_util_test.cpp b/src/mongo/db/s/migration_util_test.cpp
index 8e6f02043da..90a1e9016a1 100644
--- a/src/mongo/db/s/migration_util_test.cpp
+++ b/src/mongo/db/s/migration_util_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/client/remote_command_targeter_factory_mock.h"
#include "mongo/client/remote_command_targeter_mock.h"
#include "mongo/db/catalog_raii.h"
@@ -591,7 +589,7 @@ TEST_F(SubmitRangeDeletionTaskTest, SucceedsIfFilteringMetadataUUIDMatchesTaskUU
_mockCatalogCacheLoader->setDatabaseRefreshReturnValue(kDefaultDatabaseType);
_mockCatalogCacheLoader->setCollectionRefreshReturnValue(coll);
_mockCatalogCacheLoader->setChunkRefreshReturnValue(
- makeChangedChunks(ChunkVersion(1, 0, kEpoch, kDefaultTimestamp)));
+ makeChangedChunks(ChunkVersion({kEpoch, kDefaultTimestamp}, {1, 0})));
_mockCatalogClient->setCollections({coll});
forceShardFilteringMetadataRefresh(opCtx, kTestNss);
@@ -619,7 +617,7 @@ TEST_F(
_mockCatalogCacheLoader->setDatabaseRefreshReturnValue(kDefaultDatabaseType);
_mockCatalogCacheLoader->setCollectionRefreshReturnValue(coll);
_mockCatalogCacheLoader->setChunkRefreshReturnValue(
- makeChangedChunks(ChunkVersion(1, 0, kEpoch, kDefaultTimestamp)));
+ makeChangedChunks(ChunkVersion({kEpoch, kDefaultTimestamp}, {1, 0})));
_mockCatalogClient->setCollections({coll});
auto metadata = makeShardedMetadata(opCtx, collectionUUID);
@@ -654,7 +652,7 @@ TEST_F(SubmitRangeDeletionTaskTest,
auto matchingColl = makeCollectionType(collectionUUID, kEpoch, kDefaultTimestamp);
_mockCatalogCacheLoader->setCollectionRefreshReturnValue(matchingColl);
_mockCatalogCacheLoader->setChunkRefreshReturnValue(
- makeChangedChunks(ChunkVersion(10, 0, kEpoch, kDefaultTimestamp)));
+ makeChangedChunks(ChunkVersion({kEpoch, kDefaultTimestamp}, {10, 0})));
_mockCatalogClient->setCollections({matchingColl});
auto metadata = makeShardedMetadata(opCtx, collectionUUID);
@@ -684,7 +682,7 @@ TEST_F(SubmitRangeDeletionTaskTest,
_mockCatalogCacheLoader->setDatabaseRefreshReturnValue(kDefaultDatabaseType);
_mockCatalogCacheLoader->setCollectionRefreshReturnValue(otherColl);
_mockCatalogCacheLoader->setChunkRefreshReturnValue(
- makeChangedChunks(ChunkVersion(1, 0, otherEpoch, otherTimestamp)));
+ makeChangedChunks(ChunkVersion({otherEpoch, otherTimestamp}, {1, 0})));
_mockCatalogClient->setCollections({otherColl});
// The task should not have been submitted, and the task's entry should have been removed from
diff --git a/src/mongo/db/s/move_primary_coordinator.cpp b/src/mongo/db/s/move_primary_coordinator.cpp
index c7fb0d9e44d..863a4c17b9d 100644
--- a/src/mongo/db/s/move_primary_coordinator.cpp
+++ b/src/mongo/db/s/move_primary_coordinator.cpp
@@ -46,30 +46,10 @@
namespace mongo {
-MovePrimaryCoordinator::MovePrimaryCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(MovePrimaryCoordinatorDocument::parse(
- IDLParserErrorContext("MovePrimaryCoordinatorDocument"), initialState)) {}
-
-boost::optional<BSONObj> MovePrimaryCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.append("request", BSON(_doc.kToShardIdFieldName << _doc.getToShardId()));
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "MovePrimaryCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("active", true);
- return bob.obj();
-}
+void MovePrimaryCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ stdx::lock_guard lk{_docMutex};
+ cmdInfoBuilder->append("request", BSON(_doc.kToShardIdFieldName << _doc.getToShardId()));
+};
void MovePrimaryCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
// If we have two shard collections on the same namespace, then the arguments must be the same.
diff --git a/src/mongo/db/s/move_primary_coordinator.h b/src/mongo/db/s/move_primary_coordinator.h
index 2e501419255..80a1586e0a4 100644
--- a/src/mongo/db/s/move_primary_coordinator.h
+++ b/src/mongo/db/s/move_primary_coordinator.h
@@ -35,30 +35,25 @@
namespace mongo {
-class MovePrimaryCoordinator final : public ShardingDDLCoordinator {
+class MovePrimaryCoordinator final
+ : public ShardingDDLCoordinatorImpl<MovePrimaryCoordinatorDocument> {
public:
- MovePrimaryCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState);
+ MovePrimaryCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState)
+ : ShardingDDLCoordinatorImpl(service, "MovePrimaryCoordinator", initialState) {}
+
~MovePrimaryCoordinator() = default;
void checkIfOptionsConflict(const BSONObj& coorDoc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
bool canAlwaysStartWhenUserWritesAreDisabled() const override {
return true;
}
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
- }
-
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
-
- MovePrimaryCoordinatorDocument _doc;
};
} // namespace mongo
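
MovePrimaryCoordinator, like the refine and rename coordinators later in this diff, replaces its hand-written reportForCurrentOp with a small appendCommandInfo override; the common currentOp fields that the removed bodies assembled (type, desc, op, command, active) are presumably built once by the new coordinator base class. A sketch of that split under that assumption, with std::map standing in for BSONObjBuilder:

#include <iostream>
#include <map>
#include <string>
#include <utility>

using Doc = std::map<std::string, std::string>;

class CoordinatorBase {
public:
    explicit CoordinatorBase(std::string name) : _name(std::move(name)) {}
    virtual ~CoordinatorBase() = default;

    // Builds the fields shared by all coordinators (a subset of those in the
    // removed reportForCurrentOp bodies) and lets the subclass contribute only
    // its command-specific details via appendCommandInfo.
    Doc reportForCurrentOp() const {
        Doc command;
        appendCommandInfo(&command);
        Doc op{{"type", "op"}, {"desc", _name}, {"op", "command"}, {"active", "true"}};
        for (const auto& [key, value] : command) {
            op["command." + key] = value;
        }
        return op;
    }

protected:
    virtual void appendCommandInfo(Doc* cmdInfoBuilder) const = 0;

private:
    std::string _name;
};

class MovePrimaryLikeCoordinator : public CoordinatorBase {
public:
    MovePrimaryLikeCoordinator() : CoordinatorBase("MovePrimaryCoordinator") {}

private:
    void appendCommandInfo(Doc* cmdInfoBuilder) const override {
        (*cmdInfoBuilder)["toShardId"] = "shard0001";  // Hypothetical request field.
    }
};

int main() {
    MovePrimaryLikeCoordinator coordinator;
    for (const auto& [key, value] : coordinator.reportForCurrentOp()) {
        std::cout << key << ": " << value << "\n";
    }
    return 0;
}
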
diff --git a/src/mongo/db/s/move_primary_source_manager.cpp b/src/mongo/db/s/move_primary_source_manager.cpp
index b4382f21e0f..aaca2f82bbb 100644
--- a/src/mongo/db/s/move_primary_source_manager.cpp
+++ b/src/mongo/db/s/move_primary_source_manager.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/move_primary_source_manager.h"
#include "mongo/client/connpool.h"
@@ -50,14 +47,11 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
MONGO_FAIL_POINT_DEFINE(hangInCloneStage);
MONGO_FAIL_POINT_DEFINE(hangInCleanStaleDataStage);
-using namespace shardmetadatautil;
-
MovePrimarySourceManager::MovePrimarySourceManager(OperationContext* opCtx,
ShardMovePrimary requestArgs,
StringData dbname,
@@ -192,7 +186,7 @@ Status MovePrimarySourceManager::enterCriticalSection(OperationContext* opCtx) {
// time inclusive of the move primary config commit update from accessing secondary data.
// Note: this write must occur after the critSec flag is set, to ensure the secondary refresh
// will stall behind the flag.
- Status signalStatus = updateShardDatabasesEntry(
+ Status signalStatus = shardmetadatautil::updateShardDatabasesEntry(
opCtx,
BSON(ShardDatabaseType::kNameFieldName << getNss().toString()),
BSONObj(),
diff --git a/src/mongo/db/s/op_observer_sharding_test.cpp b/src/mongo/db/s/op_observer_sharding_test.cpp
index 1ef41426f26..2c4859ba782 100644
--- a/src/mongo/db/s/op_observer_sharding_test.cpp
+++ b/src/mongo/db/s/op_observer_sharding_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/catalog_raii.h"
#include "mongo/db/op_observer_util.h"
#include "mongo/db/s/collection_sharding_runtime.h"
@@ -59,8 +57,10 @@ protected:
const UUID uuid = UUID::gen();
const OID epoch = OID::gen();
auto range = ChunkRange(BSON("key" << MINKEY), BSON("key" << MAXKEY));
- auto chunk = ChunkType(
- uuid, std::move(range), ChunkVersion(1, 0, epoch, Timestamp(1, 1)), ShardId("other"));
+ auto chunk = ChunkType(uuid,
+ std::move(range),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}),
+ ShardId("other"));
auto rt = RoutingTableHistory::makeNew(kTestNss,
uuid,
KeyPattern(keyPattern),
diff --git a/src/mongo/db/s/operation_sharding_state_test.cpp b/src/mongo/db/s/operation_sharding_state_test.cpp
index 0c4732b51ab..9c275398f85 100644
--- a/src/mongo/db/s/operation_sharding_state_test.cpp
+++ b/src/mongo/db/s/operation_sharding_state_test.cpp
@@ -47,7 +47,7 @@ TEST_F(OperationShardingStateTest, ScopedSetShardRoleDbVersion) {
}
TEST_F(OperationShardingStateTest, ScopedSetShardRoleShardVersion) {
- ChunkVersion shardVersion(1, 0, OID::gen(), Timestamp(1, 0));
+ ChunkVersion shardVersion({OID::gen(), Timestamp(1, 0)}, {1, 0});
ScopedSetShardRole scopedSetShardRole(operationContext(), kNss, shardVersion, boost::none);
auto& oss = OperationShardingState::get(operationContext());
@@ -58,13 +58,13 @@ TEST_F(OperationShardingStateTest, ScopedSetShardRoleChangeShardVersionSameNames
auto& oss = OperationShardingState::get(operationContext());
{
- ChunkVersion shardVersion1(1, 0, OID::gen(), Timestamp(10, 0));
+ ChunkVersion shardVersion1({OID::gen(), Timestamp(10, 0)}, {1, 0});
ScopedSetShardRole scopedSetShardRole1(
operationContext(), kNss, shardVersion1, boost::none);
ASSERT_EQ(shardVersion1, *oss.getShardVersion(kNss));
}
{
- ChunkVersion shardVersion2(1, 0, OID::gen(), Timestamp(20, 0));
+ ChunkVersion shardVersion2({OID::gen(), Timestamp(20, 0)}, {1, 0});
ScopedSetShardRole scopedSetShardRole2(
operationContext(), kNss, shardVersion2, boost::none);
ASSERT_EQ(shardVersion2, *oss.getShardVersion(kNss));
@@ -72,8 +72,8 @@ TEST_F(OperationShardingStateTest, ScopedSetShardRoleChangeShardVersionSameNames
}
TEST_F(OperationShardingStateTest, ScopedSetShardRoleRecursiveShardVersionDifferentNamespaces) {
- ChunkVersion shardVersion1(1, 0, OID::gen(), Timestamp(10, 0));
- ChunkVersion shardVersion2(1, 0, OID::gen(), Timestamp(20, 0));
+ ChunkVersion shardVersion1({OID::gen(), Timestamp(10, 0)}, {1, 0});
+ ChunkVersion shardVersion2({OID::gen(), Timestamp(20, 0)}, {1, 0});
ScopedSetShardRole scopedSetShardRole1(operationContext(), kNss, shardVersion1, boost::none);
ScopedSetShardRole scopedSetShardRole2(
diff --git a/src/mongo/db/s/range_deletion_util_test.cpp b/src/mongo/db/s/range_deletion_util_test.cpp
index 6efd33ce9d6..567d50748df 100644
--- a/src/mongo/db/s/range_deletion_util_test.cpp
+++ b/src/mongo/db/s/range_deletion_util_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/catalog/create_collection.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/dbdirectclient.h"
@@ -113,7 +111,7 @@ public:
true,
{ChunkType{uuid,
ChunkRange{BSON(kShardKey << MINKEY), BSON(kShardKey << MAXKEY)},
- ChunkVersion(1, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}),
ShardId("dummyShardId")}});
ChunkManager cm(ShardId("dummyShardId"),
DatabaseVersion(UUID::gen(), Timestamp(1, 1)),
diff --git a/src/mongo/db/s/refine_collection_shard_key_coordinator.cpp b/src/mongo/db/s/refine_collection_shard_key_coordinator.cpp
index a5a01e7f309..b8d981bb4ce 100644
--- a/src/mongo/db/s/refine_collection_shard_key_coordinator.cpp
+++ b/src/mongo/db/s/refine_collection_shard_key_coordinator.cpp
@@ -86,9 +86,8 @@ void notifyChangeStreamsOnRefineCollectionShardKeyComplete(OperationContext* opC
RefineCollectionShardKeyCoordinator::RefineCollectionShardKeyCoordinator(
ShardingDDLCoordinatorService* service, const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(RefineCollectionShardKeyCoordinatorDocument::parse(
- IDLParserErrorContext("RefineCollectionShardKeyCoordinatorDocument"), initialState)),
+ : RecoverableShardingDDLCoordinator(
+ service, "RefineCollectionShardKeyCoordinator", initialState),
_request(_doc.getRefineCollectionShardKeyRequest()),
_newShardKey(_doc.getNewShardKey()) {}
@@ -104,47 +103,8 @@ void RefineCollectionShardKeyCoordinator::checkIfOptionsConflict(const BSONObj&
_request.toBSON() == otherDoc.getRefineCollectionShardKeyRequest().toBSON()));
}
-boost::optional<BSONObj> RefineCollectionShardKeyCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.appendElements(_request.toBSON());
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "RefineCollectionShardKeyCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("active", true);
- return bob.obj();
-}
-
-void RefineCollectionShardKeyCoordinator::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(
- 6233200,
- 2,
- "Refine collection shard key coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = RefineCollectionShardKeyCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = RefineCollectionShardKeyCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
+void RefineCollectionShardKeyCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ cmdInfoBuilder->appendElements(_request.toBSON());
}
ExecutorFuture<void> RefineCollectionShardKeyCoordinator::_runImpl(
diff --git a/src/mongo/db/s/refine_collection_shard_key_coordinator.h b/src/mongo/db/s/refine_collection_shard_key_coordinator.h
index c2e70a0d067..c461383e876 100644
--- a/src/mongo/db/s/refine_collection_shard_key_coordinator.h
+++ b/src/mongo/db/s/refine_collection_shard_key_coordinator.h
@@ -35,7 +35,9 @@
namespace mongo {
-class RefineCollectionShardKeyCoordinator : public ShardingDDLCoordinator {
+class RefineCollectionShardKeyCoordinator
+ : public RecoverableShardingDDLCoordinator<RefineCollectionShardKeyCoordinatorDocument,
+ RefineCollectionShardKeyCoordinatorPhaseEnum> {
public:
using StateDoc = RefineCollectionShardKeyCoordinatorDocument;
using Phase = RefineCollectionShardKeyCoordinatorPhaseEnum;
@@ -45,40 +47,16 @@ public:
void checkIfOptionsConflict(const BSONObj& coorDoc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return RefineCollectionShardKeyCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("RefineCollectionShardKeyCoordinator::_docMutex");
- RefineCollectionShardKeyCoordinatorDocument _doc;
-
const mongo::RefineCollectionShardKeyRequest _request;
const KeyPattern _newShardKey;
diff --git a/src/mongo/db/s/rename_collection_coordinator.cpp b/src/mongo/db/s/rename_collection_coordinator.cpp
index 789f8ade994..64680e96cc2 100644
--- a/src/mongo/db/s/rename_collection_coordinator.cpp
+++ b/src/mongo/db/s/rename_collection_coordinator.cpp
@@ -90,9 +90,7 @@ boost::optional<UUID> getCollectionUUID(OperationContext* opCtx,
RenameCollectionCoordinator::RenameCollectionCoordinator(ShardingDDLCoordinatorService* service,
const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(RenameCollectionCoordinatorDocument::parse(
- IDLParserErrorContext("RenameCollectionCoordinatorDocument"), initialState)),
+ : RecoverableShardingDDLCoordinator(service, "RenameCollectionCoordinator", initialState),
_request(_doc.getRenameCollectionRequest()) {}
void RenameCollectionCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
@@ -113,54 +111,8 @@ std::vector<StringData> RenameCollectionCoordinator::_acquireAdditionalLocks(
return {_request.getTo().ns()};
}
-boost::optional<BSONObj> RenameCollectionCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
-
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.appendElements(_request.toBSON());
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "RenameCollectionCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
-}
-
-void RenameCollectionCoordinator::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(5460501,
- 2,
- "Rename collection coordinator phase transition",
- "fromNs"_attr = nss(),
- "toNs"_attr = _request.getTo(),
- "newPhase"_attr = RenameCollectionCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = RenameCollectionCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
+void RenameCollectionCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ cmdInfoBuilder->appendElements(_request.toBSON());
}
ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
@@ -275,15 +227,15 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
getForwardableOpMetadata().setOn(opCtx);
if (!_firstExecution) {
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
const auto& fromNss = nss();
- _doc = _updateSession(opCtx, _doc);
- const OperationSessionInfo osi = getCurrentSession(_doc);
+ _updateSession(opCtx);
+ const OperationSessionInfo osi = getCurrentSession();
// On participant shards:
// - Block CRUD on source and target collection in case at least one
@@ -303,20 +255,8 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
const auto cmdObj = CommandHelpers::appendMajorityWriteConcern(
renameCollParticipantRequest.toBSON({}));
- try {
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx,
- fromNss.db(),
- cmdObj.addFields(osi.toBSON()),
- participants,
- **executor);
-
- } catch (const ExceptionFor<ErrorCodes::NotARetryableWriteCommand>&) {
- // Older 5.0 binaries don't support running the command as a
- // retryable write yet. In that case, retry without attaching session info.
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx, fromNss.db(), cmdObj, participants, **executor);
- }
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx, fromNss.db(), cmdObj.addFields(osi.toBSON()), participants, **executor);
}))
.then(_executePhase(
Phase::kRenameMetadata,
@@ -325,10 +265,13 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
getForwardableOpMetadata().setOn(opCtx);
+                // For an unsharded collection the CSRS server cannot verify the targetUUID.
+ // Use the session ID + txnNumber to ensure no stale requests get through.
+ _updateSession(opCtx);
+
if (!_firstExecution) {
- _doc = _updateSession(opCtx, _doc);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
ConfigsvrRenameCollectionMetadata req(nss(), _request.getTo());
@@ -336,28 +279,12 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
const auto cmdObj = CommandHelpers::appendMajorityWriteConcern(req.toBSON({}));
const auto& configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
- // For an unsharded collection the CSRS server can not verify the targetUUID.
- // Use the session ID + txnNumber to ensure no stale requests get through.
- _doc = _updateSession(opCtx, _doc);
- const OperationSessionInfo osi = getCurrentSession(_doc);
-
- try {
- uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(
- configShard->runCommand(opCtx,
- ReadPreferenceSetting(ReadPreference::PrimaryOnly),
- "admin",
- cmdObj.addFields(osi.toBSON()),
- Shard::RetryPolicy::kIdempotent)));
- } catch (const ExceptionFor<ErrorCodes::NotARetryableWriteCommand>&) {
- // Older 5.0 binaries don't support running the command as a
- // retryable write yet. In that case, retry without attaching session info.
- uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(
- configShard->runCommand(opCtx,
- ReadPreferenceSetting(ReadPreference::PrimaryOnly),
- "admin",
- cmdObj,
- Shard::RetryPolicy::kIdempotent)));
- }
+ uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(
+ configShard->runCommand(opCtx,
+ ReadPreferenceSetting(ReadPreference::PrimaryOnly),
+ "admin",
+ cmdObj.addFields(getCurrentSession().toBSON()),
+ Shard::RetryPolicy::kIdempotent)));
}))
.then(_executePhase(
Phase::kUnblockCRUD,
@@ -367,9 +294,9 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
getForwardableOpMetadata().setOn(opCtx);
if (!_firstExecution) {
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
const auto& fromNss = nss();
@@ -383,22 +310,11 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
unblockParticipantRequest.toBSON({}));
auto participants = Grid::get(opCtx)->shardRegistry()->getAllShardIds(opCtx);
- _doc = _updateSession(opCtx, _doc);
- const OperationSessionInfo osi = getCurrentSession(_doc);
+ _updateSession(opCtx);
+ const OperationSessionInfo osi = getCurrentSession();
- try {
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx,
- fromNss.db(),
- cmdObj.addFields(osi.toBSON()),
- participants,
- **executor);
- } catch (const ExceptionFor<ErrorCodes::NotARetryableWriteCommand>&) {
- // Older 5.0 binaries don't support running the command as a
- // retryable write yet. In that case, retry without attaching session info.
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx, fromNss.db(), cmdObj, participants, **executor);
- }
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx, fromNss.db(), cmdObj.addFields(osi.toBSON()), participants, **executor);
}))
.then(_executePhase(Phase::kSetResponse,
[this, anchor = shared_from_this()] {
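
The rename-coordinator hunks above drop the try/catch fallback for ErrorCodes::NotARetryableWriteCommand and always attach the current session to the participant command via cmdObj.addFields(osi.toBSON()), so shards can use the session ID + txnNumber to reject stale retries. The sketch below is a standalone model of that stamping step only; SessionInfo and Command are simplified stand-ins, not the real OperationSessionInfo or BSONObj, and nothing here reflects the actual wire format.

    // Standalone sketch: attach session id + txnNumber to an outgoing command so
    // participants can discard stale retries.
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    // Simplified stand-ins for OperationSessionInfo and a BSON command object.
    struct SessionInfo {
        std::string lsid;
        long long txnNumber;
    };

    using Command = std::map<std::string, std::string>;

    Command addSessionFields(Command cmd, const SessionInfo& osi) {
        cmd["lsid"] = osi.lsid;
        cmd["txnNumber"] = std::to_string(osi.txnNumber);
        return cmd;
    }

    void sendToShards(const Command& cmd, const std::vector<std::string>& shards) {
        for (const auto& shard : shards)
            std::cout << "send to " << shard << " with txnNumber " << cmd.at("txnNumber") << '\n';
    }

    int main() {
        // In the real coordinator the txnNumber is advanced each time the session is
        // refreshed, so participants can reject requests carrying an older number.
        SessionInfo osi{"session-1", 7};
        Command renameParticipantCmd{{"_shardsvrRenameCollectionParticipant", "db.from"}};
        sendToShards(addSessionFields(renameParticipantCmd, osi), {"shard0", "shard1"});
    }
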
diff --git a/src/mongo/db/s/rename_collection_coordinator.h b/src/mongo/db/s/rename_collection_coordinator.h
index af395745001..32621bb6ea4 100644
--- a/src/mongo/db/s/rename_collection_coordinator.h
+++ b/src/mongo/db/s/rename_collection_coordinator.h
@@ -35,7 +35,9 @@
namespace mongo {
-class RenameCollectionCoordinator final : public ShardingDDLCoordinator {
+class RenameCollectionCoordinator final
+ : public RecoverableShardingDDLCoordinator<RenameCollectionCoordinatorDocument,
+ RenameCollectionCoordinatorPhaseEnum> {
public:
using StateDoc = RenameCollectionCoordinatorDocument;
using Phase = RenameCollectionCoordinatorPhaseEnum;
@@ -45,9 +47,7 @@ public:
void checkIfOptionsConflict(const BSONObj& doc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
/**
* Waits for the rename to complete and returns the collection version.
@@ -59,41 +59,19 @@ public:
}
private:
+ StringData serializePhase(const Phase& phase) const override {
+ return RenameCollectionCoordinatorPhase_serializer(phase);
+ }
+
bool _mustAlwaysMakeProgress() override {
return _doc.getPhase() >= Phase::kFreezeMigrations;
};
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
- }
-
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
std::vector<StringData> _acquireAdditionalLocks(OperationContext* opCtx) override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("RenameCollectionCoordinator::_docMutex");
- RenameCollectionCoordinatorDocument _doc;
-
boost::optional<RenameCollectionResponse> _response;
const RenameCollectionRequest _request;
};
diff --git a/src/mongo/db/s/rename_collection_participant_service.cpp b/src/mongo/db/s/rename_collection_participant_service.cpp
index bf48f41b581..64419c6c5e4 100644
--- a/src/mongo/db/s/rename_collection_participant_service.cpp
+++ b/src/mongo/db/s/rename_collection_participant_service.cpp
@@ -27,8 +27,7 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
+#include "mongo/db/s/rename_collection_participant_service.h"
#include "mongo/base/checked_cast.h"
#include "mongo/db/catalog/collection_catalog.h"
@@ -40,8 +39,6 @@
#include "mongo/db/s/operation_sharding_state.h"
#include "mongo/db/s/range_deletion_util.h"
#include "mongo/db/s/recoverable_critical_section_service.h"
-#include "mongo/db/s/rename_collection_participant_service.h"
-#include "mongo/db/s/shard_metadata_util.h"
#include "mongo/logv2/log.h"
#include "mongo/s/catalog/sharding_catalog_client.h"
#include "mongo/s/grid.h"
@@ -49,9 +46,7 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
-
namespace {
const Backoff kExponentialBackoff(Seconds(1), Milliseconds::max());
@@ -76,7 +71,6 @@ void dropCollectionLocally(OperationContext* opCtx, const NamespaceString& nss)
"collectionExisted"_attr = knownNss);
}
-/* Clear the CollectionShardingRuntime entry for the specified namespace */
void clearFilteringMetadata(OperationContext* opCtx, const NamespaceString& nss) {
UninterruptibleLockGuard noInterrupt(opCtx->lockState());
Lock::DBLock dbLock(opCtx, nss.db(), MODE_IX);
@@ -135,6 +129,7 @@ void renameOrDropTarget(OperationContext* opCtx,
deleteRangeDeletionTasksForRename(opCtx, fromNss, toNss);
}
}
+
} // namespace
RenameCollectionParticipantService* RenameCollectionParticipantService::getService(
diff --git a/src/mongo/db/s/reshard_collection_coordinator.cpp b/src/mongo/db/s/reshard_collection_coordinator.cpp
index 30ff299e538..c0c9648e0a4 100644
--- a/src/mongo/db/s/reshard_collection_coordinator.cpp
+++ b/src/mongo/db/s/reshard_collection_coordinator.cpp
@@ -107,10 +107,7 @@ ReshardCollectionCoordinator::ReshardCollectionCoordinator(ShardingDDLCoordinato
ReshardCollectionCoordinator::ReshardCollectionCoordinator(ShardingDDLCoordinatorService* service,
const BSONObj& initialState,
bool persistCoordinatorDocument)
- : ShardingDDLCoordinator(service, initialState),
- _initialState(initialState.getOwned()),
- _doc(ReshardCollectionCoordinatorDocument::parse(
- IDLParserErrorContext("ReshardCollectionCoordinatorDocument"), _initialState)),
+ : RecoverableShardingDDLCoordinator(service, "ReshardCollectionCoordinator", initialState),
_request(_doc.getReshardCollectionRequest()),
_persistCoordinatorDocument(persistCoordinatorDocument) {}
@@ -125,50 +122,15 @@ void ReshardCollectionCoordinator::checkIfOptionsConflict(const BSONObj& doc) co
_request.toBSON() == otherDoc.getReshardCollectionRequest().toBSON()));
}
-boost::optional<BSONObj> ReshardCollectionCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.appendElements(_request.toBSON());
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "ReshardCollectionCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("active", true);
- return bob.obj();
+void ReshardCollectionCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ cmdInfoBuilder->appendElements(_request.toBSON());
}
void ReshardCollectionCoordinator::_enterPhase(Phase newPhase) {
if (!_persistCoordinatorDocument) {
return;
}
-
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(6206400,
- 2,
- "Reshard collection coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = ReshardCollectionCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = ReshardCollectionCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
+ RecoverableShardingDDLCoordinator::_enterPhase(newPhase);
}
ExecutorFuture<void> ReshardCollectionCoordinator::_runImpl(
@@ -196,7 +158,7 @@ ExecutorFuture<void> ReshardCollectionCoordinator::_runImpl(
StateDoc newDoc(_doc);
newDoc.setOldShardKey(cmOld.getShardKeyPattern().getKeyPattern().toBSON());
newDoc.setOldCollectionUUID(cmOld.getUUID());
- _doc = _updateStateDocument(opCtx, std::move(newDoc));
+ _updateStateDocument(opCtx, std::move(newDoc));
} else {
_doc.setOldShardKey(cmOld.getShardKeyPattern().getKeyPattern().toBSON());
_doc.setOldCollectionUUID(cmOld.getUUID());
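
Judging from the code removed above, a phase transition inserts the durable coordinator document on the first transition out of kUnset and updates it on later transitions, while ReshardCollectionCoordinator keeps a thin _enterPhase() override so it can skip persistence altogether when _persistCoordinatorDocument is false. A standalone model of that insert-vs-update plus opt-out shape, with illustrative types only:

    // Standalone sketch of the transition removed above: first transition inserts the
    // state document, later ones update it, and persistence can be disabled entirely.
    #include <iostream>
    #include <optional>

    enum class Phase { kUnset = 0, kReshard, kDone };

    class RecoverableCoordinatorSketch {
    public:
        virtual ~RecoverableCoordinatorSketch() = default;

        virtual void enterPhase(Phase newPhase) {
            if (!_persisted)
                std::cout << "insert state document at phase " << static_cast<int>(newPhase) << '\n';
            else
                std::cout << "update state document to phase " << static_cast<int>(newPhase) << '\n';
            _persisted = newPhase;
        }

    protected:
        std::optional<Phase> _persisted;  // stands in for the on-disk coordinator document
    };

    // Mirrors the thin override kept above: persistence can be switched off entirely.
    class ReshardLikeCoordinator : public RecoverableCoordinatorSketch {
    public:
        explicit ReshardLikeCoordinator(bool persist) : _persist(persist) {}

        void enterPhase(Phase newPhase) override {
            if (!_persist)
                return;  // transient mode: no durable document is ever written
            RecoverableCoordinatorSketch::enterPhase(newPhase);
        }

    private:
        const bool _persist;
    };

    int main() {
        ReshardLikeCoordinator durable{true};
        ReshardLikeCoordinator inMemory{false};
        durable.enterPhase(Phase::kReshard);   // first transition: insert
        durable.enterPhase(Phase::kDone);      // later transition: update
        inMemory.enterPhase(Phase::kReshard);  // no-op
    }
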
diff --git a/src/mongo/db/s/reshard_collection_coordinator.h b/src/mongo/db/s/reshard_collection_coordinator.h
index 54d98ee03d1..085c183dc55 100644
--- a/src/mongo/db/s/reshard_collection_coordinator.h
+++ b/src/mongo/db/s/reshard_collection_coordinator.h
@@ -34,7 +34,9 @@
#include "mongo/util/future.h"
namespace mongo {
-class ReshardCollectionCoordinator : public ShardingDDLCoordinator {
+class ReshardCollectionCoordinator
+ : public RecoverableShardingDDLCoordinator<ReshardCollectionCoordinatorDocument,
+ ReshardCollectionCoordinatorPhaseEnum> {
public:
using StateDoc = ReshardCollectionCoordinatorDocument;
using Phase = ReshardCollectionCoordinatorPhaseEnum;
@@ -44,9 +46,7 @@ public:
void checkIfOptionsConflict(const BSONObj& coorDoc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
protected:
ReshardCollectionCoordinator(ShardingDDLCoordinatorService* service,
@@ -54,37 +54,15 @@ protected:
bool persistCoordinatorDocument);
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- stdx::lock_guard l{_docMutex};
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return ReshardCollectionCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
void _enterPhase(Phase newPhase);
- const BSONObj _initialState;
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("ReshardCollectionCoordinator::_docMutex");
- ReshardCollectionCoordinatorDocument _doc;
-
const mongo::ReshardCollectionRequest _request;
    const bool _persistCoordinatorDocument;  // TODO: SERVER-62338 remove this when 6.0 branches out
diff --git a/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp b/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp
index aaeb6180654..dc3176cf3e8 100644
--- a/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp
+++ b/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp
@@ -117,7 +117,8 @@ DocumentSource::GetModPathsReturn DocumentSourceReshardingOwnershipMatch::getMod
DocumentSource::GetNextResult DocumentSourceReshardingOwnershipMatch::doGetNext() {
if (!_tempReshardingChunkMgr) {
// TODO: Actually propagate the temporary resharding namespace from the recipient.
- auto tempReshardingNss = constructTemporaryReshardingNss(pExpCtx->ns.db(), *pExpCtx->uuid);
+ auto tempReshardingNss =
+ resharding::constructTemporaryReshardingNss(pExpCtx->ns.db(), *pExpCtx->uuid);
auto* catalogCache = Grid::get(pExpCtx->opCtx)->catalogCache();
_tempReshardingChunkMgr =
diff --git a/src/mongo/db/s/resharding/resharding_agg_test.cpp b/src/mongo/db/s/resharding/resharding_agg_test.cpp
index ce8d110e5ab..c49467f79f9 100644
--- a/src/mongo/db/s/resharding/resharding_agg_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_agg_test.cpp
@@ -362,7 +362,7 @@ protected:
expCtx->ns = kRemoteOplogNss;
expCtx->mongoProcessInterface = std::make_shared<MockMongoInterface>(pipelineSource);
- auto pipeline = createOplogFetchingPipelineForResharding(
+ auto pipeline = resharding::createOplogFetchingPipelineForResharding(
expCtx,
ReshardingDonorOplogId(Timestamp::min(), Timestamp::min()),
_reshardingCollUUID,
@@ -524,13 +524,14 @@ TEST_F(ReshardingAggTest, VerifyPipelineOutputHasOplogSchema) {
expCtx->ns = kRemoteOplogNss;
expCtx->mongoProcessInterface = std::make_shared<MockMongoInterface>(pipelineSource);
- std::unique_ptr<Pipeline, PipelineDeleter> pipeline = createOplogFetchingPipelineForResharding(
- expCtx,
- // Use the test to also exercise the stages for resuming. The timestamp passed in is
- // excluded from the results.
- ReshardingDonorOplogId(insertOplog.getTimestamp(), insertOplog.getTimestamp()),
- _reshardingCollUUID,
- {_destinedRecipient});
+ std::unique_ptr<Pipeline, PipelineDeleter> pipeline =
+ resharding::createOplogFetchingPipelineForResharding(
+ expCtx,
+ // Use the test to also exercise the stages for resuming. The timestamp passed in is
+ // excluded from the results.
+ ReshardingDonorOplogId(insertOplog.getTimestamp(), insertOplog.getTimestamp()),
+ _reshardingCollUUID,
+ {_destinedRecipient});
auto bsonPipeline = pipeline->serializeToBson();
if (debug) {
std::cout << "Pipeline stages:" << std::endl;
@@ -624,11 +625,12 @@ TEST_F(ReshardingAggTest, VerifyPipelinePreparedTxn) {
expCtx->ns = kRemoteOplogNss;
expCtx->mongoProcessInterface = std::make_shared<MockMongoInterface>(pipelineSource);
- std::unique_ptr<Pipeline, PipelineDeleter> pipeline = createOplogFetchingPipelineForResharding(
- expCtx,
- ReshardingDonorOplogId(Timestamp::min(), Timestamp::min()),
- _reshardingCollUUID,
- {_destinedRecipient});
+ std::unique_ptr<Pipeline, PipelineDeleter> pipeline =
+ resharding::createOplogFetchingPipelineForResharding(
+ expCtx,
+ ReshardingDonorOplogId(Timestamp::min(), Timestamp::min()),
+ _reshardingCollUUID,
+ {_destinedRecipient});
if (debug) {
std::cout << "Pipeline stages:" << std::endl;
        // This can be changed to process a prefix of the pipeline for debugging.
@@ -1476,7 +1478,7 @@ TEST_F(ReshardingAggWithStorageTest, RetryableFindAndModifyWithImageLookup) {
expCtx->mongoProcessInterface = std::move(mockMongoInterface);
}
- auto pipeline = createOplogFetchingPipelineForResharding(
+ auto pipeline = resharding::createOplogFetchingPipelineForResharding(
expCtx, ReshardingDonorOplogId(Timestamp::min(), Timestamp::min()), kCrudUUID, kMyShardId);
pipeline->addInitialSource(DocumentSourceMock::createForTest(pipelineSource, expCtx));
@@ -1578,8 +1580,8 @@ TEST_F(ReshardingAggWithStorageTest,
expCtx->mongoProcessInterface = std::move(mockMongoInterface);
}
- auto pipeline =
- createOplogFetchingPipelineForResharding(expCtx, startAt, kCrudUUID, kMyShardId);
+ auto pipeline = resharding::createOplogFetchingPipelineForResharding(
+ expCtx, startAt, kCrudUUID, kMyShardId);
pipeline->addInitialSource(DocumentSourceMock::createForTest(pipelineSource, expCtx));
return pipeline;
};
diff --git a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
index a80bfbb88ec..8bd04ebfe37 100644
--- a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
+++ b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
@@ -50,7 +50,7 @@
#include "mongo/db/s/resharding/document_source_resharding_ownership_match.h"
#include "mongo/db/s/resharding/resharding_data_copy_util.h"
#include "mongo/db/s/resharding/resharding_future_util.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_server_parameters_gen.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/service_context.h"
@@ -80,7 +80,7 @@ bool collectionHasSimpleCollation(OperationContext* opCtx, const NamespaceString
} // namespace
-ReshardingCollectionCloner::ReshardingCollectionCloner(ReshardingMetricsNew* metrics,
+ReshardingCollectionCloner::ReshardingCollectionCloner(ReshardingMetrics* metrics,
ShardKeyPattern newShardKeyPattern,
NamespaceString sourceNss,
const UUID& sourceUUID,
@@ -109,7 +109,7 @@ std::unique_ptr<Pipeline, PipelineDeleter> ReshardingCollectionCloner::makePipel
resolvedNamespaces[_sourceNss.coll()] = {_sourceNss, std::vector<BSONObj>{}};
// Assume that the config.cache.chunks collection isn't a view either.
- auto tempNss = constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
+ auto tempNss = resharding::constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
auto tempCacheChunksNss =
NamespaceString(NamespaceString::kConfigDb, "cache.chunks." + tempNss.ns());
resolvedNamespaces[tempCacheChunksNss.coll()] = {tempCacheChunksNss, std::vector<BSONObj>{}};
diff --git a/src/mongo/db/s/resharding/resharding_collection_cloner.h b/src/mongo/db/s/resharding/resharding_collection_cloner.h
index e24b03c76b6..97e28a4fce5 100644
--- a/src/mongo/db/s/resharding/resharding_collection_cloner.h
+++ b/src/mongo/db/s/resharding/resharding_collection_cloner.h
@@ -52,7 +52,7 @@ class TaskExecutor;
class OperationContext;
class MongoProcessInterface;
-class ReshardingMetricsNew;
+class ReshardingMetrics;
class ServiceContext;
/**
@@ -61,7 +61,7 @@ class ServiceContext;
*/
class ReshardingCollectionCloner {
public:
- ReshardingCollectionCloner(ReshardingMetricsNew* metrics,
+ ReshardingCollectionCloner(ReshardingMetrics* metrics,
ShardKeyPattern newShardKeyPattern,
NamespaceString sourceNss,
const UUID& sourceUUID,
@@ -99,7 +99,7 @@ private:
std::unique_ptr<Pipeline, PipelineDeleter> _restartPipeline(OperationContext* opCtx);
- ReshardingMetricsNew* _metrics;
+ ReshardingMetrics* _metrics;
const ShardKeyPattern _newShardKeyPattern;
const NamespaceString _sourceNss;
const UUID _sourceUUID;
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp
index ae6b61fb314..61eb1a620c4 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp
@@ -88,13 +88,13 @@ boost::optional<Milliseconds> extractOperationRemainingTime(const BSONObj& obj)
} // namespace
CoordinatorCommitMonitor::CoordinatorCommitMonitor(
- std::shared_ptr<ReshardingMetricsNew> metricsNew,
+ std::shared_ptr<ReshardingMetrics> metrics,
NamespaceString ns,
std::vector<ShardId> recipientShards,
CoordinatorCommitMonitor::TaskExecutorPtr executor,
CancellationToken cancelToken,
Milliseconds maxDelayBetweenQueries)
- : _metricsNew{std::move(metricsNew)},
+ : _metrics{std::move(metrics)},
_ns(std::move(ns)),
_recipientShards(std::move(recipientShards)),
_executor(std::move(executor)),
@@ -209,8 +209,8 @@ ExecutorFuture<void> CoordinatorCommitMonitor::_makeFuture() const {
return RemainingOperationTimes{Milliseconds(0), Milliseconds::max()};
})
.then([this, anchor = shared_from_this()](RemainingOperationTimes remainingTimes) {
- _metricsNew->setCoordinatorHighEstimateRemainingTimeMillis(remainingTimes.max);
- _metricsNew->setCoordinatorLowEstimateRemainingTimeMillis(remainingTimes.min);
+ _metrics->setCoordinatorHighEstimateRemainingTimeMillis(remainingTimes.max);
+ _metrics->setCoordinatorLowEstimateRemainingTimeMillis(remainingTimes.min);
// Check if all recipient shards are within the commit threshold.
if (remainingTimes.max <= _threshold)
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h
index fb9f55d614f..aa3ff0727e9 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h
+++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h
@@ -33,7 +33,7 @@
#include <vector>
#include "mongo/db/namespace_string.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/executor/task_executor.h"
#include "mongo/s/shard_id.h"
#include "mongo/util/cancellation.h"
@@ -69,7 +69,7 @@ public:
Milliseconds max;
};
- CoordinatorCommitMonitor(std::shared_ptr<ReshardingMetricsNew> metricsNew,
+ CoordinatorCommitMonitor(std::shared_ptr<ReshardingMetrics> metrics,
NamespaceString ns,
std::vector<ShardId> recipientShards,
TaskExecutorPtr executor,
@@ -95,7 +95,7 @@ private:
static constexpr auto kDiagnosticLogLevel = 0;
static constexpr auto kMaxDelayBetweenQueries = Seconds(30);
- std::shared_ptr<ReshardingMetricsNew> _metricsNew;
+ std::shared_ptr<ReshardingMetrics> _metrics;
const NamespaceString _ns;
const std::vector<ShardId> _recipientShards;
const TaskExecutorPtr _executor;
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp
index 1cc717b7aec..d8740053c80 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp
@@ -40,7 +40,7 @@
#include "mongo/db/namespace_string.h"
#include "mongo/db/s/config/config_server_test_fixture.h"
#include "mongo/db/s/resharding/resharding_coordinator_commit_monitor.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_server_parameters_gen.h"
#include "mongo/executor/thread_pool_task_executor_test_fixture.h"
#include "mongo/logv2/log.h"
@@ -109,7 +109,7 @@ private:
boost::optional<Callback> _runOnMockingNextResponse;
ShardingDataTransformCumulativeMetrics _cumulativeMetrics{"dummyForTest"};
- std::shared_ptr<ReshardingMetricsNew> _metrics;
+ std::shared_ptr<ReshardingMetrics> _metrics;
};
auto makeExecutor() {
@@ -151,7 +151,7 @@ void CoordinatorCommitMonitorTest::setUp() {
_cancellationSource = std::make_unique<CancellationSource>();
auto clockSource = getServiceContext()->getFastClockSource();
- _metrics = std::make_shared<ReshardingMetricsNew>(
+ _metrics = std::make_shared<ReshardingMetrics>(
UUID::gen(),
BSON("y" << 1),
_ns,
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_observer.cpp b/src/mongo/db/s/resharding/resharding_coordinator_observer.cpp
index 5f78cac592c..da457d8eab3 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_observer.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_observer.cpp
@@ -112,7 +112,7 @@ bool stateTransistionsComplete(WithLock lk,
template <class TParticipant>
Status getStatusFromAbortReasonWithShardInfo(const TParticipant& participant,
StringData participantType) {
- return getStatusFromAbortReason(participant.getMutableState())
+ return resharding::getStatusFromAbortReason(participant.getMutableState())
.withContext("{} shard {} reached an unrecoverable error"_format(
participantType, participant.getId().toString()));
}
@@ -128,7 +128,7 @@ boost::optional<Status> getAbortReasonIfExists(
if (updatedStateDoc.getAbortReason()) {
// Note: the absence of context specifying which shard the abortReason originates from
// implies the abortReason originates from the coordinator.
- return getStatusFromAbortReason(updatedStateDoc);
+ return resharding::getStatusFromAbortReason(updatedStateDoc);
}
for (const auto& donorShard : updatedStateDoc.getDonorShards()) {
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp
index 0f3803ab04e..bd893bd6dee 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp
@@ -53,7 +53,7 @@ protected:
auto coordinatorDoc = ReshardingCoordinatorDocument();
coordinatorDoc.setRecipientShards(std::move(recipients));
coordinatorDoc.setDonorShards(std::move(donors));
- emplaceTruncatedAbortReasonIfExists(coordinatorDoc, abortReason);
+ resharding::emplaceTruncatedAbortReasonIfExists(coordinatorDoc, abortReason);
return coordinatorDoc;
}
@@ -62,9 +62,9 @@ protected:
boost::optional<Timestamp> timestamp = boost::none,
boost::optional<Status> abortReason = boost::none) {
// The mock state here is simulating only one donor shard having errored locally.
- return {makeDonorShard(ShardId{"s1"}, donorState, timestamp),
- makeDonorShard(ShardId{"s2"}, donorState, timestamp, abortReason),
- makeDonorShard(ShardId{"s3"}, donorState, timestamp)};
+ return {resharding::makeDonorShard(ShardId{"s1"}, donorState, timestamp),
+ resharding::makeDonorShard(ShardId{"s2"}, donorState, timestamp, abortReason),
+ resharding::makeDonorShard(ShardId{"s3"}, donorState, timestamp)};
}
std::vector<RecipientShardEntry> makeMockRecipientsInState(
@@ -72,9 +72,9 @@ protected:
boost::optional<Timestamp> timestamp = boost::none,
boost::optional<Status> abortReason = boost::none) {
        // The mock state here is simulating only one recipient shard having errored locally.
- return {makeRecipientShard(ShardId{"s1"}, recipientState),
- makeRecipientShard(ShardId{"s2"}, recipientState, abortReason),
- makeRecipientShard(ShardId{"s3"}, recipientState)};
+ return {resharding::makeRecipientShard(ShardId{"s1"}, recipientState),
+ resharding::makeRecipientShard(ShardId{"s2"}, recipientState, abortReason),
+ resharding::makeRecipientShard(ShardId{"s3"}, recipientState)};
}
};
@@ -85,15 +85,15 @@ TEST_F(ReshardingCoordinatorObserverTest, onReshardingParticipantTransitionSucce
auto donorShards = makeMockDonorsInState(DonorStateEnum::kDonatingInitialData, Timestamp(1, 1));
std::vector<RecipientShardEntry> recipientShards0{
- makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning),
- makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)};
+ resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning),
+ resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)};
auto coordinatorDoc0 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards0, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc0);
ASSERT_FALSE(fut.isReady());
std::vector<RecipientShardEntry> recipientShards1{
- makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying),
- makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)};
+ resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying),
+ resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)};
auto coordinatorDoc1 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards1, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc1);
ASSERT_TRUE(fut.isReady());
@@ -110,25 +110,25 @@ TEST_F(ReshardingCoordinatorObserverTest, onReshardingParticipantTransitionTwoOu
auto donorShards = makeMockDonorsInState(DonorStateEnum::kDonatingInitialData, Timestamp(1, 1));
std::vector<RecipientShardEntry> recipientShards0{
- {makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
- {makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
- {makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
+ {resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
+ {resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
+ {resharding::makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
auto coordinatorDoc0 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards0, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc0);
ASSERT_FALSE(fut.isReady());
std::vector<RecipientShardEntry> recipientShards1{
- {makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
- {makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
- {makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kCloning)}};
+ {resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
+ {resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
+ {resharding::makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kCloning)}};
auto coordinatorDoc1 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards1, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc1);
ASSERT_FALSE(fut.isReady());
std::vector<RecipientShardEntry> recipientShards2{
- {makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying)},
- {makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
- {makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
+ {resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying)},
+ {resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
+ {resharding::makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
auto coordinatorDoc2 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards2, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc2);
ASSERT_TRUE(fut.isReady());
@@ -145,11 +145,11 @@ TEST_F(ReshardingCoordinatorObserverTest, participantReportsError) {
auto donorShards = makeMockDonorsInState(DonorStateEnum::kDonatingInitialData, Timestamp(1, 1));
std::vector<RecipientShardEntry> recipientShards{
- {makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
- {makeRecipientShard(ShardId{"s2"},
- RecipientStateEnum::kError,
- Status{ErrorCodes::InternalError, "We gotta abort"})},
- {makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
+ {resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
+ {resharding::makeRecipientShard(ShardId{"s2"},
+ RecipientStateEnum::kError,
+ Status{ErrorCodes::InternalError, "We gotta abort"})},
+ {resharding::makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
auto coordinatorDoc = makeCoordinatorDocWithRecipientsAndDonors(recipientShards, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc);
auto resp = fut.getNoThrow();
@@ -173,9 +173,11 @@ TEST_F(ReshardingCoordinatorObserverTest, participantsDoneAborting) {
// donor who hasn't seen there was an error yet.
auto recipientShards = makeMockRecipientsInState(RecipientStateEnum::kDone, Timestamp(1, 1));
std::vector<DonorShardEntry> donorShards0{
- {makeDonorShard(ShardId{"s1"}, DonorStateEnum::kDone, Timestamp(1, 1), abortReason)},
- {makeDonorShard(ShardId{"s2"}, DonorStateEnum::kDonatingOplogEntries, Timestamp(1, 1))},
- {makeDonorShard(ShardId{"s3"}, DonorStateEnum::kDone, Timestamp(1, 1))}};
+ {resharding::makeDonorShard(
+ ShardId{"s1"}, DonorStateEnum::kDone, Timestamp(1, 1), abortReason)},
+ {resharding::makeDonorShard(
+ ShardId{"s2"}, DonorStateEnum::kDonatingOplogEntries, Timestamp(1, 1))},
+ {resharding::makeDonorShard(ShardId{"s3"}, DonorStateEnum::kDone, Timestamp(1, 1))}};
auto coordinatorDoc0 =
makeCoordinatorDocWithRecipientsAndDonors(recipientShards, donorShards0, abortReason);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc0);
@@ -183,9 +185,10 @@ TEST_F(ReshardingCoordinatorObserverTest, participantsDoneAborting) {
// All participants are done.
std::vector<DonorShardEntry> donorShards1{
- {makeDonorShard(ShardId{"s1"}, DonorStateEnum::kDone, Timestamp(1, 1), abortReason)},
- {makeDonorShard(ShardId{"s2"}, DonorStateEnum::kDone, Timestamp(1, 1))},
- {makeDonorShard(ShardId{"s3"}, DonorStateEnum::kDone, Timestamp(1, 1))}};
+ {resharding::makeDonorShard(
+ ShardId{"s1"}, DonorStateEnum::kDone, Timestamp(1, 1), abortReason)},
+ {resharding::makeDonorShard(ShardId{"s2"}, DonorStateEnum::kDone, Timestamp(1, 1))},
+ {resharding::makeDonorShard(ShardId{"s3"}, DonorStateEnum::kDone, Timestamp(1, 1))}};
auto coordinatorDoc1 =
makeCoordinatorDocWithRecipientsAndDonors(recipientShards, donorShards1, abortReason);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc1);
@@ -206,15 +209,15 @@ TEST_F(ReshardingCoordinatorObserverTest, onReshardingRecipientsOutOfSync) {
auto donorShards = makeMockDonorsInState(DonorStateEnum::kDonatingInitialData, Timestamp(1, 1));
std::vector<RecipientShardEntry> recipientShards0{
- makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kUnused),
- makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kStrictConsistency)};
+ resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kUnused),
+ resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kStrictConsistency)};
auto coordinatorDoc0 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards0, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc0);
ASSERT_FALSE(fut.isReady());
std::vector<RecipientShardEntry> recipientShards1{
- makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying),
- makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kStrictConsistency)};
+ resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying),
+ resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kStrictConsistency)};
auto coordinatorDoc1 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards1, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc1);
ASSERT_TRUE(fut.isReady());
@@ -231,15 +234,18 @@ TEST_F(ReshardingCoordinatorObserverTest, onDonorsReportedMinFetchTimestamp) {
auto recipientShards = makeMockRecipientsInState(RecipientStateEnum::kUnused);
std::vector<DonorShardEntry> donorShards0{
- {makeDonorShard(ShardId{"s1"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))},
- {makeDonorShard(ShardId{"s2"}, DonorStateEnum::kPreparingToDonate)}};
+ {resharding::makeDonorShard(
+ ShardId{"s1"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))},
+ {resharding::makeDonorShard(ShardId{"s2"}, DonorStateEnum::kPreparingToDonate)}};
auto coordinatorDoc0 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards, donorShards0);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc0);
ASSERT_FALSE(fut.isReady());
std::vector<DonorShardEntry> donorShards1{
- {makeDonorShard(ShardId{"s1"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))},
- {makeDonorShard(ShardId{"s2"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))}};
+ {resharding::makeDonorShard(
+ ShardId{"s1"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))},
+ {resharding::makeDonorShard(
+ ShardId{"s2"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))}};
auto coordinatorDoc1 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards, donorShards1);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc1);
ASSERT_TRUE(fut.isReady());
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
index cbd6232a5d1..9aa5ed7c223 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/resharding/resharding_coordinator_service.h"
#include "mongo/base/string_data.h"
@@ -79,7 +76,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kResharding
-
namespace mongo {
namespace {
@@ -150,12 +146,12 @@ using resharding_metrics::getIntervalStartFieldName;
using DocT = ReshardingCoordinatorDocument;
const auto metricsPrefix = resharding_metrics::getMetricsPrefix<DocT>();
-void buildStateDocumentCloneMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetricsNew* metrics) {
+void buildStateDocumentCloneMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetrics* metrics) {
bob.append(getIntervalStartFieldName<DocT>(ReshardingRecipientMetrics::kDocumentCopyFieldName),
metrics->getCopyingBegin());
}
-void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetricsNew* metrics) {
+void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetrics* metrics) {
bob.append(getIntervalEndFieldName<DocT>(ReshardingRecipientMetrics::kDocumentCopyFieldName),
metrics->getCopyingEnd());
bob.append(
@@ -164,14 +160,14 @@ void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetr
}
void buildStateDocumentBlockingWritesMetricsForUpdate(BSONObjBuilder& bob,
- ReshardingMetricsNew* metrics) {
+ ReshardingMetrics* metrics) {
bob.append(
getIntervalEndFieldName<DocT>(ReshardingRecipientMetrics::kOplogApplicationFieldName),
metrics->getApplyingEnd());
}
void buildStateDocumentMetricsForUpdate(BSONObjBuilder& bob,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
CoordinatorStateEnum newState) {
switch (newState) {
case CoordinatorStateEnum::kCloning:
@@ -189,7 +185,7 @@ void buildStateDocumentMetricsForUpdate(BSONObjBuilder& bob,
}
void writeToCoordinatorStateNss(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
TxnNumber txnNumber) {
BatchedCommandRequest request([&] {
@@ -295,9 +291,9 @@ TypeCollectionRecipientFields constructRecipientFields(
coordinatorDoc.getSourceNss(),
resharding::gReshardingMinimumOperationDurationMillis.load());
- emplaceCloneTimestampIfExists(recipientFields, coordinatorDoc.getCloneTimestamp());
- emplaceApproxBytesToCopyIfExists(recipientFields,
- coordinatorDoc.getReshardingApproxCopySizeStruct());
+ resharding::emplaceCloneTimestampIfExists(recipientFields, coordinatorDoc.getCloneTimestamp());
+ resharding::emplaceApproxBytesToCopyIfExists(
+ recipientFields, coordinatorDoc.getReshardingApproxCopySizeStruct());
return recipientFields;
}
@@ -323,10 +319,10 @@ BSONObj createReshardingFieldsUpdateForOriginalNss(
<< CollectionType::kAllowMigrationsFieldName << false));
}
case CoordinatorStateEnum::kPreparingToDonate: {
- TypeCollectionDonorFields donorFields(
- coordinatorDoc.getTempReshardingNss(),
- coordinatorDoc.getReshardingKey(),
- extractShardIdsFromParticipantEntries(coordinatorDoc.getRecipientShards()));
+ TypeCollectionDonorFields donorFields(coordinatorDoc.getTempReshardingNss(),
+ coordinatorDoc.getReshardingKey(),
+ resharding::extractShardIdsFromParticipantEntries(
+ coordinatorDoc.getRecipientShards()));
BSONObjBuilder updateBuilder;
{
@@ -394,7 +390,7 @@ BSONObj createReshardingFieldsUpdateForOriginalNss(
// If the abortReason exists, include it in the update.
setBuilder.append("reshardingFields.abortReason", *abortReason);
- auto abortStatus = getStatusFromAbortReason(coordinatorDoc);
+ auto abortStatus = resharding::getStatusFromAbortReason(coordinatorDoc);
setBuilder.append("reshardingFields.userCanceled",
abortStatus == ErrorCodes::ReshardCollectionAborted);
}
@@ -504,7 +500,7 @@ void writeToConfigCollectionsForTempNss(OperationContext* opCtx,
if (auto abortReason = coordinatorDoc.getAbortReason()) {
setBuilder.append("reshardingFields.abortReason", *abortReason);
- auto abortStatus = getStatusFromAbortReason(coordinatorDoc);
+ auto abortStatus = resharding::getStatusFromAbortReason(coordinatorDoc);
setBuilder.append("reshardingFields.userCanceled",
abortStatus == ErrorCodes::ReshardCollectionAborted);
}
@@ -608,8 +604,8 @@ BSONObj makeFlushRoutingTableCacheUpdatesCmd(const NamespaceString& nss) {
BSON(WriteConcernOptions::kWriteConcernField << kMajorityWriteConcern.toBSON()));
}
-ReshardingMetricsNew::CoordinatorState toMetricsState(CoordinatorStateEnum state) {
- return ReshardingMetricsNew::CoordinatorState(state);
+ReshardingMetrics::CoordinatorState toMetricsState(CoordinatorStateEnum state) {
+ return ReshardingMetrics::CoordinatorState(state);
}
} // namespace
@@ -664,7 +660,7 @@ void cleanupSourceConfigCollections(OperationContext* opCtx,
}
void writeDecisionPersistedState(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
OID newCollectionEpoch,
Timestamp newCollectionTimestamp) {
@@ -691,7 +687,7 @@ void writeDecisionPersistedState(OperationContext* opCtx,
}
void insertCoordDocAndChangeOrigCollEntry(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc) {
ShardingCatalogManager::get(opCtx)->bumpCollectionVersionAndChangeMetadataInTxn(
opCtx,
@@ -741,7 +737,7 @@ void insertCoordDocAndChangeOrigCollEntry(OperationContext* opCtx,
void writeParticipantShardsAndTempCollInfo(
OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& updatedCoordinatorDoc,
std::vector<ChunkType> initialChunks,
std::vector<BSONObj> zones) {
@@ -770,7 +766,7 @@ void writeParticipantShardsAndTempCollInfo(
void writeStateTransitionAndCatalogUpdatesThenBumpShardVersions(
OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc) {
// Run updates to config.reshardingOperations and config.collections in a transaction
auto nextState = coordinatorDoc.getState();
@@ -804,7 +800,7 @@ void writeStateTransitionAndCatalogUpdatesThenBumpShardVersions(
}
void removeCoordinatorDocAndReshardingFields(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
boost::optional<Status> abortReason) {
// If the coordinator needs to abort and isn't in kInitializing, additional collections need to
@@ -857,7 +853,7 @@ ChunkVersion ReshardingCoordinatorExternalState::calculateChunkVersionForInitial
OperationContext* opCtx) {
const auto now = VectorClock::get(opCtx)->getTime();
const auto timestamp = now.clusterTime().asTimestamp();
- return ChunkVersion(1, 0, OID::gen(), timestamp);
+ return ChunkVersion({OID::gen(), timestamp}, {1, 0});
}
std::vector<DonorShardEntry> constructDonorShardEntries(const std::set<ShardId>& donorShardIds) {
@@ -1036,7 +1032,7 @@ ReshardingCoordinatorService::ReshardingCoordinator::ReshardingCoordinator(
: PrimaryOnlyService::TypedInstance<ReshardingCoordinator>(),
_id(coordinatorDoc.getReshardingUUID().toBSON()),
_coordinatorService(coordinatorService),
- _metricsNew{ReshardingMetricsNew::initializeFrom(coordinatorDoc, getGlobalServiceContext())},
+ _metrics{ReshardingMetrics::initializeFrom(coordinatorDoc, getGlobalServiceContext())},
_metadata(coordinatorDoc.getCommonReshardingMetadata()),
_coordinatorDoc(coordinatorDoc),
_markKilledExecutor(std::make_shared<ThreadPool>([] {
@@ -1055,7 +1051,7 @@ ReshardingCoordinatorService::ReshardingCoordinator::ReshardingCoordinator(
_reshardingCoordinatorObserver->onReshardingParticipantTransition(coordinatorDoc);
}
- _metricsNew->onStateTransition(boost::none, toMetricsState(coordinatorDoc.getState()));
+ _metrics->onStateTransition(boost::none, toMetricsState(coordinatorDoc.getState()));
}
void ReshardingCoordinatorService::ReshardingCoordinator::installCoordinatorDoc(
@@ -1080,8 +1076,8 @@ void ReshardingCoordinatorService::ReshardingCoordinator::installCoordinatorDoc(
const auto previousState = _coordinatorDoc.getState();
_coordinatorDoc = doc;
- _metricsNew->onStateTransition(toMetricsState(previousState),
- toMetricsState(_coordinatorDoc.getState()));
+ _metrics->onStateTransition(toMetricsState(previousState),
+ toMetricsState(_coordinatorDoc.getState()));
ShardingLogging::get(opCtx)->logChange(opCtx,
"resharding.coordinator.transition",
@@ -1090,7 +1086,7 @@ void ReshardingCoordinatorService::ReshardingCoordinator::installCoordinatorDoc(
kMajorityWriteConcern);
}
-void markCompleted(const Status& status, ReshardingMetricsNew* metrics) {
+void markCompleted(const Status& status, ReshardingMetrics* metrics) {
if (status.isOK()) {
metrics->onSuccess();
} else if (status == ErrorCodes::ReshardCollectionAborted) {
@@ -1320,7 +1316,7 @@ ReshardingCoordinatorService::ReshardingCoordinator::_commitAndFinishReshardOper
})
.then([this, executor] { return _awaitAllParticipantShardsDone(executor); })
.then([this, executor] {
- _metricsNew->onCriticalSectionEnd();
+ _metrics->onCriticalSectionEnd();
// Best-effort attempt to trigger a refresh on the participant shards so
// they see the collection metadata without reshardingFields and no longer
@@ -1403,6 +1399,14 @@ SemiFuture<void> ReshardingCoordinatorService::ReshardingCoordinator::run(
.onCompletion([outerStatus](Status) { return outerStatus; });
})
.onCompletion([this, self = shared_from_this()](Status status) {
+ _metrics->onStateTransition(toMetricsState(_coordinatorDoc.getState()), boost::none);
+
+            // Destroy the metrics object early so its lifetime is not tied to the lifetime of
+            // this state machine. Future callbacks copy shared pointers to this state machine,
+            // which causes it to live longer than expected and can overlap with a newer
+            // instance when stepping up.
+ _metrics.reset();
+
if (!status.isOK()) {
{
auto lg = stdx::lock_guard(_fulfillmentMutex);
@@ -1416,8 +1420,6 @@ SemiFuture<void> ReshardingCoordinatorService::ReshardingCoordinator::run(
}
_reshardingCoordinatorObserver->interrupt(status);
}
-
- _metricsNew->onStateTransition(toMetricsState(_coordinatorDoc.getState()), boost::none);
})
.semi();
}
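
The block added above resets _metrics inside the onCompletion continuation because callbacks that copy shared pointers to the state machine can keep the whole instance alive longer than the operation itself. The sketch below shows the same lifetime trick with plain std::shared_ptr and a hand-rolled completion callback; it is a model of the idea, not the real ReshardingCoordinator API.

    // Standalone sketch: a long-lived callback holds the state machine via
    // shared_from_this(), so the metrics object is released explicitly in the
    // completion handler instead of waiting for the state machine's destructor.
    #include <functional>
    #include <iostream>
    #include <memory>

    struct Metrics {
        ~Metrics() { std::cout << "metrics destroyed\n"; }
    };

    class StateMachine : public std::enable_shared_from_this<StateMachine> {
    public:
        StateMachine() : _metrics(std::make_shared<Metrics>()) {}

        std::function<void()> makeCompletionHandler() {
            // The returned callback copies a shared_ptr to *this, so the state machine
            // can outlive the operation; _metrics is reset here so it does not.
            return [self = shared_from_this()] {
                std::cout << "operation complete\n";
                self->_metrics.reset();  // release metrics as soon as the run finishes
            };
        }

    private:
        std::shared_ptr<Metrics> _metrics;
    };

    int main() {
        auto machine = std::make_shared<StateMachine>();
        auto onCompletion = machine->makeCompletionHandler();
        onCompletion();  // "metrics destroyed" prints here, not when `machine` goes away
    }
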
@@ -1432,12 +1434,12 @@ ExecutorFuture<void> ReshardingCoordinatorService::ReshardingCoordinator::_onAbo
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
// Notify metrics as the operation is now complete for external observers.
- markCompleted(status, _metricsNew.get());
+ markCompleted(status, _metrics.get());
// The temporary collection and its corresponding entries were never created. Only
// the coordinator document and reshardingFields require cleanup.
resharding::removeCoordinatorDocAndReshardingFields(
- opCtx.get(), _metricsNew.get(), _coordinatorDoc, status);
+ opCtx.get(), _metrics.get(), _coordinatorDoc, status);
return status;
})
.onTransientError([](const Status& retryStatus) {
@@ -1506,7 +1508,7 @@ void ReshardingCoordinatorService::ReshardingCoordinator::abort() {
boost::optional<BSONObj> ReshardingCoordinatorService::ReshardingCoordinator::reportForCurrentOp(
MongoProcessInterface::CurrentOpConnectionsMode,
MongoProcessInterface::CurrentOpSessionsMode) noexcept {
- return _metricsNew->reportForCurrentOp();
+ return _metrics->reportForCurrentOp();
}
std::shared_ptr<ReshardingCoordinatorObserver>
@@ -1561,13 +1563,13 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_insertCoordDocAndChan
ReshardingCoordinatorDocument updatedCoordinatorDoc = _coordinatorDoc;
updatedCoordinatorDoc.setState(CoordinatorStateEnum::kInitializing);
resharding::insertCoordDocAndChangeOrigCollEntry(
- opCtx.get(), _metricsNew.get(), updatedCoordinatorDoc);
+ opCtx.get(), _metrics.get(), updatedCoordinatorDoc);
installCoordinatorDoc(opCtx.get(), updatedCoordinatorDoc);
{
// Note: don't put blocking or interruptible code in this block.
_coordinatorDocWrittenPromise.emplaceValue();
- _metricsNew->onStarted();
+ _metrics->onStarted();
}
pauseBeforeInsertCoordinatorDoc.pauseWhileSet();
@@ -1592,14 +1594,14 @@ void ReshardingCoordinatorService::ReshardingCoordinator::
// the possibility of the document reaching the BSONObj size constraint.
std::vector<BSONObj> zones;
if (updatedCoordinatorDoc.getZones()) {
- zones = buildTagsDocsFromZones(updatedCoordinatorDoc.getTempReshardingNss(),
- *updatedCoordinatorDoc.getZones());
+ zones = resharding::buildTagsDocsFromZones(updatedCoordinatorDoc.getTempReshardingNss(),
+ *updatedCoordinatorDoc.getZones());
}
updatedCoordinatorDoc.setPresetReshardedChunks(boost::none);
updatedCoordinatorDoc.setZones(boost::none);
resharding::writeParticipantShardsAndTempCollInfo(opCtx.get(),
- _metricsNew.get(),
+ _metrics.get(),
updatedCoordinatorDoc,
std::move(shardsAndChunks.initialChunks),
std::move(zones));
@@ -1652,14 +1654,14 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllDonorsReadyToDonat
opCtx.get(), _ctHolder->getAbortToken());
}
- auto highestMinFetchTimestamp =
- getHighestMinFetchTimestamp(coordinatorDocChangedOnDisk.getDonorShards());
+ auto highestMinFetchTimestamp = resharding::getHighestMinFetchTimestamp(
+ coordinatorDocChangedOnDisk.getDonorShards());
_updateCoordinatorDocStateAndCatalogEntries(
CoordinatorStateEnum::kCloning,
coordinatorDocChangedOnDisk,
highestMinFetchTimestamp,
computeApproxCopySize(coordinatorDocChangedOnDisk));
- _metricsNew->onCopyingBegin();
+ _metrics->onCopyingBegin();
})
.then([this] { return _waitForMajority(_ctHolder->getAbortToken()); });
}
@@ -1678,8 +1680,8 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllRecipientsFinished
.then([this](ReshardingCoordinatorDocument coordinatorDocChangedOnDisk) {
this->_updateCoordinatorDocStateAndCatalogEntries(CoordinatorStateEnum::kApplying,
coordinatorDocChangedOnDisk);
- _metricsNew->onCopyingEnd();
- _metricsNew->onApplyingBegin();
+ _metrics->onCopyingEnd();
+ _metrics->onApplyingBegin();
})
.then([this] { return _waitForMajority(_ctHolder->getAbortToken()); });
}
@@ -1691,9 +1693,9 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_startCommitMonitor(
}
_commitMonitor = std::make_shared<resharding::CoordinatorCommitMonitor>(
- _metricsNew,
+ _metrics,
_coordinatorDoc.getSourceNss(),
- extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards()),
+ resharding::extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards()),
**executor,
_ctHolder->getCommitMonitorToken());
@@ -1718,9 +1720,22 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllRecipientsFinished
_startCommitMonitor(executor);
LOGV2(5391602, "Resharding operation waiting for an okay to enter critical section");
- return future_util::withCancellation(_canEnterCritical.getFuture(),
- _ctHolder->getAbortToken())
+
+            // The _reshardingCoordinatorObserver->awaitAllRecipientsInStrictConsistency() future
+            // is used to report recipient shard errors encountered during the Applying phase and,
+            // in turn, to abort the resharding operation.
+            // In all other cases, the _canEnterCritical.getFuture() resolves first and the
+            // operation then proceeds to entering the critical section, depending on the status
+            // returned.
+ return future_util::withCancellation(
+ whenAny(
+ _canEnterCritical.getFuture().thenRunOn(**executor),
+ _reshardingCoordinatorObserver->awaitAllRecipientsInStrictConsistency()
+ .thenRunOn(**executor)
+ .ignoreValue()),
+ _ctHolder->getAbortToken())
.thenRunOn(**executor)
+ .then([](auto result) { return result.result; })
.onCompletion([this](Status status) {
_ctHolder->cancelCommitMonitor();
if (status.isOK()) {
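The comment above describes a first-signal-wins race: whichever of the two futures resolves first, the okay-to-commit signal or a recipient error in strict consistency, decides whether the coordinator proceeds into the critical section or aborts. A minimal sketch of that shape, using only standard C++ (a condition variable and hypothetical names such as FirstSignalWins) rather than MongoDB's future_util and whenAny machinery:

#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <optional>
#include <thread>

// Hypothetical illustration: race an "okay to enter the critical section" signal
// against an "error reported" signal; only the first signal is recorded.
struct FirstSignalWins {
    std::mutex mtx;
    std::condition_variable cv;
    std::optional<bool> okToProceed;  // true = proceed, false = abort

    void signal(bool proceed) {
        std::lock_guard<std::mutex> lk(mtx);
        if (!okToProceed) {
            okToProceed = proceed;
            cv.notify_all();
        }
    }

    bool wait() {
        std::unique_lock<std::mutex> lk(mtx);
        cv.wait(lk, [&] { return okToProceed.has_value(); });
        return *okToProceed;
    }
};

int main() {
    FirstSignalWins race;
    std::thread commit([&] {
        std::this_thread::sleep_for(std::chrono::milliseconds(50));
        race.signal(true);   // analogous to _canEnterCritical resolving
    });
    std::thread recipientError([&] {
        std::this_thread::sleep_for(std::chrono::milliseconds(200));
        race.signal(false);  // analogous to a recipient error surfacing
    });
    std::cout << (race.wait() ? "enter critical section" : "abort") << "\n";
    commit.join();
    recipientError.join();
    return 0;
}

The real code additionally wraps the race in future_util::withCancellation with the abort token, so either path can still be interrupted.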
@@ -1738,8 +1753,8 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllRecipientsFinished
this->_updateCoordinatorDocStateAndCatalogEntries(CoordinatorStateEnum::kBlockingWrites,
_coordinatorDoc);
- _metricsNew->onApplyingEnd();
- _metricsNew->onCriticalSectionBegin();
+ _metrics->onApplyingEnd();
+ _metrics->onCriticalSectionBegin();
})
.then([this] { return _waitForMajority(_ctHolder->getAbortToken()); })
.thenRunOn(**executor)
@@ -1805,7 +1820,7 @@ Future<void> ReshardingCoordinatorService::ReshardingCoordinator::_commit(
resharding::writeDecisionPersistedState(opCtx.get(),
- _metricsNew.get(),
+ _metrics.get(),
updatedCoordinatorDoc,
std::move(newCollectionEpoch),
std::move(newCollectionTimestamp));
@@ -1836,7 +1851,7 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllParticipantShardsD
boost::optional<Status> abortReason;
if (coordinatorDoc.getAbortReason()) {
- abortReason = getStatusFromAbortReason(coordinatorDoc);
+ abortReason = resharding::getStatusFromAbortReason(coordinatorDoc);
}
if (!abortReason) {
@@ -1849,40 +1864,18 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllParticipantShardsD
const auto cmdObj =
ShardsvrDropCollectionIfUUIDNotMatchingRequest(nss, notMatchingThisUUID)
.toBSON({});
-
- try {
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx.get(), nss.db(), cmdObj, allShardIds, **executor);
- } catch (const DBException& ex) {
- if (ex.code() == ErrorCodes::CommandNotFound) {
- // TODO SERVER-60531 get rid of the catch logic
- // Cleanup failed because at least one shard could is using a binary
- // not supporting the ShardsvrDropCollectionIfUUIDNotMatching command.
- LOGV2_INFO(5423100,
- "Resharding coordinator couldn't guarantee older incarnations "
- "of the collection were dropped. A chunk migration to a shard "
- "with an older incarnation of the collection will fail",
- "namespace"_attr = nss.ns());
- } else if (opCtx->checkForInterruptNoAssert().isOK()) {
- LOGV2_INFO(
- 5423101,
- "Resharding coordinator failed while trying to drop possible older "
- "incarnations of the collection. A chunk migration to a shard with "
- "an older incarnation of the collection will fail",
- "namespace"_attr = nss.ns(),
- "error"_attr = redact(ex.toStatus()));
- }
- }
+ _reshardingCoordinatorExternalState->sendCommandToShards(
+ opCtx.get(), nss.db(), cmdObj, allShardIds, **executor);
}
reshardingPauseCoordinatorBeforeRemovingStateDoc.pauseWhileSetAndNotCanceled(
opCtx.get(), _ctHolder->getStepdownToken());
// Notify metrics as the operation is now complete for external observers.
- markCompleted(abortReason ? *abortReason : Status::OK(), _metricsNew.get());
+ markCompleted(abortReason ? *abortReason : Status::OK(), _metrics.get());
resharding::removeCoordinatorDocAndReshardingFields(
- opCtx.get(), _metricsNew.get(), coordinatorDoc, abortReason);
+ opCtx.get(), _metrics.get(), coordinatorDoc, abortReason);
});
}
@@ -1896,13 +1889,13 @@ void ReshardingCoordinatorService::ReshardingCoordinator::
// Build new state doc for coordinator state update
ReshardingCoordinatorDocument updatedCoordinatorDoc = coordinatorDoc;
updatedCoordinatorDoc.setState(nextState);
- emplaceApproxBytesToCopyIfExists(updatedCoordinatorDoc, std::move(approxCopySize));
- emplaceCloneTimestampIfExists(updatedCoordinatorDoc, std::move(cloneTimestamp));
- emplaceTruncatedAbortReasonIfExists(updatedCoordinatorDoc, abortReason);
+ resharding::emplaceApproxBytesToCopyIfExists(updatedCoordinatorDoc, std::move(approxCopySize));
+ resharding::emplaceCloneTimestampIfExists(updatedCoordinatorDoc, std::move(cloneTimestamp));
+ resharding::emplaceTruncatedAbortReasonIfExists(updatedCoordinatorDoc, abortReason);
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
resharding::writeStateTransitionAndCatalogUpdatesThenBumpShardVersions(
- opCtx.get(), _metricsNew.get(), updatedCoordinatorDoc);
+ opCtx.get(), _metrics.get(), updatedCoordinatorDoc);
// Update in-memory coordinator doc
installCoordinatorDoc(opCtx.get(), updatedCoordinatorDoc);
@@ -1911,9 +1904,10 @@ void ReshardingCoordinatorService::ReshardingCoordinator::
void ReshardingCoordinatorService::ReshardingCoordinator::_sendCommandToAllParticipants(
const std::shared_ptr<executor::ScopedTaskExecutor>& executor, const BSONObj& command) {
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
- auto donorShardIds = extractShardIdsFromParticipantEntries(_coordinatorDoc.getDonorShards());
+ auto donorShardIds =
+ resharding::extractShardIdsFromParticipantEntries(_coordinatorDoc.getDonorShards());
auto recipientShardIds =
- extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards());
+ resharding::extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards());
std::set<ShardId> participantShardIds{donorShardIds.begin(), donorShardIds.end()};
participantShardIds.insert(recipientShardIds.begin(), recipientShardIds.end());
@@ -1929,7 +1923,7 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_sendCommandToAllRecip
const std::shared_ptr<executor::ScopedTaskExecutor>& executor, const BSONObj& command) {
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
auto recipientShardIds =
- extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards());
+ resharding::extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards());
_reshardingCoordinatorExternalState->sendCommandToShards(
opCtx.get(),
@@ -1942,7 +1936,8 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_sendCommandToAllRecip
void ReshardingCoordinatorService::ReshardingCoordinator::_sendCommandToAllDonors(
const std::shared_ptr<executor::ScopedTaskExecutor>& executor, const BSONObj& command) {
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
- auto donorShardIds = extractShardIdsFromParticipantEntries(_coordinatorDoc.getDonorShards());
+ auto donorShardIds =
+ resharding::extractShardIdsFromParticipantEntries(_coordinatorDoc.getDonorShards());
_reshardingCoordinatorExternalState->sendCommandToShards(
opCtx.get(),
@@ -2036,7 +2031,7 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_updateChunkImbalanceM
auto imbalanceCount =
getMaxChunkImbalanceCount(routingInfo, allShardsWithOpTime.value, zoneInfo);
- _metricsNew->setLastOpEndingChunkImbalance(imbalanceCount);
+ _metrics->setLastOpEndingChunkImbalance(imbalanceCount);
} catch (const DBException& ex) {
LOGV2_WARNING(5543000,
"Encountered error while trying to update resharding chunk imbalance metrics",
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.h b/src/mongo/db/s/resharding/resharding_coordinator_service.h
index d24c23f6b68..6f0eb95c79a 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.h
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.h
@@ -33,7 +33,7 @@
#include "mongo/db/repl/primary_only_service.h"
#include "mongo/db/s/resharding/coordinator_document_gen.h"
#include "mongo/db/s/resharding/resharding_coordinator_observer.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/platform/mutex.h"
#include "mongo/s/catalog/type_chunk.h"
#include "mongo/s/catalog/type_collection.h"
@@ -55,28 +55,28 @@ void cleanupSourceConfigCollections(OperationContext* opCtx,
const ReshardingCoordinatorDocument& coordinatorDoc);
void writeDecisionPersistedState(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
OID newCollectionEpoch,
Timestamp newCollectionTimestamp);
void insertCoordDocAndChangeOrigCollEntry(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc);
void writeParticipantShardsAndTempCollInfo(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
std::vector<ChunkType> initialChunks,
std::vector<BSONObj> zones);
void writeStateTransitionAndCatalogUpdatesThenBumpShardVersions(
OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc);
void removeCoordinatorDocAndReshardingFields(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
boost::optional<Status> abortReason = boost::none);
} // namespace resharding
@@ -513,7 +513,7 @@ private:
// The primary-only service instance corresponding to the coordinator instance. Not owned.
const ReshardingCoordinatorService* const _coordinatorService;
- std::shared_ptr<ReshardingMetricsNew> _metricsNew;
+ std::shared_ptr<ReshardingMetrics> _metrics;
// The in-memory representation of the immutable portion of the document in
// config.reshardingOperations.
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp
index dc16d5fe271..1fc380093bf 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include <boost/optional.hpp>
#include <functional>
@@ -59,7 +56,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -203,7 +199,7 @@ public:
{DonorShardEntry(ShardId("shard0000"), {})},
{RecipientShardEntry(ShardId("shard0001"), {})});
doc.setCommonReshardingMetadata(meta);
- emplaceCloneTimestampIfExists(doc, cloneTimestamp);
+ resharding::emplaceCloneTimestampIfExists(doc, cloneTimestamp);
return doc;
}
@@ -372,10 +368,11 @@ public:
TypeCollectionReshardingFields reshardingFields(coordinatorDoc.getReshardingUUID());
reshardingFields.setState(coordinatorDoc.getState());
- reshardingFields.setDonorFields(TypeCollectionDonorFields(
- coordinatorDoc.getTempReshardingNss(),
- coordinatorDoc.getReshardingKey(),
- extractShardIdsFromParticipantEntries(coordinatorDoc.getRecipientShards())));
+ reshardingFields.setDonorFields(
+ TypeCollectionDonorFields(coordinatorDoc.getTempReshardingNss(),
+ coordinatorDoc.getReshardingKey(),
+ resharding::extractShardIdsFromParticipantEntries(
+ coordinatorDoc.getRecipientShards())));
auto originalNssCatalogEntry = makeOriginalCollectionCatalogEntry(
coordinatorDoc,
@@ -414,7 +411,7 @@ public:
_newShardKey.isShardKey(shardKey.toBSON()) ? _newChunkRanges : _oldChunkRanges;
// Create two chunks, one on each shard with the given namespace and epoch
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(uuid, chunkRanges[0], version, ShardId("shard0000"));
chunk1.setName(ids[0]);
version.incMinor();
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_test.cpp
index da56d0d8cb5..35ffa75b31a 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_test.cpp
@@ -27,10 +27,6 @@
* it in the license file.
*/
-#include "mongo/db/s/resharding/coordinator_document_gen.h"
-
-#include "mongo/platform/basic.h"
-
#include <boost/optional.hpp>
#include "mongo/client/remote_command_targeter_mock.h"
@@ -40,6 +36,7 @@
#include "mongo/db/repl/storage_interface_mock.h"
#include "mongo/db/s/config/config_server_test_fixture.h"
#include "mongo/db/s/config/index_on_config.h"
+#include "mongo/db/s/resharding/coordinator_document_gen.h"
#include "mongo/db/s/resharding/resharding_coordinator_service.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/s/transaction_coordinator_service.h"
@@ -52,7 +49,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -85,13 +81,12 @@ protected:
TransactionCoordinatorService::get(operationContext())
->onShardingInitialization(operationContext(), true);
- _metrics =
- ReshardingMetricsNew::makeInstance(_originalUUID,
- _newShardKey.toBSON(),
- _originalNss,
- ReshardingMetricsNew::Role::kCoordinator,
- getServiceContext()->getFastClockSource()->now(),
- getServiceContext());
+ _metrics = ReshardingMetrics::makeInstance(_originalUUID,
+ _newShardKey.toBSON(),
+ _originalNss,
+ ReshardingMetrics::Role::kCoordinator,
+ getServiceContext()->getFastClockSource()->now(),
+ getServiceContext());
}
void tearDown() override {
@@ -180,7 +175,7 @@ protected:
_newShardKey.isShardKey(shardKey.toBSON()) ? _newChunkRanges : _oldChunkRanges;
// Create two chunks, one on each shard with the given namespace and epoch
- ChunkVersion version(1, 0, epoch, Timestamp(1, 2));
+ ChunkVersion version({epoch, Timestamp(1, 2)}, {1, 0});
ChunkType chunk1(uuid, chunkRanges[0], version, ShardId("shard0000"));
chunk1.setName(ids[0]);
version.incMinor();
@@ -227,7 +222,7 @@ protected:
client.insert(CollectionType::ConfigNS.ns(), originalNssCatalogEntry.toBSON());
auto tempNssCatalogEntry = createTempReshardingCollectionType(
- opCtx, coordinatorDoc, ChunkVersion(1, 1, OID::gen(), Timestamp(1, 2)), BSONObj());
+ opCtx, coordinatorDoc, ChunkVersion({OID::gen(), Timestamp(1, 2)}, {1, 1}), BSONObj());
client.insert(CollectionType::ConfigNS.ns(), tempNssCatalogEntry.toBSON());
return coordinatorDoc;
@@ -519,11 +514,11 @@ protected:
// collection should have been removed.
boost::optional<CollectionType> expectedTempCollType = boost::none;
if (expectedCoordinatorDoc.getState() < CoordinatorStateEnum::kCommitting) {
- expectedTempCollType =
- createTempReshardingCollectionType(opCtx,
- expectedCoordinatorDoc,
- ChunkVersion(1, 1, OID::gen(), Timestamp(1, 2)),
- BSONObj());
+ expectedTempCollType = createTempReshardingCollectionType(
+ opCtx,
+ expectedCoordinatorDoc,
+ ChunkVersion({OID::gen(), Timestamp(1, 2)}, {1, 1}),
+ BSONObj());
// It's necessary to add the userCanceled field because the call into
// createTempReshardingCollectionType assumes that the collection entry is
@@ -723,7 +718,7 @@ protected:
ShardKeyPattern _oldShardKey = ShardKeyPattern(BSON("oldSK" << 1));
ShardKeyPattern _newShardKey = ShardKeyPattern(BSON("newSK" << 1));
- std::unique_ptr<ReshardingMetricsNew> _metrics;
+ std::unique_ptr<ReshardingMetrics> _metrics;
const std::vector<ChunkRange> _oldChunkRanges = {
ChunkRange(_oldShardKey.getKeyPattern().globalMin(), BSON("oldSK" << 12345)),
diff --git a/src/mongo/db/s/resharding/resharding_data_replication.cpp b/src/mongo/db/s/resharding/resharding_data_replication.cpp
index 4143c8c0c76..ff7bc064939 100644
--- a/src/mongo/db/s/resharding/resharding_data_replication.cpp
+++ b/src/mongo/db/s/resharding/resharding_data_replication.cpp
@@ -81,12 +81,12 @@ void ensureFulfilledPromise(SharedPromise<void>& sp, Status error) {
} // namespace
std::unique_ptr<ReshardingCollectionCloner> ReshardingDataReplication::_makeCollectionCloner(
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
const CommonReshardingMetadata& metadata,
const ShardId& myShardId,
Timestamp cloneTimestamp) {
return std::make_unique<ReshardingCollectionCloner>(
- metricsNew,
+ metrics,
ShardKeyPattern{metadata.getReshardingKey()},
metadata.getSourceNss(),
metadata.getSourceUUID(),
@@ -112,7 +112,7 @@ std::vector<std::unique_ptr<ReshardingTxnCloner>> ReshardingDataReplication::_ma
std::vector<std::unique_ptr<ReshardingOplogFetcher>> ReshardingDataReplication::_makeOplogFetchers(
OperationContext* opCtx,
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
const CommonReshardingMetadata& metadata,
const std::vector<DonorShardFetchTimestamp>& donorShards,
const ShardId& myShardId) {
@@ -121,14 +121,14 @@ std::vector<std::unique_ptr<ReshardingOplogFetcher>> ReshardingDataReplication::
for (const auto& donor : donorShards) {
auto oplogBufferNss =
- getLocalOplogBufferNamespace(metadata.getSourceUUID(), donor.getShardId());
+ resharding::getLocalOplogBufferNamespace(metadata.getSourceUUID(), donor.getShardId());
auto minFetchTimestamp = *donor.getMinFetchTimestamp();
auto idToResumeFrom = getOplogFetcherResumeId(
opCtx, metadata.getReshardingUUID(), oplogBufferNss, minFetchTimestamp);
invariant((idToResumeFrom >= ReshardingDonorOplogId{minFetchTimestamp, minFetchTimestamp}));
oplogFetchers.emplace_back(std::make_unique<ReshardingOplogFetcher>(
- std::make_unique<ReshardingOplogFetcher::Env>(opCtx->getServiceContext(), metricsNew),
+ std::make_unique<ReshardingOplogFetcher::Env>(opCtx->getServiceContext(), metrics),
metadata.getReshardingUUID(),
metadata.getSourceUUID(),
// The recipient fetches oplog entries from the donor starting from the largest _id
@@ -182,7 +182,7 @@ std::vector<std::unique_ptr<ReshardingOplogApplier>> ReshardingDataReplication::
invariant((idToResumeFrom >= ReshardingDonorOplogId{minFetchTimestamp, minFetchTimestamp}));
const auto& oplogBufferNss =
- getLocalOplogBufferNamespace(metadata.getSourceUUID(), donorShardId);
+ resharding::getLocalOplogBufferNamespace(metadata.getSourceUUID(), donorShardId);
auto applierMetrics = (*applierMetricsMap)[donorShardId].get();
oplogAppliers.emplace_back(std::make_unique<ReshardingOplogApplier>(
@@ -206,7 +206,7 @@ std::vector<std::unique_ptr<ReshardingOplogApplier>> ReshardingDataReplication::
std::unique_ptr<ReshardingDataReplicationInterface> ReshardingDataReplication::make(
OperationContext* opCtx,
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
ReshardingApplierMetricsMap* applierMetricsMap,
CommonReshardingMetadata metadata,
const std::vector<DonorShardFetchTimestamp>& donorShards,
@@ -218,11 +218,11 @@ std::unique_ptr<ReshardingDataReplicationInterface> ReshardingDataReplication::m
std::vector<std::unique_ptr<ReshardingTxnCloner>> txnCloners;
if (!cloningDone) {
- collectionCloner = _makeCollectionCloner(metricsNew, metadata, myShardId, cloneTimestamp);
+ collectionCloner = _makeCollectionCloner(metrics, metadata, myShardId, cloneTimestamp);
txnCloners = _makeTxnCloners(metadata, donorShards);
}
- auto oplogFetchers = _makeOplogFetchers(opCtx, metricsNew, metadata, donorShards, myShardId);
+ auto oplogFetchers = _makeOplogFetchers(opCtx, metrics, metadata, donorShards, myShardId);
auto oplogFetcherExecutor = _makeOplogFetcherExecutor(donorShards.size());
@@ -456,7 +456,7 @@ ReshardingDonorOplogId ReshardingDataReplication::getOplogFetcherResumeId(
if (highestOplogBufferId) {
auto oplogEntry = repl::OplogEntry{highestOplogBufferId->toBson()};
- if (isFinalOplog(oplogEntry, reshardingUUID)) {
+ if (resharding::isFinalOplog(oplogEntry, reshardingUUID)) {
return ReshardingOplogFetcher::kFinalOpAlreadyFetched;
}
diff --git a/src/mongo/db/s/resharding/resharding_data_replication.h b/src/mongo/db/s/resharding/resharding_data_replication.h
index f8348646758..2e44a5d2a21 100644
--- a/src/mongo/db/s/resharding/resharding_data_replication.h
+++ b/src/mongo/db/s/resharding/resharding_data_replication.h
@@ -140,7 +140,7 @@ private:
public:
static std::unique_ptr<ReshardingDataReplicationInterface> make(
OperationContext* opCtx,
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
ReshardingApplierMetricsMap* applierMetricsMap,
CommonReshardingMetadata metadata,
const std::vector<DonorShardFetchTimestamp>& donorShards,
@@ -196,7 +196,7 @@ public:
private:
static std::unique_ptr<ReshardingCollectionCloner> _makeCollectionCloner(
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
const CommonReshardingMetadata& metadata,
const ShardId& myShardId,
Timestamp cloneTimestamp);
@@ -207,7 +207,7 @@ private:
static std::vector<std::unique_ptr<ReshardingOplogFetcher>> _makeOplogFetchers(
OperationContext* opCtx,
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
const CommonReshardingMetadata& metadata,
const std::vector<DonorShardFetchTimestamp>& donorShards,
const ShardId& myShardId);
diff --git a/src/mongo/db/s/resharding/resharding_data_replication_test.cpp b/src/mongo/db/s/resharding/resharding_data_replication_test.cpp
index f71ce9f0356..f5f588ac948 100644
--- a/src/mongo/db/s/resharding/resharding_data_replication_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_data_replication_test.cpp
@@ -27,12 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
-#include <memory>
-#include <vector>
-
#include "mongo/bson/bsonmisc.h"
#include "mongo/db/persistent_task_store.h"
#include "mongo/db/query/collation/collator_factory_mock.h"
@@ -50,7 +44,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -78,7 +71,7 @@ public:
std::vector<ChunkType> chunks = {ChunkType{
_sourceUUID,
ChunkRange{BSON(_currentShardKey << MINKEY), BSON(_currentShardKey << MAXKEY)},
- ChunkVersion(100, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 0}),
_myDonorId}};
auto rt = RoutingTableHistory::makeNew(_sourceNss,
@@ -193,7 +186,7 @@ TEST_F(ReshardingDataReplicationTest, GetOplogFetcherResumeId) {
auto opCtx = makeOperationContext();
const auto reshardingUUID = UUID::gen();
- auto oplogBufferNss = getLocalOplogBufferNamespace(reshardingUUID, {"shard0"});
+ auto oplogBufferNss = resharding::getLocalOplogBufferNamespace(reshardingUUID, {"shard0"});
const auto minFetchTimestamp = Timestamp{10, 0};
const auto oplogId1 = ReshardingDonorOplogId{{20, 0}, {18, 0}};
diff --git a/src/mongo/db/s/resharding/resharding_destined_recipient_test.cpp b/src/mongo/db/s/resharding/resharding_destined_recipient_test.cpp
index d95f0fdc23e..632b387a817 100644
--- a/src/mongo/db/s/resharding/resharding_destined_recipient_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_destined_recipient_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/catalog/create_collection.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/dbdirectclient.h"
@@ -55,7 +52,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -164,11 +160,11 @@ protected:
const std::string& shardKey) {
auto range1 = ChunkRange(BSON(shardKey << MINKEY), BSON(shardKey << 5));
ChunkType chunk1(
- uuid, range1, ChunkVersion(1, 0, epoch, timestamp), kShardList[0].getName());
+ uuid, range1, ChunkVersion({epoch, timestamp}, {1, 0}), kShardList[0].getName());
auto range2 = ChunkRange(BSON(shardKey << 5), BSON(shardKey << MAXKEY));
ChunkType chunk2(
- uuid, range2, ChunkVersion(1, 0, epoch, timestamp), kShardList[1].getName());
+ uuid, range2, ChunkVersion({epoch, timestamp}, {1, 0}), kShardList[1].getName());
return {chunk1, chunk2};
}
@@ -199,7 +195,7 @@ protected:
ReshardingEnv env(CollectionCatalog::get(opCtx)->lookupUUIDByNSS(opCtx, kNss).value());
env.destShard = kShardList[1].getName();
- env.version = ChunkVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ env.version = ChunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
env.tempNss =
NamespaceString(kNss.db(),
fmt::format("{}{}",
diff --git a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp
index 5213b170753..0a9027deea2 100644
--- a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp
@@ -129,7 +129,7 @@ std::vector<repl::OplogEntry> ReshardingDonorOplogIterator::_fillBatch(Pipeline&
numBytes += obj.objsize();
- if (isFinalOplog(entry)) {
+ if (resharding::isFinalOplog(entry)) {
// The ReshardingOplogFetcher should never insert documents after the reshardFinalOp
// entry. We defensively check each oplog entry for being the reshardFinalOp and confirm
// the pipeline has been exhausted.
@@ -185,7 +185,7 @@ ExecutorFuture<std::vector<repl::OplogEntry>> ReshardingDonorOplogIterator::getN
const auto& lastEntryInBatch = batch.back();
_resumeToken = getId(lastEntryInBatch);
- if (isFinalOplog(lastEntryInBatch)) {
+ if (resharding::isFinalOplog(lastEntryInBatch)) {
_hasSeenFinalOplogEntry = true;
// Skip returning the final oplog entry because it is known to be a no-op.
batch.pop_back();
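The context above shows the donor oplog iterator stopping its batch at the reshardFinalOp entry and dropping that entry because it is a known no-op. A simplified batch-until-sentinel sketch, with hypothetical Entry and BatchingIterator types standing in for the real pipeline and oplog entry types:

#include <cassert>
#include <string>
#include <vector>

// Hypothetical sketch: pull entries into a batch, and when the final no-op
// marker shows up, remember that the stream is done and drop the marker
// instead of returning it.
struct Entry {
    std::string payload;
    bool isFinalMarker = false;
};

struct BatchingIterator {
    std::vector<Entry> source;  // stand-in for the aggregation pipeline
    size_t pos = 0;
    bool seenFinal = false;

    std::vector<Entry> nextBatch(size_t maxEntries) {
        std::vector<Entry> batch;
        while (pos < source.size() && batch.size() < maxEntries) {
            batch.push_back(source[pos++]);
            if (batch.back().isFinalMarker) {
                break;  // nothing may follow the final marker
            }
        }
        if (!batch.empty() && batch.back().isFinalMarker) {
            seenFinal = true;
            batch.pop_back();  // the marker itself is a no-op, do not return it
        }
        return batch;
    }
};

int main() {
    BatchingIterator it;
    it.source = {Entry{"a"}, Entry{"b"}, Entry{"final", true}};
    auto batch = it.nextBatch(10);
    assert(batch.size() == 2 && it.seenFinal);
    return 0;
}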
diff --git a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp
index 26b7646283f..a0491b06e7c 100644
--- a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp
@@ -95,7 +95,7 @@ public:
const BSONObj oField(BSON("msg"
<< "Created temporary resharding collection"));
const BSONObj o2Field(
- BSON("type" << kReshardFinalOpLogType << "reshardingUUID" << UUID::gen()));
+ BSON("type" << resharding::kReshardFinalOpLogType << "reshardingUUID" << UUID::gen()));
return makeOplog(_crudNss, _uuid, repl::OpTypeEnum::kNoop, oField, o2Field, oplogId);
}
@@ -103,7 +103,7 @@ public:
ReshardingDonorOplogId oplogId(ts, ts);
const BSONObj oField(BSON("msg"
<< "Latest oplog ts from donor's cursor response"));
- const BSONObj o2Field(BSON("type" << kReshardProgressMark));
+ const BSONObj o2Field(BSON("type" << resharding::kReshardProgressMark));
return makeOplog(_crudNss, _uuid, repl::OpTypeEnum::kNoop, oField, o2Field, oplogId);
}
diff --git a/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp b/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp
index 43d91e83b97..27157f82b66 100644
--- a/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp
@@ -332,7 +332,12 @@ void clearFilteringMetadata(OperationContext* opCtx, bool scheduleAsyncRefresh)
return true;
});
}
+ clearFilteringMetadata(opCtx, namespacesToRefresh, scheduleAsyncRefresh);
+}
+void clearFilteringMetadata(OperationContext* opCtx,
+ stdx::unordered_set<NamespaceString> namespacesToRefresh,
+ bool scheduleAsyncRefresh) {
for (const auto& nss : namespacesToRefresh) {
AutoGetCollection autoColl(opCtx, nss, MODE_IX);
CollectionShardingRuntime::get(opCtx, nss)->clearFilteringMetadata(opCtx);
diff --git a/src/mongo/db/s/resharding/resharding_donor_recipient_common.h b/src/mongo/db/s/resharding/resharding_donor_recipient_common.h
index 2efba26f659..10be195c586 100644
--- a/src/mongo/db/s/resharding/resharding_donor_recipient_common.h
+++ b/src/mongo/db/s/resharding/resharding_donor_recipient_common.h
@@ -77,6 +77,10 @@ void processReshardingFieldsForCollection(OperationContext* opCtx,
void clearFilteringMetadata(OperationContext* opCtx, bool scheduleAsyncRefresh);
+void clearFilteringMetadata(OperationContext* opCtx,
+ stdx::unordered_set<NamespaceString> namespacesToRefresh,
+ bool scheduleAsyncRefresh);
+
void refreshShardVersion(OperationContext* opCtx, const NamespaceString& nss);
} // namespace resharding
diff --git a/src/mongo/db/s/resharding/resharding_donor_recipient_common_test.cpp b/src/mongo/db/s/resharding/resharding_donor_recipient_common_test.cpp
index 3fccff9812c..e5bd8defdbd 100644
--- a/src/mongo/db/s/resharding/resharding_donor_recipient_common_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_recipient_common_test.cpp
@@ -58,7 +58,7 @@ public:
const NamespaceString kOriginalNss = NamespaceString("db", "foo");
const NamespaceString kTemporaryReshardingNss =
- constructTemporaryReshardingNss("db", kExistingUUID);
+ resharding::constructTemporaryReshardingNss("db", kExistingUUID);
const std::string kOriginalShardKey = "oldKey";
const BSONObj kOriginalShardKeyPattern = BSON(kOriginalShardKey << 1);
const std::string kReshardingKey = "newKey";
@@ -111,8 +111,10 @@ protected:
const OID& epoch,
const ShardId& shardThatChunkExistsOn) {
auto range = ChunkRange(BSON(shardKey << MINKEY), BSON(shardKey << MAXKEY));
- auto chunk = ChunkType(
- uuid, std::move(range), ChunkVersion(1, 0, epoch, timestamp), shardThatChunkExistsOn);
+ auto chunk = ChunkType(uuid,
+ std::move(range),
+ ChunkVersion({epoch, timestamp}, {1, 0}),
+ shardThatChunkExistsOn);
ChunkManager cm(kThisShard.getShardId(),
DatabaseVersion(uuid, timestamp),
makeStandaloneRoutingTableHistory(
@@ -133,17 +135,18 @@ protected:
return CollectionMetadata(std::move(cm), kThisShard.getShardId());
}
- ReshardingDonorDocument makeDonorStateDoc() {
+ ReshardingDonorDocument makeDonorStateDoc(NamespaceString sourceNss,
+ NamespaceString tempReshardingNss,
+ BSONObj reshardingKey,
+ std::vector<mongo::ShardId> recipientShards) {
DonorShardContext donorCtx;
donorCtx.setState(DonorStateEnum::kPreparingToDonate);
- ReshardingDonorDocument doc(std::move(donorCtx),
- {kThisShard.getShardId(), kOtherShard.getShardId()});
+ ReshardingDonorDocument doc(std::move(donorCtx), recipientShards);
- NamespaceString sourceNss = kOriginalNss;
auto sourceUUID = UUID::gen();
auto commonMetadata = CommonReshardingMetadata(
- UUID::gen(), sourceNss, sourceUUID, kTemporaryReshardingNss, kReshardingKeyPattern);
+ UUID::gen(), sourceNss, sourceUUID, tempReshardingNss, reshardingKey);
doc.setCommonReshardingMetadata(std::move(commonMetadata));
return doc;
@@ -194,7 +197,7 @@ protected:
const boost::optional<Timestamp>& cloneTimestamp = boost::none) {
auto recipientFields =
TypeCollectionRecipientFields(donorShards, existingUUID, originalNss, 5000);
- emplaceCloneTimestampIfExists(recipientFields, cloneTimestamp);
+ resharding::emplaceCloneTimestampIfExists(recipientFields, cloneTimestamp);
fields.setRecipientFields(std::move(recipientFields));
}
@@ -262,6 +265,19 @@ protected:
ASSERT(donorShardMap.empty());
}
+ void addFilteringMetadata(OperationContext* opCtx, NamespaceString sourceNss, ShardId shardId) {
+ AutoGetCollection autoColl(opCtx, sourceNss, LockMode::MODE_IS);
+ const auto metadata{makeShardedMetadataForOriginalCollection(opCtx, shardId)};
+ ScopedSetShardRole scopedSetShardRole{opCtx,
+ sourceNss,
+ metadata.getShardVersion() /* shardVersion */,
+ boost::none /* databaseVersion */};
+
+ auto csr = CollectionShardingRuntime::get(opCtx, sourceNss);
+ csr->setFilteringMetadata(opCtx, metadata);
+ ASSERT(csr->getCurrentMetadataIfKnown());
+ }
+
private:
DonorShardFetchTimestamp makeDonorShardFetchTimestamp(
ShardId shardId, boost::optional<Timestamp> fetchTimestamp) {
@@ -553,34 +569,10 @@ TEST_F(ReshardingDonorRecipientCommonInternalsTest, ClearReshardingFilteringMeta
}
// Add filtering metadata for the collection being resharded.
- {
- AutoGetCollection autoColl(opCtx, kOriginalNss, LockMode::MODE_IS);
- const auto metadata{
- makeShardedMetadataForOriginalCollection(opCtx, kThisShard.getShardId())};
- ScopedSetShardRole scopedSetShardRole{opCtx,
- kOriginalNss,
- metadata.getShardVersion() /* shardVersion */,
- boost::none /* databaseVersion */};
-
- auto csr = CollectionShardingRuntime::get(opCtx, kOriginalNss);
- csr->setFilteringMetadata(opCtx, metadata);
- ASSERT(csr->getCurrentMetadataIfKnown());
- }
+ addFilteringMetadata(opCtx, kOriginalNss, kThisShard.getShardId());
// Add filtering metadata for the temporary resharding namespace.
- {
- AutoGetCollection autoColl(opCtx, kTemporaryReshardingNss, LockMode::MODE_IS);
- const auto metadata{makeShardedMetadataForTemporaryReshardingCollection(
- opCtx, kThisShard.getShardId())};
- ScopedSetShardRole scopedSetShardRole{opCtx,
- kTemporaryReshardingNss,
- metadata.getShardVersion() /* shardVersion */,
- boost::none /* databaseVersion */};
-
- auto csr = CollectionShardingRuntime::get(opCtx, kTemporaryReshardingNss);
- csr->setFilteringMetadata(opCtx, metadata);
- ASSERT(csr->getCurrentMetadataIfKnown());
- }
+ addFilteringMetadata(opCtx, kTemporaryReshardingNss, kThisShard.getShardId());
// Prior to adding a resharding document, assert that attempting to clear filtering does
// nothing.
@@ -595,7 +587,11 @@ TEST_F(ReshardingDonorRecipientCommonInternalsTest, ClearReshardingFilteringMeta
doSetupFunc();
// Add a resharding donor document that targets the namespaces involved in resharding.
- ReshardingDonorDocument donorDoc = makeDonorStateDoc();
+ ReshardingDonorDocument donorDoc =
+ makeDonorStateDoc(kOriginalNss,
+ kTemporaryReshardingNss,
+ kReshardingKeyPattern,
+ {kThisShard.getShardId(), kOtherShard.getShardId()});
ReshardingDonorService::DonorStateMachine::insertStateDocument(opCtx, donorDoc);
// Clear the filtering metadata (without scheduling a refresh) and assert the metadata is gone.
@@ -622,5 +618,49 @@ TEST_F(ReshardingDonorRecipientCommonInternalsTest, ClearReshardingFilteringMeta
}
}
+TEST_F(ReshardingDonorRecipientCommonInternalsTest, ClearReshardingFilteringMetaDataForActiveOp) {
+ OperationContext* opCtx = operationContext();
+ NamespaceString sourceNss1 = NamespaceString("db", "one");
+ NamespaceString tempReshardingNss1 =
+ resharding::constructTemporaryReshardingNss(sourceNss1.db(), UUID::gen());
+ NamespaceString sourceNss2 = NamespaceString("db", "two");
+ NamespaceString tempReshardingNss2 =
+ resharding::constructTemporaryReshardingNss(sourceNss2.db(), UUID::gen());
+ ShardId shardId1 = ShardId{"recipient1"};
+ ShardId shardId2 = ShardId{"recipient2"};
+ ReshardingDonorDocument doc1 =
+ makeDonorStateDoc(sourceNss1, tempReshardingNss1, BSON("newKey1" << 1), {shardId1});
+ ReshardingDonorDocument doc2 =
+ makeDonorStateDoc(sourceNss2, tempReshardingNss2, BSON("newKey2" << 1), {shardId2});
+
+ ReshardingDonorService::DonorStateMachine::insertStateDocument(opCtx, doc1);
+ ReshardingDonorService::DonorStateMachine::insertStateDocument(opCtx, doc2);
+
+ // Add filtering metadata for the collection being resharded.
+ addFilteringMetadata(opCtx, sourceNss1, {shardId1});
+ addFilteringMetadata(opCtx, sourceNss2, {shardId2});
+
+ // Add filtering metadata for the temporary resharding namespace.
+ addFilteringMetadata(opCtx, tempReshardingNss1, {shardId1});
+ addFilteringMetadata(opCtx, tempReshardingNss2, {shardId2});
+
+    // Clear the filtering metadata (without scheduling a refresh) for only a single operation's
+    // related namespaces.
+ resharding::clearFilteringMetadata(opCtx, {sourceNss1, tempReshardingNss1}, false);
+
+ for (auto const& nss : {sourceNss1, tempReshardingNss1}) {
+ AutoGetCollection autoColl(opCtx, nss, LockMode::MODE_IS);
+ auto csr = CollectionShardingRuntime::get(opCtx, nss);
+ ASSERT(csr->getCurrentMetadataIfKnown() == boost::none);
+ }
+
+    // Assert that the filtering metadata is not cleared for the other operation.
+ for (auto const& nss : {sourceNss2, tempReshardingNss2}) {
+ AutoGetCollection autoColl(opCtx, nss, LockMode::MODE_IS);
+ auto csr = CollectionShardingRuntime::get(opCtx, nss);
+ ASSERT(csr->getCurrentMetadataIfKnown() != boost::none);
+ }
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_donor_service.cpp b/src/mongo/db/s/resharding/resharding_donor_service.cpp
index 7f870033a1f..40b1f17f179 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_service.cpp
@@ -178,13 +178,17 @@ public:
}
}
- void clearFilteringMetadata(OperationContext* opCtx) {
- resharding::clearFilteringMetadata(opCtx, true /* scheduleAsyncRefresh */);
+ void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) {
+ stdx::unordered_set<NamespaceString> namespacesToRefresh{sourceNss, tempReshardingNss};
+ resharding::clearFilteringMetadata(
+ opCtx, namespacesToRefresh, true /* scheduleAsyncRefresh */);
}
};
-ReshardingMetricsNew::DonorState toMetricsState(DonorStateEnum state) {
- return ReshardingMetricsNew::DonorState(state);
+ReshardingMetrics::DonorState toMetricsState(DonorStateEnum state) {
+ return ReshardingMetrics::DonorState(state);
}
} // namespace
@@ -209,7 +213,7 @@ ReshardingDonorService::DonorStateMachine::DonorStateMachine(
std::unique_ptr<DonorStateMachineExternalState> externalState)
: repl::PrimaryOnlyService::TypedInstance<DonorStateMachine>(),
_donorService(donorService),
- _metricsNew{ReshardingMetricsNew::initializeFrom(donorDoc, getGlobalServiceContext())},
+ _metrics{ReshardingMetrics::initializeFrom(donorDoc, getGlobalServiceContext())},
_metadata{donorDoc.getCommonReshardingMetadata()},
_recipientShardIds{donorDoc.getRecipientShards()},
_donorCtx{donorDoc.getMutableState()},
@@ -233,7 +237,7 @@ ReshardingDonorService::DonorStateMachine::DonorStateMachine(
}()) {
invariant(_externalState);
- _metricsNew->onStateTransition(boost::none, toMetricsState(_donorCtx.getState()));
+ _metrics->onStateTransition(boost::none, toMetricsState(_donorCtx.getState()));
}
ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_runUntilBlockingWritesOrErrored(
@@ -375,8 +379,8 @@ ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_finishReshardin
{
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
-
- _externalState->clearFilteringMetadata(opCtx.get());
+ _externalState->clearFilteringMetadata(
+ opCtx.get(), _metadata.getSourceNss(), _metadata.getTempReshardingNss());
RecoverableCriticalSectionService::get(opCtx.get())
->releaseRecoverableCriticalSection(
@@ -385,7 +389,7 @@ ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_finishReshardin
_critSecReason,
ShardingCatalogClient::kLocalWriteConcern);
- _metricsNew->onCriticalSectionEnd();
+ _metrics->onCriticalSectionEnd();
}
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
@@ -410,6 +414,14 @@ ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_finishReshardin
Status ReshardingDonorService::DonorStateMachine::_runMandatoryCleanup(
Status status, const CancellationToken& stepdownToken) {
+ _metrics->onStateTransition(toMetricsState(_donorCtx.getState()), boost::none);
+
+    // Destroy metrics early so its lifetime will not be tied to the lifetime of this state
+    // machine. This is because future callbacks copy shared pointers to this state machine,
+    // which causes it to live longer than expected and potentially overlap with a newer
+    // instance when stepping up.
+ _metrics.reset();
+
if (!status.isOK()) {
// If the stepdownToken was triggered, it takes priority in order to make sure that
// the promise is set with an error that can be retried with. If it ran into an
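The new comment explains the ordering: completion callbacks hold shared_ptr copies of the state machine, so the machine can outlive the operation, and the metrics object is therefore released explicitly before those callbacks get a chance to extend its lifetime. A minimal standard-C++ sketch of that effect, with hypothetical StateMachine and Metrics types standing in for the donor state machine:

#include <functional>
#include <iostream>
#include <memory>

struct Metrics {
    ~Metrics() { std::cout << "metrics destroyed\n"; }
};

struct StateMachine : std::enable_shared_from_this<StateMachine> {
    std::unique_ptr<Metrics> metrics = std::make_unique<Metrics>();

    std::function<void()> makeCompletionCallback() {
        auto self = shared_from_this();  // the copy keeps *this alive
        return [self] { std::cout << "late callback still holds the machine\n"; };
    }

    void runMandatoryCleanup() {
        // Release metrics now instead of waiting for the last callback copy to go away.
        metrics.reset();
    }
};

int main() {
    auto machine = std::make_shared<StateMachine>();
    auto callback = machine->makeCompletionCallback();
    machine->runMandatoryCleanup();  // "metrics destroyed" prints here
    machine.reset();                 // the machine itself lives on inside the callback
    callback();
    return 0;
}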
@@ -427,8 +439,6 @@ Status ReshardingDonorService::DonorStateMachine::_runMandatoryCleanup(
ensureFulfilledPromise(lk, _completionPromise, statusForPromise);
}
- _metricsNew->onStateTransition(toMetricsState(_donorCtx.getState()), boost::none);
-
return status;
}
@@ -493,7 +503,7 @@ void ReshardingDonorService::DonorStateMachine::interrupt(Status status) {}
boost::optional<BSONObj> ReshardingDonorService::DonorStateMachine::reportForCurrentOp(
MongoProcessInterface::CurrentOpConnectionsMode connMode,
MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- return _metricsNew->reportForCurrentOp();
+ return _metrics->reportForCurrentOp();
}
void ReshardingDonorService::DonorStateMachine::onReshardingFieldsChanges(
@@ -521,11 +531,11 @@ void ReshardingDonorService::DonorStateMachine::onReshardingFieldsChanges(
}
void ReshardingDonorService::DonorStateMachine::onWriteDuringCriticalSection() {
- _metricsNew->onWriteDuringCriticalSection();
+ _metrics->onWriteDuringCriticalSection();
}
void ReshardingDonorService::DonorStateMachine::onReadDuringCriticalSection() {
- _metricsNew->onReadDuringCriticalSection();
+ _metrics->onReadDuringCriticalSection();
}
SharedSemiFuture<void> ReshardingDonorService::DonorStateMachine::awaitCriticalSectionAcquired() {
@@ -690,7 +700,7 @@ void ReshardingDonorService::DonorStateMachine::
_critSecReason,
ShardingCatalogClient::kLocalWriteConcern);
- _metricsNew->onCriticalSectionBegin();
+ _metrics->onCriticalSectionBegin();
}
{
@@ -711,7 +721,7 @@ void ReshardingDonorService::DonorStateMachine::
oplog.setObject(
BSON("msg" << fmt::format("Writes to {} are temporarily blocked for resharding.",
_metadata.getSourceNss().toString())));
- oplog.setObject2(BSON("type" << kReshardFinalOpLogType << "reshardingUUID"
+ oplog.setObject2(BSON("type" << resharding::kReshardFinalOpLogType << "reshardingUUID"
<< _metadata.getReshardingUUID()));
oplog.setOpTime(OplogSlot());
oplog.setWallClockTime(opCtx->getServiceContext()->getFastClockSource()->now());
@@ -828,7 +838,7 @@ void ReshardingDonorService::DonorStateMachine::_transitionState(DonorShardConte
_updateDonorDocument(std::move(newDonorCtx));
- _metricsNew->onStateTransition(toMetricsState(oldState), toMetricsState(newState));
+ _metrics->onStateTransition(toMetricsState(oldState), toMetricsState(newState));
LOGV2_INFO(5279505,
"Transitioned resharding donor state",
@@ -852,7 +862,7 @@ void ReshardingDonorService::DonorStateMachine::_transitionToDonatingInitialData
void ReshardingDonorService::DonorStateMachine::_transitionToError(Status abortReason) {
auto newDonorCtx = _donorCtx;
newDonorCtx.setState(DonorStateEnum::kError);
- emplaceTruncatedAbortReasonIfExists(newDonorCtx, abortReason);
+ resharding::emplaceTruncatedAbortReasonIfExists(newDonorCtx, abortReason);
_transitionState(std::move(newDonorCtx));
}
diff --git a/src/mongo/db/s/resharding/resharding_donor_service.h b/src/mongo/db/s/resharding/resharding_donor_service.h
index f2f4d99d2e8..3f3d88965db 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service.h
+++ b/src/mongo/db/s/resharding/resharding_donor_service.h
@@ -32,7 +32,7 @@
#include "mongo/db/cancelable_operation_context.h"
#include "mongo/db/repl/primary_only_service.h"
#include "mongo/db/s/resharding/donor_document_gen.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/s/resharding/type_collection_fields_gen.h"
namespace mongo {
@@ -218,7 +218,7 @@ private:
// The primary-only service instance corresponding to the donor instance. Not owned.
const ReshardingDonorService* const _donorService;
- std::unique_ptr<ReshardingMetricsNew> _metricsNew;
+ std::unique_ptr<ReshardingMetrics> _metrics;
// The in-memory representation of the immutable portion of the document in
// config.localReshardingOperations.donor.
@@ -297,7 +297,9 @@ public:
const BSONObj& query,
const BSONObj& update) = 0;
- virtual void clearFilteringMetadata(OperationContext* opCtx) = 0;
+ virtual void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) = 0;
};
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_donor_service_test.cpp b/src/mongo/db/s/resharding/resharding_donor_service_test.cpp
index 0f40919d14d..4d83cfe5e44 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_service_test.cpp
@@ -85,7 +85,9 @@ public:
const BSONObj& query,
const BSONObj& update) override {}
- void clearFilteringMetadata(OperationContext* opCtx) override {}
+ void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) override {}
};
class DonorOpObserverForTest : public OpObserverForTest {
@@ -148,12 +150,12 @@ public:
NamespaceString sourceNss("sourcedb.sourcecollection");
auto sourceUUID = UUID::gen();
- auto commonMetadata =
- CommonReshardingMetadata(UUID::gen(),
- sourceNss,
- sourceUUID,
- constructTemporaryReshardingNss(sourceNss.db(), sourceUUID),
- BSON("newKey" << 1));
+ auto commonMetadata = CommonReshardingMetadata(
+ UUID::gen(),
+ sourceNss,
+ sourceUUID,
+ resharding::constructTemporaryReshardingNss(sourceNss.db(), sourceUUID),
+ BSON("newKey" << 1));
commonMetadata.setStartTime(getServiceContext()->getFastClockSource()->now());
doc.setCommonReshardingMetadata(std::move(commonMetadata));
@@ -348,7 +350,7 @@ TEST_F(ReshardingDonorServiceTest, WritesFinalReshardOpOplogEntriesWhileWritesBl
DBDirectClient client(opCtx.get());
FindCommandRequest findRequest{NamespaceString::kRsOplogNamespace};
- findRequest.setFilter(BSON("o2.type" << kReshardFinalOpLogType));
+ findRequest.setFilter(BSON("o2.type" << resharding::kReshardFinalOpLogType));
auto cursor = client.find(std::move(findRequest));
ASSERT_TRUE(cursor->more()) << "Found no oplog entries for source collection";
@@ -710,7 +712,7 @@ TEST_F(ReshardingDonorServiceTest, TruncatesXLErrorOnDonorDocument) {
// to the primitive truncation algorithm - Check that the total size is less than
// kReshardErrorMaxBytes + a couple additional bytes to provide a buffer for the field
// name sizes.
- int maxReshardErrorBytesCeiling = kReshardErrorMaxBytes + 200;
+ int maxReshardErrorBytesCeiling = resharding::kReshardErrorMaxBytes + 200;
ASSERT_LT(persistedAbortReasonBSON->objsize(), maxReshardErrorBytesCeiling);
ASSERT_EQ(persistedAbortReasonBSON->getIntField("code"),
ErrorCodes::ReshardCollectionTruncatedError);
diff --git a/src/mongo/db/s/resharding/resharding_manual_cleanup.cpp b/src/mongo/db/s/resharding/resharding_manual_cleanup.cpp
index 9c2b78385fa..74911c8518f 100644
--- a/src/mongo/db/s/resharding/resharding_manual_cleanup.cpp
+++ b/src/mongo/db/s/resharding/resharding_manual_cleanup.cpp
@@ -48,8 +48,9 @@ namespace {
std::vector<ShardId> getAllParticipantsFromCoordDoc(const ReshardingCoordinatorDocument& doc) {
std::vector<ShardId> participants;
- auto donorShards = extractShardIdsFromParticipantEntriesAsSet(doc.getDonorShards());
- auto recipientShards = extractShardIdsFromParticipantEntriesAsSet(doc.getRecipientShards());
+ auto donorShards = resharding::extractShardIdsFromParticipantEntriesAsSet(doc.getDonorShards());
+ auto recipientShards =
+ resharding::extractShardIdsFromParticipantEntriesAsSet(doc.getRecipientShards());
std::set_union(donorShards.begin(),
donorShards.end(),
recipientShards.begin(),
diff --git a/src/mongo/db/s/resharding/resharding_metrics_new.cpp b/src/mongo/db/s/resharding/resharding_metrics.cpp
index e07468ad1b9..610ef970475 100644
--- a/src/mongo/db/s/resharding/resharding_metrics_new.cpp
+++ b/src/mongo/db/s/resharding/resharding_metrics.cpp
@@ -26,15 +26,15 @@
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/exec/document_value/document.h"
namespace mongo {
namespace {
-inline ReshardingMetricsNew::State getDefaultState(ReshardingMetricsNew::Role role) {
- using Role = ReshardingMetricsNew::Role;
+inline ReshardingMetrics::State getDefaultState(ReshardingMetrics::Role role) {
+ using Role = ReshardingMetrics::Role;
switch (role) {
case Role::kCoordinator:
return CoordinatorStateEnum::kUnused;
@@ -70,14 +70,13 @@ Date_t readStartTime(const CommonReshardingMetadata& metadata, ClockSource* fall
} // namespace
-ReshardingMetricsNew::ReshardingMetricsNew(
- UUID instanceId,
- BSONObj shardKey,
- NamespaceString nss,
- Role role,
- Date_t startTime,
- ClockSource* clockSource,
- ShardingDataTransformCumulativeMetrics* cumulativeMetrics)
+ReshardingMetrics::ReshardingMetrics(UUID instanceId,
+ BSONObj shardKey,
+ NamespaceString nss,
+ Role role,
+ Date_t startTime,
+ ClockSource* clockSource,
+ ShardingDataTransformCumulativeMetrics* cumulativeMetrics)
: ShardingDataTransformInstanceMetrics{std::move(instanceId),
createOriginalCommand(nss, std::move(shardKey)),
nss,
@@ -87,44 +86,42 @@ ReshardingMetricsNew::ReshardingMetricsNew(
cumulativeMetrics},
_state{getDefaultState(role)} {}
-ReshardingMetricsNew::ReshardingMetricsNew(
- const CommonReshardingMetadata& metadata,
- Role role,
- ClockSource* clockSource,
- ShardingDataTransformCumulativeMetrics* cumulativeMetrics)
- : ReshardingMetricsNew{metadata.getReshardingUUID(),
- metadata.getReshardingKey().toBSON(),
- metadata.getSourceNss(),
- role,
- readStartTime(metadata, clockSource),
- clockSource,
- cumulativeMetrics} {}
-
-std::string ReshardingMetricsNew::createOperationDescription() const noexcept {
+ReshardingMetrics::ReshardingMetrics(const CommonReshardingMetadata& metadata,
+ Role role,
+ ClockSource* clockSource,
+ ShardingDataTransformCumulativeMetrics* cumulativeMetrics)
+ : ReshardingMetrics{metadata.getReshardingUUID(),
+ metadata.getReshardingKey().toBSON(),
+ metadata.getSourceNss(),
+ role,
+ readStartTime(metadata, clockSource),
+ clockSource,
+ cumulativeMetrics} {}
+
+std::string ReshardingMetrics::createOperationDescription() const noexcept {
return fmt::format("ReshardingMetrics{}Service {}",
ShardingDataTransformMetrics::getRoleName(_role),
_instanceId.toString());
}
-std::unique_ptr<ReshardingMetricsNew> ReshardingMetricsNew::makeInstance(
- UUID instanceId,
- BSONObj shardKey,
- NamespaceString nss,
- Role role,
- Date_t startTime,
- ServiceContext* serviceContext) {
+std::unique_ptr<ReshardingMetrics> ReshardingMetrics::makeInstance(UUID instanceId,
+ BSONObj shardKey,
+ NamespaceString nss,
+ Role role,
+ Date_t startTime,
+ ServiceContext* serviceContext) {
auto cumulativeMetrics =
ShardingDataTransformCumulativeMetrics::getForResharding(serviceContext);
- return std::make_unique<ReshardingMetricsNew>(instanceId,
- createOriginalCommand(nss, std::move(shardKey)),
- std::move(nss),
- role,
- startTime,
- serviceContext->getFastClockSource(),
- cumulativeMetrics);
+ return std::make_unique<ReshardingMetrics>(instanceId,
+ createOriginalCommand(nss, std::move(shardKey)),
+ std::move(nss),
+ role,
+ startTime,
+ serviceContext->getFastClockSource(),
+ cumulativeMetrics);
}
-StringData ReshardingMetricsNew::getStateString() const noexcept {
+StringData ReshardingMetrics::getStateString() const noexcept {
return stdx::visit(
visit_helper::Overloaded{
[](CoordinatorStateEnum state) { return CoordinatorState_serializer(state); },
@@ -133,7 +130,7 @@ StringData ReshardingMetricsNew::getStateString() const noexcept {
_state.load());
}
-void ReshardingMetricsNew::accumulateFrom(const ReshardingOplogApplierProgress& progressDoc) {
+void ReshardingMetrics::accumulateFrom(const ReshardingOplogApplierProgress& progressDoc) {
invariant(_role == Role::kRecipient);
accumulateValues(progressDoc.getInsertsApplied(),
@@ -142,7 +139,7 @@ void ReshardingMetricsNew::accumulateFrom(const ReshardingOplogApplierProgress&
progressDoc.getWritesToStashCollections());
}
-void ReshardingMetricsNew::restoreRecipientSpecificFields(
+void ReshardingMetrics::restoreRecipientSpecificFields(
const ReshardingRecipientDocument& document) {
auto metrics = document.getMetrics();
if (!metrics) {
@@ -161,14 +158,14 @@ void ReshardingMetricsNew::restoreRecipientSpecificFields(
restorePhaseDurationFields(document);
}
-void ReshardingMetricsNew::restoreCoordinatorSpecificFields(
+void ReshardingMetrics::restoreCoordinatorSpecificFields(
const ReshardingCoordinatorDocument& document) {
restorePhaseDurationFields(document);
}
-ReshardingMetricsNew::DonorState::DonorState(DonorStateEnum enumVal) : _enumVal(enumVal) {}
+ReshardingMetrics::DonorState::DonorState(DonorStateEnum enumVal) : _enumVal(enumVal) {}
-ShardingDataTransformCumulativeMetrics::DonorStateEnum ReshardingMetricsNew::DonorState::toMetrics()
+ShardingDataTransformCumulativeMetrics::DonorStateEnum ReshardingMetrics::DonorState::toMetrics()
const {
using MetricsEnum = ShardingDataTransformCumulativeMetrics::DonorStateEnum;
@@ -204,15 +201,14 @@ ShardingDataTransformCumulativeMetrics::DonorStateEnum ReshardingMetricsNew::Don
}
}
-DonorStateEnum ReshardingMetricsNew::DonorState::getState() const {
+DonorStateEnum ReshardingMetrics::DonorState::getState() const {
return _enumVal;
}
-ReshardingMetricsNew::RecipientState::RecipientState(RecipientStateEnum enumVal)
- : _enumVal(enumVal) {}
+ReshardingMetrics::RecipientState::RecipientState(RecipientStateEnum enumVal) : _enumVal(enumVal) {}
ShardingDataTransformCumulativeMetrics::RecipientStateEnum
-ReshardingMetricsNew::RecipientState::toMetrics() const {
+ReshardingMetrics::RecipientState::toMetrics() const {
using MetricsEnum = ShardingDataTransformCumulativeMetrics::RecipientStateEnum;
switch (_enumVal) {
@@ -248,15 +244,15 @@ ReshardingMetricsNew::RecipientState::toMetrics() const {
}
}
-RecipientStateEnum ReshardingMetricsNew::RecipientState::getState() const {
+RecipientStateEnum ReshardingMetrics::RecipientState::getState() const {
return _enumVal;
}
-ReshardingMetricsNew::CoordinatorState::CoordinatorState(CoordinatorStateEnum enumVal)
+ReshardingMetrics::CoordinatorState::CoordinatorState(CoordinatorStateEnum enumVal)
: _enumVal(enumVal) {}
ShardingDataTransformCumulativeMetrics::CoordinatorStateEnum
-ReshardingMetricsNew::CoordinatorState::toMetrics() const {
+ReshardingMetrics::CoordinatorState::toMetrics() const {
switch (_enumVal) {
case CoordinatorStateEnum::kUnused:
return ShardingDataTransformCumulativeMetrics::CoordinatorStateEnum::kUnused;
@@ -292,7 +288,7 @@ ReshardingMetricsNew::CoordinatorState::toMetrics() const {
}
}
-CoordinatorStateEnum ReshardingMetricsNew::CoordinatorState::getState() const {
+CoordinatorStateEnum ReshardingMetrics::CoordinatorState::getState() const {
return _enumVal;
}
diff --git a/src/mongo/db/s/resharding/resharding_metrics_new.h b/src/mongo/db/s/resharding/resharding_metrics.h
index b8e96698b0d..a1faa5a96da 100644
--- a/src/mongo/db/s/resharding/resharding_metrics_new.h
+++ b/src/mongo/db/s/resharding/resharding_metrics.h
@@ -38,7 +38,7 @@
namespace mongo {
-class ReshardingMetricsNew : public ShardingDataTransformInstanceMetrics {
+class ReshardingMetrics : public ShardingDataTransformInstanceMetrics {
public:
using State = stdx::variant<CoordinatorStateEnum, RecipientStateEnum, DonorStateEnum>;
@@ -78,24 +78,24 @@ public:
CoordinatorStateEnum _enumVal;
};
- ReshardingMetricsNew(UUID instanceId,
- BSONObj shardKey,
- NamespaceString nss,
- Role role,
- Date_t startTime,
- ClockSource* clockSource,
- ShardingDataTransformCumulativeMetrics* cumulativeMetrics);
- ReshardingMetricsNew(const CommonReshardingMetadata& metadata,
- Role role,
- ClockSource* clockSource,
- ShardingDataTransformCumulativeMetrics* cumulativeMetrics);
-
- static std::unique_ptr<ReshardingMetricsNew> makeInstance(UUID instanceId,
- BSONObj shardKey,
- NamespaceString nss,
- Role role,
- Date_t startTime,
- ServiceContext* serviceContext);
+ ReshardingMetrics(UUID instanceId,
+ BSONObj shardKey,
+ NamespaceString nss,
+ Role role,
+ Date_t startTime,
+ ClockSource* clockSource,
+ ShardingDataTransformCumulativeMetrics* cumulativeMetrics);
+ ReshardingMetrics(const CommonReshardingMetadata& metadata,
+ Role role,
+ ClockSource* clockSource,
+ ShardingDataTransformCumulativeMetrics* cumulativeMetrics);
+
+ static std::unique_ptr<ReshardingMetrics> makeInstance(UUID instanceId,
+ BSONObj shardKey,
+ NamespaceString nss,
+ Role role,
+ Date_t startTime,
+ ServiceContext* serviceContext);
template <typename T>
static auto initializeFrom(const T& document,
@@ -103,10 +103,10 @@ public:
ShardingDataTransformCumulativeMetrics* cumulativeMetrics) {
static_assert(resharding_metrics::isStateDocument<T>);
auto result =
- std::make_unique<ReshardingMetricsNew>(document.getCommonReshardingMetadata(),
- resharding_metrics::getRoleForStateDocument<T>(),
- clockSource,
- cumulativeMetrics);
+ std::make_unique<ReshardingMetrics>(document.getCommonReshardingMetadata(),
+ resharding_metrics::getRoleForStateDocument<T>(),
+ clockSource,
+ cumulativeMetrics);
result->setState(resharding_metrics::getState(document));
result->restoreRoleSpecificFields(document);
return result;
diff --git a/src/mongo/db/s/resharding/resharding_metrics_new_test.cpp b/src/mongo/db/s/resharding/resharding_metrics_test.cpp
index 82bcba56d43..e57581cf8dd 100644
--- a/src/mongo/db/s/resharding/resharding_metrics_new_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_metrics_test.cpp
@@ -30,7 +30,7 @@
#include "mongo/platform/basic.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_service_test_helpers.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/s/sharding_data_transform_cumulative_metrics.h"
@@ -49,16 +49,16 @@ const auto kShardKey = BSON("newKey" << 1);
class ReshardingMetricsTest : public ShardingDataTransformMetricsTestFixture {
public:
- std::unique_ptr<ReshardingMetricsNew> createInstanceMetrics(ClockSource* clockSource,
- UUID instanceId = UUID::gen(),
- Role role = Role::kDonor) {
- return std::make_unique<ReshardingMetricsNew>(instanceId,
- BSON("y" << 1),
- kTestNamespace,
- role,
- clockSource->now(),
- clockSource,
- &_cumulativeMetrics);
+ std::unique_ptr<ReshardingMetrics> createInstanceMetrics(ClockSource* clockSource,
+ UUID instanceId = UUID::gen(),
+ Role role = Role::kDonor) {
+ return std::make_unique<ReshardingMetrics>(instanceId,
+ BSON("y" << 1),
+ kTestNamespace,
+ role,
+ clockSource->now(),
+ clockSource,
+ &_cumulativeMetrics);
}
const UUID& getSourceCollectionId() {
@@ -69,7 +69,7 @@ public:
template <typename T>
BSONObj getReportFromStateDocument(T document) {
auto metrics =
- ReshardingMetricsNew::initializeFrom(document, getClockSource(), &_cumulativeMetrics);
+ ReshardingMetrics::initializeFrom(document, getClockSource(), &_cumulativeMetrics);
return metrics->reportForCurrentOp();
}
@@ -98,12 +98,12 @@ public:
}
CommonReshardingMetadata createCommonReshardingMetadata(const UUID& operationId) {
- CommonReshardingMetadata metadata{
- operationId,
- kTestNamespace,
- getSourceCollectionId(),
- constructTemporaryReshardingNss(kTestNamespace.db(), getSourceCollectionId()),
- kShardKey};
+ CommonReshardingMetadata metadata{operationId,
+ kTestNamespace,
+ getSourceCollectionId(),
+ resharding::constructTemporaryReshardingNss(
+ kTestNamespace.db(), getSourceCollectionId()),
+ kShardKey};
metadata.setStartTime(getClockSource()->now() - kRunningTime);
return metadata;
}
@@ -169,7 +169,7 @@ public:
doc.setMetrics(metricsDoc);
auto metrics =
- ReshardingMetricsNew::initializeFrom(doc, getClockSource(), &_cumulativeMetrics);
+ ReshardingMetrics::initializeFrom(doc, getClockSource(), &_cumulativeMetrics);
clock->advance(kInterval);
auto report = metrics->reportForCurrentOp();
diff --git a/src/mongo/db/s/resharding/resharding_oplog_application.cpp b/src/mongo/db/s/resharding/resharding_oplog_application.cpp
index 1478a3ec30c..9a643ef819e 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_application.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_application.cpp
@@ -252,7 +252,7 @@ void ReshardingOplogApplicationRules::_applyInsert_inlock(OperationContext* opCt
// First, query the conflict stash collection using [op _id] as the query. If a doc exists,
// apply rule #1 and run a replacement update on the stash collection.
- auto stashCollDoc = _queryStashCollById(opCtx, db, stashColl, idQuery);
+ auto stashCollDoc = _queryStashCollById(opCtx, stashColl, idQuery);
if (!stashCollDoc.isEmpty()) {
auto request = UpdateRequest();
request.setNamespaceString(_myStashNss);
@@ -348,7 +348,7 @@ void ReshardingOplogApplicationRules::_applyUpdate_inlock(OperationContext* opCt
// First, query the conflict stash collection using [op _id] as the query. If a doc exists,
// apply rule #1 and update the doc from the stash collection.
- auto stashCollDoc = _queryStashCollById(opCtx, db, stashColl, idQuery);
+ auto stashCollDoc = _queryStashCollById(opCtx, stashColl, idQuery);
if (!stashCollDoc.isEmpty()) {
auto request = UpdateRequest();
request.setNamespaceString(_myStashNss);
@@ -430,7 +430,7 @@ void ReshardingOplogApplicationRules::_applyDelete_inlock(OperationContext* opCt
// First, query the conflict stash collection using [op _id] as the query. If a doc exists,
// apply rule #1 and delete the doc from the stash collection.
- auto stashCollDoc = _queryStashCollById(opCtx, db, stashColl, idQuery);
+ auto stashCollDoc = _queryStashCollById(opCtx, stashColl, idQuery);
if (!stashCollDoc.isEmpty()) {
auto nDeleted = deleteObjects(opCtx, stashColl, _myStashNss, idQuery, true /* justOne */);
invariant(nDeleted != 0);
@@ -543,7 +543,6 @@ void ReshardingOplogApplicationRules::_applyDelete_inlock(OperationContext* opCt
}
BSONObj ReshardingOplogApplicationRules::_queryStashCollById(OperationContext* opCtx,
- Database* db,
const CollectionPtr& coll,
const BSONObj& idQuery) const {
const IndexCatalog* indexCatalog = coll->getIndexCatalog();
@@ -552,7 +551,7 @@ BSONObj ReshardingOplogApplicationRules::_queryStashCollById(OperationContext* o
indexCatalog->haveIdIndex(opCtx));
BSONObj result;
- Helpers::findById(opCtx, db, _myStashNss.ns(), idQuery, result);
+ Helpers::findById(opCtx, _myStashNss.ns(), idQuery, result);
return result;
}
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_oplog_application.h b/src/mongo/db/s/resharding/resharding_oplog_application.h
index b8bd3942b40..4e00a62a269 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_application.h
+++ b/src/mongo/db/s/resharding/resharding_oplog_application.h
@@ -96,7 +96,6 @@ private:
// Queries '_stashNss' using 'idQuery'.
BSONObj _queryStashCollById(OperationContext* opCtx,
- Database* db,
const CollectionPtr& coll,
const BSONObj& idQuery) const;
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier.cpp
index cf449c4c00c..d9edf786371 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier.cpp
@@ -271,7 +271,7 @@ NamespaceString ReshardingOplogApplier::ensureStashCollectionExists(
const UUID& existingUUID,
const ShardId& donorShardId,
const CollectionOptions& options) {
- auto nss = getLocalConflictStashNamespace(existingUUID, donorShardId);
+ auto nss = resharding::getLocalConflictStashNamespace(existingUUID, donorShardId);
resharding::data_copy::ensureCollectionExists(opCtx, nss, options);
return nss;
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier.h b/src/mongo/db/s/resharding/resharding_oplog_applier.h
index 56a7e9d3a0a..f1df65219cc 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier.h
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier.h
@@ -36,7 +36,7 @@
#include "mongo/db/repl/oplog_entry.h"
#include "mongo/db/s/resharding/donor_oplog_id_gen.h"
#include "mongo/db/s/resharding/resharding_donor_oplog_iterator.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_application.h"
#include "mongo/db/s/resharding/resharding_oplog_applier_progress_gen.h"
#include "mongo/db/s/resharding/resharding_oplog_batch_applier.h"
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.cpp
index 31bb6ca8dd6..7a474b7edf1 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.cpp
@@ -34,8 +34,8 @@
namespace mongo {
ReshardingOplogApplierMetrics::ReshardingOplogApplierMetrics(
- ReshardingMetricsNew* metricsNew, boost::optional<ReshardingOplogApplierProgress> progressDoc)
- : _metricsNew(metricsNew) {
+ ReshardingMetrics* metrics, boost::optional<ReshardingOplogApplierProgress> progressDoc)
+ : _metrics(metrics) {
if (progressDoc) {
_insertsApplied = progressDoc->getInsertsApplied();
_updatesApplied = progressDoc->getUpdatesApplied();
@@ -46,35 +46,35 @@ ReshardingOplogApplierMetrics::ReshardingOplogApplierMetrics(
void ReshardingOplogApplierMetrics::onInsertApplied() {
_insertsApplied++;
- _metricsNew->onInsertApplied();
+ _metrics->onInsertApplied();
}
void ReshardingOplogApplierMetrics::onUpdateApplied() {
_updatesApplied++;
- _metricsNew->onUpdateApplied();
+ _metrics->onUpdateApplied();
}
void ReshardingOplogApplierMetrics::onDeleteApplied() {
_deletesApplied++;
- _metricsNew->onDeleteApplied();
+ _metrics->onDeleteApplied();
}
void ReshardingOplogApplierMetrics::onBatchRetrievedDuringOplogApplying(Milliseconds elapsed) {
- _metricsNew->onBatchRetrievedDuringOplogApplying(elapsed);
+ _metrics->onBatchRetrievedDuringOplogApplying(elapsed);
}
void ReshardingOplogApplierMetrics::onOplogLocalBatchApplied(Milliseconds elapsed) {
- _metricsNew->onOplogLocalBatchApplied(elapsed);
+ _metrics->onOplogLocalBatchApplied(elapsed);
}
void ReshardingOplogApplierMetrics::onOplogEntriesApplied(int64_t numEntries) {
_oplogEntriesApplied += numEntries;
- _metricsNew->onOplogEntriesApplied(numEntries);
+ _metrics->onOplogEntriesApplied(numEntries);
}
void ReshardingOplogApplierMetrics::onWriteToStashCollections() {
_writesToStashCollections++;
- _metricsNew->onWriteToStashedCollections();
+ _metrics->onWriteToStashedCollections();
}
int64_t ReshardingOplogApplierMetrics::getInsertsApplied() const {
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.h b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.h
index 28830da1bfc..14347ce0b6b 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.h
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.h
@@ -29,7 +29,7 @@
#pragma once
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_applier_progress_gen.h"
#include "mongo/util/duration.h"
@@ -40,7 +40,7 @@ namespace mongo {
*/
class ReshardingOplogApplierMetrics {
public:
- ReshardingOplogApplierMetrics(ReshardingMetricsNew* metricsNew,
+ ReshardingOplogApplierMetrics(ReshardingMetrics* metrics,
boost::optional<ReshardingOplogApplierProgress> progressDoc);
void onInsertApplied();
@@ -59,7 +59,7 @@ public:
int64_t getWritesToStashCollections() const;
private:
- ReshardingMetricsNew* _metricsNew;
+ ReshardingMetrics* _metrics;
int64_t _insertsApplied{0};
int64_t _updatesApplied{0};
int64_t _deletesApplied{0};
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics_test.cpp
index 44ea5efb842..7c04439713a 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics_test.cpp
@@ -42,14 +42,14 @@ namespace {
class ReshardingOplogApplierMetricsTest : public ShardingDataTransformMetricsTestFixture {
public:
- std::unique_ptr<ReshardingMetricsNew> createInstanceMetrics() {
- return std::make_unique<ReshardingMetricsNew>(UUID::gen(),
- kTestCommand,
- kTestNamespace,
- ReshardingMetricsNew::Role::kRecipient,
- getClockSource()->now(),
- getClockSource(),
- &_cumulativeMetrics);
+ std::unique_ptr<ReshardingMetrics> createInstanceMetrics() {
+ return std::make_unique<ReshardingMetrics>(UUID::gen(),
+ kTestCommand,
+ kTestNamespace,
+ ReshardingMetrics::Role::kRecipient,
+ getClockSource()->now(),
+ getClockSource(),
+ &_cumulativeMetrics);
}
};
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp
index 0e3f5a87504..d2313684ff9 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include <fmt/format.h>
#include "mongo/db/cancelable_operation_context.h"
@@ -64,7 +61,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -158,13 +154,12 @@ public:
_cm = createChunkManagerForOriginalColl();
- _metrics =
- ReshardingMetricsNew::makeInstance(kCrudUUID,
- BSON("y" << 1),
- kCrudNs,
- ReshardingMetricsNew::Role::kRecipient,
- getServiceContext()->getFastClockSource()->now(),
- getServiceContext());
+ _metrics = ReshardingMetrics::makeInstance(kCrudUUID,
+ BSON("y" << 1),
+ kCrudNs,
+ ReshardingMetrics::Role::kRecipient,
+ getServiceContext()->getFastClockSource()->now(),
+ getServiceContext());
_applierMetrics =
std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), boost::none);
@@ -195,17 +190,17 @@ public:
kCrudUUID,
ChunkRange{BSON(kOriginalShardKey << MINKEY),
BSON(kOriginalShardKey << -std::numeric_limits<double>::infinity())},
- ChunkVersion(1, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}),
_sourceId.getShardId()},
ChunkType{
kCrudUUID,
ChunkRange{BSON(kOriginalShardKey << -std::numeric_limits<double>::infinity()),
BSON(kOriginalShardKey << 0)},
- ChunkVersion(1, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}),
kOtherShardId},
ChunkType{kCrudUUID,
ChunkRange{BSON(kOriginalShardKey << 0), BSON(kOriginalShardKey << MAXKEY)},
- ChunkVersion(1, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}),
_sourceId.getShardId()}};
auto rt = RoutingTableHistory::makeNew(kCrudNs,
@@ -363,7 +358,7 @@ protected:
boost::optional<ChunkManager> _cm;
const ReshardingSourceId _sourceId{UUID::gen(), kMyShardId};
- std::unique_ptr<ReshardingMetricsNew> _metrics;
+ std::unique_ptr<ReshardingMetrics> _metrics;
std::unique_ptr<ReshardingOplogApplierMetrics> _applierMetrics;
std::shared_ptr<executor::ThreadPoolTaskExecutor> _executor;
diff --git a/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp
index f8af8d80998..ca596e65e16 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include <boost/optional/optional_io.hpp>
#include <memory>
#include <vector>
@@ -46,7 +43,7 @@
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/s/op_observer_sharding_impl.h"
#include "mongo/db/s/resharding/resharding_data_copy_util.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_application.h"
#include "mongo/db/s/resharding/resharding_oplog_batch_applier.h"
#include "mongo/db/s/resharding/resharding_oplog_session_application.h"
@@ -66,7 +63,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -111,15 +107,15 @@ public:
opCtx.get(), nss, CollectionOptions{});
}
- _metricsNew =
- ReshardingMetricsNew::makeInstance(UUID::gen(),
- BSON("y" << 1),
- _outputNss,
- ShardingDataTransformMetrics::Role::kRecipient,
- serviceContext->getFastClockSource()->now(),
- serviceContext);
+ _metrics =
+ ReshardingMetrics::makeInstance(UUID::gen(),
+ BSON("y" << 1),
+ _outputNss,
+ ShardingDataTransformMetrics::Role::kRecipient,
+ serviceContext->getFastClockSource()->now(),
+ serviceContext);
_applierMetrics =
- std::make_unique<ReshardingOplogApplierMetrics>(_metricsNew.get(), boost::none);
+ std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), boost::none);
_crudApplication = std::make_unique<ReshardingOplogApplicationRules>(
_outputNss,
std::vector<NamespaceString>{_myStashNss, _otherStashNss},
@@ -318,7 +314,7 @@ private:
std::vector<ChunkType> chunks = {ChunkType{
_sourceUUID,
ChunkRange{BSON(_currentShardKey << MINKEY), BSON(_currentShardKey << MAXKEY)},
- ChunkVersion(100, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 0}),
_myDonorId}};
auto rt = RoutingTableHistory::makeNew(_sourceNss,
@@ -356,13 +352,15 @@ private:
const ShardId _otherDonorId{"otherDonorId"};
const NamespaceString _outputNss =
- constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
- const NamespaceString _myStashNss = getLocalConflictStashNamespace(_sourceUUID, _myDonorId);
+ resharding::constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
+ const NamespaceString _myStashNss =
+ resharding::getLocalConflictStashNamespace(_sourceUUID, _myDonorId);
const NamespaceString _otherStashNss =
- getLocalConflictStashNamespace(_sourceUUID, _otherDonorId);
- const NamespaceString _myOplogBufferNss = getLocalOplogBufferNamespace(_sourceUUID, _myDonorId);
+ resharding::getLocalConflictStashNamespace(_sourceUUID, _otherDonorId);
+ const NamespaceString _myOplogBufferNss =
+ resharding::getLocalOplogBufferNamespace(_sourceUUID, _myDonorId);
- std::unique_ptr<ReshardingMetricsNew> _metricsNew;
+ std::unique_ptr<ReshardingMetrics> _metrics;
std::unique_ptr<ReshardingOplogApplierMetrics> _applierMetrics;
std::unique_ptr<ReshardingOplogApplicationRules> _crudApplication;
diff --git a/src/mongo/db/s/resharding/resharding_oplog_crud_application_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_crud_application_test.cpp
index 4114100a5bc..9c09f5ebcf0 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_crud_application_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_crud_application_test.cpp
@@ -27,12 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
-#include <memory>
-#include <vector>
-
#include "mongo/bson/bsonmisc.h"
#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/catalog_raii.h"
@@ -47,7 +41,7 @@
#include "mongo/db/s/collection_sharding_runtime.h"
#include "mongo/db/s/op_observer_sharding_impl.h"
#include "mongo/db/s/resharding/resharding_data_copy_util.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_application.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/s/sharding_state.h"
@@ -112,15 +106,15 @@ public:
CollectionMetadata(makeChunkManagerForOutputCollection(), _myDonorId));
}
- _metricsNew =
- ReshardingMetricsNew::makeInstance(_sourceUUID,
- BSON(_newShardKey << 1),
- _outputNss,
- ShardingDataTransformMetrics::Role::kRecipient,
- serviceContext->getFastClockSource()->now(),
- serviceContext);
+ _metrics =
+ ReshardingMetrics::makeInstance(_sourceUUID,
+ BSON(_newShardKey << 1),
+ _outputNss,
+ ShardingDataTransformMetrics::Role::kRecipient,
+ serviceContext->getFastClockSource()->now(),
+ serviceContext);
_oplogApplierMetrics =
- std::make_unique<ReshardingOplogApplierMetrics>(_metricsNew.get(), boost::none);
+ std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), boost::none);
_applier = std::make_unique<ReshardingOplogApplicationRules>(
_outputNss,
std::vector<NamespaceString>{_myStashNss, _otherStashNss},
@@ -289,16 +283,16 @@ private:
_sourceUUID,
ChunkRange{BSON(_currentShardKey << MINKEY),
BSON(_currentShardKey << -std::numeric_limits<double>::infinity())},
- ChunkVersion(100, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 0}),
_myDonorId},
ChunkType{_sourceUUID,
ChunkRange{BSON(_currentShardKey << -std::numeric_limits<double>::infinity()),
BSON(_currentShardKey << 0)},
- ChunkVersion(100, 1, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 1}),
_otherDonorId},
ChunkType{_sourceUUID,
ChunkRange{BSON(_currentShardKey << 0), BSON(_currentShardKey << MAXKEY)},
- ChunkVersion(100, 2, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 2}),
_myDonorId}};
return makeChunkManager(
@@ -311,7 +305,7 @@ private:
std::vector<ChunkType> chunks = {
ChunkType{outputUuid,
ChunkRange{BSON(_newShardKey << MINKEY), BSON(_newShardKey << MAXKEY)},
- ChunkVersion(100, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 0}),
_myDonorId}};
return makeChunkManager(
@@ -335,13 +329,14 @@ private:
const ShardId _otherDonorId{"otherDonorId"};
const NamespaceString _outputNss =
- constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
- const NamespaceString _myStashNss = getLocalConflictStashNamespace(_sourceUUID, _myDonorId);
+ resharding::constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
+ const NamespaceString _myStashNss =
+ resharding::getLocalConflictStashNamespace(_sourceUUID, _myDonorId);
const NamespaceString _otherStashNss =
- getLocalConflictStashNamespace(_sourceUUID, _otherDonorId);
+ resharding::getLocalConflictStashNamespace(_sourceUUID, _otherDonorId);
std::unique_ptr<ReshardingOplogApplicationRules> _applier;
- std::unique_ptr<ReshardingMetricsNew> _metricsNew;
+ std::unique_ptr<ReshardingMetrics> _metrics;
std::unique_ptr<ReshardingOplogApplierMetrics> _oplogApplierMetrics;
};
diff --git a/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp b/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp
index 41f87420e70..ac62a1cee4d 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp
@@ -45,7 +45,7 @@
#include "mongo/db/pipeline/aggregate_command_gen.h"
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/repl/read_concern_level.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/s/sharding_data_transform_cumulative_metrics.h"
#include "mongo/db/storage/write_unit_of_work.h"
@@ -272,9 +272,9 @@ AggregateCommandRequest ReshardingOplogFetcher::_makeAggregateCommandRequest(
auto opCtx = opCtxRaii.get();
auto expCtx = _makeExpressionContext(opCtx);
- auto serializedPipeline =
- createOplogFetchingPipelineForResharding(expCtx, _startAt, _collUUID, _recipientShard)
- ->serializeToBson();
+ auto serializedPipeline = resharding::createOplogFetchingPipelineForResharding(
+ expCtx, _startAt, _collUUID, _recipientShard)
+ ->serializeToBson();
AggregateCommandRequest aggRequest(NamespaceString::kRsOplogNamespace,
std::move(serializedPipeline));
@@ -326,8 +326,8 @@ bool ReshardingOplogFetcher::consume(Client* client,
[this, &batchesProcessed, &moreToCome, &opCtxRaii, &batchFetchTimer, factory](
const std::vector<BSONObj>& batch,
const boost::optional<BSONObj>& postBatchResumeToken) {
- _env->metricsNew()->onOplogEntriesFetched(batch.size(),
- Milliseconds(batchFetchTimer.millis()));
+ _env->metrics()->onOplogEntriesFetched(batch.size(),
+ Milliseconds(batchFetchTimer.millis()));
ThreadClient client(fmt::format("ReshardingFetcher-{}-{}",
_reshardingUUID.toString(),
@@ -354,7 +354,7 @@ bool ReshardingOplogFetcher::consume(Client* client,
uassertStatusOK(toWriteTo->insertDocument(opCtx, InsertStatement{doc}, nullptr));
wuow.commit();
- _env->metricsNew()->onLocalInsertDuringOplogFetching(
+ _env->metrics()->onLocalInsertDuringOplogFetching(
Milliseconds(insertTimer.millis()));
++_numOplogEntriesCopied;
@@ -368,7 +368,7 @@ bool ReshardingOplogFetcher::consume(Client* client,
_onInsertFuture = std::move(f);
}
- if (isFinalOplog(nextOplog, _reshardingUUID)) {
+ if (resharding::isFinalOplog(nextOplog, _reshardingUUID)) {
moreToCome = false;
return false;
}
@@ -392,7 +392,7 @@ bool ReshardingOplogFetcher::consume(Client* client,
oplog.set_id(Value(startAt.toBSON()));
oplog.setObject(BSON("msg"
<< "Latest oplog ts from donor's cursor response"));
- oplog.setObject2(BSON("type" << kReshardProgressMark));
+ oplog.setObject2(BSON("type" << resharding::kReshardProgressMark));
oplog.setOpTime(OplogSlot());
oplog.setWallClockTime(opCtx->getServiceContext()->getFastClockSource()->now());
@@ -402,7 +402,7 @@ bool ReshardingOplogFetcher::consume(Client* client,
// Also include synthetic oplog in the fetched count so it can match up with the
// total oplog applied count in the end.
- _env->metricsNew()->onOplogEntriesFetched(1, Milliseconds(0));
+ _env->metrics()->onOplogEntriesFetched(1, Milliseconds(0));
auto [p, f] = makePromiseFuture<void>();
{
diff --git a/src/mongo/db/s/resharding/resharding_oplog_fetcher.h b/src/mongo/db/s/resharding/resharding_oplog_fetcher.h
index 5772c6bdfaa..37f5090f0e2 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_fetcher.h
+++ b/src/mongo/db/s/resharding/resharding_oplog_fetcher.h
@@ -50,25 +50,25 @@
namespace mongo {
-class ReshardingMetricsNew;
+class ReshardingMetrics;
class ReshardingOplogFetcher : public resharding::OnInsertAwaitable {
public:
class Env {
public:
- Env(ServiceContext* service, ReshardingMetricsNew* metricsNew)
- : _service(service), _metricsNew(metricsNew) {}
+ Env(ServiceContext* service, ReshardingMetrics* metrics)
+ : _service(service), _metrics(metrics) {}
ServiceContext* service() const {
return _service;
}
- ReshardingMetricsNew* metricsNew() const {
- return _metricsNew;
+ ReshardingMetrics* metrics() const {
+ return _metrics;
}
private:
ServiceContext* _service;
- ReshardingMetricsNew* _metricsNew;
+ ReshardingMetrics* _metrics;
};
// Special value to use for startAt to indicate there are no more oplog entries needing to be
diff --git a/src/mongo/db/s/resharding/resharding_oplog_fetcher_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_fetcher_test.cpp
index 17624acced9..68523519f41 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_fetcher_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_fetcher_test.cpp
@@ -45,7 +45,7 @@
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/repl/wait_for_majority_service.h"
#include "mongo/db/s/operation_sharding_state.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_fetcher.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/s/shard_server_test_fixture.h"
@@ -98,13 +98,12 @@ public:
OldClientContext ctx(_opCtx, NamespaceString::kRsOplogNamespace.ns());
}
- _metrics =
- ReshardingMetricsNew::makeInstance(_reshardingUUID,
- BSON("y" << 1),
- NamespaceString{""},
- ReshardingMetricsNew::Role::kRecipient,
- getServiceContext()->getFastClockSource()->now(),
- getServiceContext());
+ _metrics = ReshardingMetrics::makeInstance(_reshardingUUID,
+ BSON("y" << 1),
+ NamespaceString{""},
+ ReshardingMetrics::Role::kRecipient,
+ getServiceContext()->getFastClockSource()->now(),
+ getServiceContext());
for (const auto& shardId : kTwoShardIdList) {
auto shardTargeter = RemoteCommandTargeterMock::get(
@@ -299,7 +298,8 @@ public:
BSON(
"msg" << fmt::format("Writes to {} are temporarily blocked for resharding.",
dataColl.getCollection()->ns().toString())),
- BSON("type" << kReshardFinalOpLogType << "reshardingUUID" << _reshardingUUID),
+ BSON("type" << resharding::kReshardFinalOpLogType << "reshardingUUID"
+ << _reshardingUUID),
boost::none,
boost::none,
boost::none,
@@ -343,7 +343,7 @@ protected:
Timestamp _fetchTimestamp;
ShardId _donorShard;
ShardId _destinationShard;
- std::unique_ptr<ReshardingMetricsNew> _metrics;
+ std::unique_ptr<ReshardingMetrics> _metrics;
private:
static HostAndPort makeHostAndPort(const ShardId& shardId) {
diff --git a/src/mongo/db/s/resharding/resharding_oplog_session_application_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_session_application_test.cpp
index bd3d602f3a7..5be42b0c30d 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_session_application_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_session_application_test.cpp
@@ -705,6 +705,16 @@ TEST_F(ReshardingOplogSessionApplicationTest,
TxnNumber internalTxnTxnNumber = 1;
StmtId stmtId = 2;
+ // Make two in-progress transactions so the one started by resharding must block.
+ {
+ auto newClientOwned = getServiceContext()->makeClient("newClient");
+ AlternativeClientRegion acr(newClientOwned);
+ auto newOpCtx = cc().makeOperationContext();
+ makeInProgressTxn(newOpCtx.get(),
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(retryableWriteLsid,
+ retryableWriteTxnNumber),
+ internalTxnTxnNumber);
+ }
{
auto opCtx = makeOperationContext();
makeInProgressTxn(opCtx.get(), internalTxnLsid, internalTxnTxnNumber);
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service.cpp b/src/mongo/db/s/resharding/resharding_recipient_service.cpp
index baaf64fc5e3..5b66d19e8bd 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_recipient_service.cpp
@@ -120,12 +120,12 @@ using resharding_metrics::getIntervalStartFieldName;
using DocT = ReshardingRecipientDocument;
const auto metricsPrefix = resharding_metrics::getMetricsPrefix<DocT>();
-void buildStateDocumentCloneMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetricsNew* metrics) {
+void buildStateDocumentCloneMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetrics* metrics) {
bob.append(getIntervalStartFieldName<DocT>(ReshardingRecipientMetrics::kDocumentCopyFieldName),
metrics->getCopyingBegin());
}
-void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetricsNew* metrics) {
+void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetrics* metrics) {
bob.append(getIntervalEndFieldName<DocT>(ReshardingRecipientMetrics::kDocumentCopyFieldName),
metrics->getCopyingEnd());
bob.append(
@@ -138,14 +138,14 @@ void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetr
}
void buildStateDocumentStrictConsistencyMetricsForUpdate(BSONObjBuilder& bob,
- ReshardingMetricsNew* metrics) {
+ ReshardingMetrics* metrics) {
bob.append(
getIntervalEndFieldName<DocT>(ReshardingRecipientMetrics::kOplogApplicationFieldName),
metrics->getApplyingEnd());
}
void buildStateDocumentMetricsForUpdate(BSONObjBuilder& bob,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
RecipientStateEnum newState) {
switch (newState) {
case RecipientStateEnum::kCloning:
@@ -162,8 +162,8 @@ void buildStateDocumentMetricsForUpdate(BSONObjBuilder& bob,
}
}
-ReshardingMetricsNew::RecipientState toMetricsState(RecipientStateEnum state) {
- return ReshardingMetricsNew::RecipientState(state);
+ReshardingMetrics::RecipientState toMetricsState(RecipientStateEnum state) {
+ return ReshardingMetrics::RecipientState(state);
}
} // namespace
@@ -190,7 +190,7 @@ ReshardingRecipientService::RecipientStateMachine::RecipientStateMachine(
ReshardingDataReplicationFactory dataReplicationFactory)
: repl::PrimaryOnlyService::TypedInstance<RecipientStateMachine>(),
_recipientService{recipientService},
- _metricsNew{ReshardingMetricsNew::initializeFrom(recipientDoc, getGlobalServiceContext())},
+ _metrics{ReshardingMetrics::initializeFrom(recipientDoc, getGlobalServiceContext())},
_metadata{recipientDoc.getCommonReshardingMetadata()},
_minimumOperationDuration{Milliseconds{recipientDoc.getMinimumOperationDurationMillis()}},
_recipientCtx{recipientDoc.getMutableState()},
@@ -219,7 +219,7 @@ ReshardingRecipientService::RecipientStateMachine::RecipientStateMachine(
}()) {
invariant(_externalState);
- _metricsNew->onStateTransition(boost::none, toMetricsState(_recipientCtx.getState()));
+ _metrics->onStateTransition(boost::none, toMetricsState(_recipientCtx.getState()));
}
ExecutorFuture<void>
@@ -370,7 +370,9 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::_finishR
if (!_isAlsoDonor) {
auto opCtx = factory.makeOperationContext(&cc());
- _externalState->clearFilteringMetadata(opCtx.get());
+ _externalState->clearFilteringMetadata(opCtx.get(),
+ _metadata.getSourceNss(),
+ _metadata.getTempReshardingNss());
RecoverableCriticalSectionService::get(opCtx.get())
->releaseRecoverableCriticalSection(
@@ -417,7 +419,13 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::_runMand
self = shared_from_this(),
outerStatus = status,
isCanceled = stepdownToken.isCanceled()](Status dataReplicationHaltStatus) {
- _metricsNew->onStateTransition(toMetricsState(_recipientCtx.getState()), boost::none);
+ _metrics->onStateTransition(toMetricsState(_recipientCtx.getState()), boost::none);
+
+ // Destroy the metrics early so its lifetime will not be tied to the lifetime of this
+ // state machine. This is because future callbacks copy shared pointers to this state
+ // machine, which causes it to live longer than expected and potentially overlap with
+ // a newer instance when stepping up.
+ _metrics.reset();
// If the stepdownToken was triggered, it takes priority in order to make sure that
// the promise is set with an error that the coordinator can retry with. If it ran into
@@ -432,7 +440,6 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::_runMand
// replication errors because resharding is known to have failed already.
stdx::lock_guard<Latch> lk(_mutex);
ensureFulfilledPromise(lk, _completionPromise, outerStatus);
-
return outerStatus;
});
}
@@ -504,7 +511,7 @@ void ReshardingRecipientService::RecipientStateMachine::interrupt(Status status)
boost::optional<BSONObj> ReshardingRecipientService::RecipientStateMachine::reportForCurrentOp(
MongoProcessInterface::CurrentOpConnectionsMode,
MongoProcessInterface::CurrentOpSessionsMode) noexcept {
- return _metricsNew->reportForCurrentOp();
+ return _metrics->reportForCurrentOp();
}
void ReshardingRecipientService::RecipientStateMachine::onReshardingFieldsChanges(
@@ -550,8 +557,8 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::
ReshardingRecipientService::RecipientStateMachine::CloneDetails cloneDetails) {
_transitionToCreatingCollection(
cloneDetails, (*executor)->now() + _minimumOperationDuration, factory);
- _metricsNew->setDocumentsToCopyCounts(cloneDetails.approxDocumentsToCopy,
- cloneDetails.approxBytesToCopy);
+ _metrics->setDocumentsToCopyCounts(cloneDetails.approxDocumentsToCopy,
+ cloneDetails.approxBytesToCopy);
});
}
@@ -616,7 +623,7 @@ ReshardingRecipientService::RecipientStateMachine::_makeDataReplication(Operatio
for (const auto& donor : _donorShards) {
_applierMetricsMap.emplace(
donor.getShardId(),
- std::make_unique<ReshardingOplogApplierMetrics>(_metricsNew.get(), boost::none));
+ std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), boost::none));
}
} else {
invariant(_applierMetricsMap.size() == _donorShards.size(),
@@ -625,7 +632,7 @@ ReshardingRecipientService::RecipientStateMachine::_makeDataReplication(Operatio
}
return _dataReplicationFactory(opCtx,
- _metricsNew.get(),
+ _metrics.get(),
&_applierMetricsMap,
_metadata,
_donorShards,
@@ -726,8 +733,8 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::
.then([this, &factory] {
auto opCtx = factory.makeOperationContext(&cc());
for (const auto& donor : _donorShards) {
- auto stashNss =
- getLocalConflictStashNamespace(_metadata.getSourceUUID(), donor.getShardId());
+ auto stashNss = resharding::getLocalConflictStashNamespace(
+ _metadata.getSourceUUID(), donor.getShardId());
AutoGetCollection stashColl(opCtx.get(), stashNss, MODE_IS);
uassert(5356800,
"Resharding completed with non-empty stash collections",
@@ -846,7 +853,7 @@ void ReshardingRecipientService::RecipientStateMachine::_transitionState(
_updateRecipientDocument(
std::move(newRecipientCtx), std::move(cloneDetails), std::move(configStartTime), factory);
- _metricsNew->onStateTransition(toMetricsState(oldState), toMetricsState(newState));
+ _metrics->onStateTransition(toMetricsState(oldState), toMetricsState(newState));
LOGV2_INFO(5279506,
"Transitioned resharding recipient state",
@@ -871,7 +878,7 @@ void ReshardingRecipientService::RecipientStateMachine::_transitionToCreatingCol
void ReshardingRecipientService::RecipientStateMachine::_transitionToCloning(
const CancelableOperationContextFactory& factory) {
- _metricsNew->onCopyingBegin();
+ _metrics->onCopyingBegin();
auto newRecipientCtx = _recipientCtx;
newRecipientCtx.setState(RecipientStateEnum::kCloning);
_transitionState(std::move(newRecipientCtx), boost::none, boost::none, factory);
@@ -883,8 +890,8 @@ void ReshardingRecipientService::RecipientStateMachine::_transitionToApplying(
newRecipientCtx.setState(RecipientStateEnum::kApplying);
_transitionState(std::move(newRecipientCtx), boost::none, boost::none, factory);
- _metricsNew->onCopyingEnd();
- _metricsNew->onApplyingBegin();
+ _metrics->onCopyingEnd();
+ _metrics->onApplyingBegin();
}
void ReshardingRecipientService::RecipientStateMachine::_transitionToStrictConsistency(
@@ -893,14 +900,14 @@ void ReshardingRecipientService::RecipientStateMachine::_transitionToStrictConsi
newRecipientCtx.setState(RecipientStateEnum::kStrictConsistency);
_transitionState(std::move(newRecipientCtx), boost::none, boost::none, factory);
- _metricsNew->onApplyingEnd();
+ _metrics->onApplyingEnd();
}
void ReshardingRecipientService::RecipientStateMachine::_transitionToError(
Status abortReason, const CancelableOperationContextFactory& factory) {
auto newRecipientCtx = _recipientCtx;
newRecipientCtx.setState(RecipientStateEnum::kError);
- emplaceTruncatedAbortReasonIfExists(newRecipientCtx, abortReason);
+ resharding::emplaceTruncatedAbortReasonIfExists(newRecipientCtx, abortReason);
_transitionState(std::move(newRecipientCtx), boost::none, boost::none, factory);
}
@@ -1052,8 +1059,7 @@ void ReshardingRecipientService::RecipientStateMachine::_updateRecipientDocument
*configStartTime);
}
- buildStateDocumentMetricsForUpdate(
- setBuilder, _metricsNew.get(), newRecipientCtx.getState());
+ buildStateDocumentMetricsForUpdate(setBuilder, _metrics.get(), newRecipientCtx.getState());
setBuilder.doneFast();
}
@@ -1156,7 +1162,7 @@ void ReshardingRecipientService::RecipientStateMachine::_restoreMetrics(
// metrics section of the recipient state document and restored during metrics
// initialization. This is so that applied oplog entries that add or remove documents do
// not affect the cloning metrics.
- _metricsNew->restoreDocumentsCopied(documentCountCopied, documentBytesCopied);
+ _metrics->restoreDocumentsCopied(documentCountCopied, documentBytesCopied);
}
}
@@ -1167,10 +1173,10 @@ void ReshardingRecipientService::RecipientStateMachine::_restoreMetrics(
progressDocList;
for (const auto& donor : _donorShards) {
{
- AutoGetCollection oplogBufferColl(
- opCtx.get(),
- getLocalOplogBufferNamespace(_metadata.getSourceUUID(), donor.getShardId()),
- MODE_IS);
+ AutoGetCollection oplogBufferColl(opCtx.get(),
+ resharding::getLocalOplogBufferNamespace(
+ _metadata.getSourceUUID(), donor.getShardId()),
+ MODE_IS);
if (oplogBufferColl) {
oplogEntriesFetched += oplogBufferColl->numRecords(opCtx.get());
}
@@ -1208,19 +1214,19 @@ void ReshardingRecipientService::RecipientStateMachine::_restoreMetrics(
if (!progressDoc) {
_applierMetricsMap.emplace(
shardId,
- std::make_unique<ReshardingOplogApplierMetrics>(_metricsNew.get(), boost::none));
+ std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), boost::none));
continue;
}
- _metricsNew->accumulateFrom(*progressDoc);
+ _metrics->accumulateFrom(*progressDoc);
auto applierMetrics =
- std::make_unique<ReshardingOplogApplierMetrics>(_metricsNew.get(), progressDoc);
+ std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), progressDoc);
_applierMetricsMap.emplace(shardId, std::move(applierMetrics));
}
- _metricsNew->restoreOplogEntriesFetched(oplogEntriesFetched);
- _metricsNew->restoreOplogEntriesApplied(oplogEntriesApplied);
+ _metrics->restoreOplogEntriesFetched(oplogEntriesFetched);
+ _metrics->restoreOplogEntriesApplied(oplogEntriesApplied);
}
CancellationToken ReshardingRecipientService::RecipientStateMachine::_initAbortSource(
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service.h b/src/mongo/db/s/resharding/resharding_recipient_service.h
index fc41ba0e9ee..5dab490b96f 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service.h
+++ b/src/mongo/db/s/resharding/resharding_recipient_service.h
@@ -33,7 +33,7 @@
#include "mongo/db/s/resharding/recipient_document_gen.h"
#include "mongo/db/s/resharding/resharding_data_replication.h"
#include "mongo/db/s/resharding/resharding_future_util.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_applier_metrics.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/s/resharding/type_collection_fields_gen.h"
@@ -163,9 +163,9 @@ public:
return _metadata;
}
- inline const ReshardingMetricsNew& getMetrics() const {
- invariant(_metricsNew);
- return *_metricsNew;
+ inline const ReshardingMetrics& getMetrics() const {
+ invariant(_metrics);
+ return *_metrics;
}
boost::optional<BSONObj> reportForCurrentOp(
@@ -289,7 +289,7 @@ private:
// The primary-only service instance corresponding to the recipient instance. Not owned.
const ReshardingRecipientService* const _recipientService;
- std::unique_ptr<ReshardingMetricsNew> _metricsNew;
+ std::unique_ptr<ReshardingMetrics> _metrics;
ReshardingApplierMetricsMap _applierMetricsMap;
// The in-memory representation of the immutable portion of the document in
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service_external_state.cpp b/src/mongo/db/s/resharding/resharding_recipient_service_external_state.cpp
index 3e929815454..222a2c6f86a 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service_external_state.cpp
+++ b/src/mongo/db/s/resharding/resharding_recipient_service_external_state.cpp
@@ -186,8 +186,12 @@ void RecipientStateMachineExternalStateImpl::updateCoordinatorDocument(Operation
}
}
-void RecipientStateMachineExternalStateImpl::clearFilteringMetadata(OperationContext* opCtx) {
- resharding::clearFilteringMetadata(opCtx, true /* scheduleAsyncRefresh */);
+void RecipientStateMachineExternalStateImpl::clearFilteringMetadata(
+ OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) {
+ stdx::unordered_set<NamespaceString> namespacesToRefresh{sourceNss, tempReshardingNss};
+ resharding::clearFilteringMetadata(opCtx, namespacesToRefresh, true /* scheduleAsyncRefresh */);
}
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service_external_state.h b/src/mongo/db/s/resharding/resharding_recipient_service_external_state.h
index c1597da7f7c..0a2749a66fc 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service_external_state.h
+++ b/src/mongo/db/s/resharding/resharding_recipient_service_external_state.h
@@ -90,7 +90,9 @@ public:
const BSONObj& query,
const BSONObj& update) = 0;
- virtual void clearFilteringMetadata(OperationContext* opCtx) = 0;
+ virtual void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) = 0;
/**
* Creates the temporary resharding collection locally.
@@ -137,7 +139,9 @@ public:
const BSONObj& query,
const BSONObj& update) override;
- void clearFilteringMetadata(OperationContext* opCtx) override;
+ void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) override;
private:
template <typename Callable>
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service_external_state_test.cpp b/src/mongo/db/s/resharding/resharding_recipient_service_external_state_test.cpp
index c4e193e6897..62776bba466 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service_external_state_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_recipient_service_external_state_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/bson/unordered_fields_bsonobj_comparator.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/dbdirectclient.h"
@@ -48,9 +45,6 @@
#include "mongo/s/database_version.h"
#include "mongo/s/stale_exception.h"
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
-
namespace mongo {
namespace {
@@ -168,7 +162,7 @@ public:
reshardingFields.setRecipientFields(recipientFields);
coll.setReshardingFields(reshardingFields);
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk(uuid,
{skey.getKeyPattern().globalMin(), skey.getKeyPattern().globalMax()},
@@ -193,7 +187,7 @@ public:
CollectionType coll(
origNss, epoch, timestamp, Date_t::now(), uuid, skey.getKeyPattern());
- ChunkVersion version(2, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {2, 0});
ChunkType chunk(uuid,
{skey.getKeyPattern().globalMin(), skey.getKeyPattern().globalMax()},
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp b/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp
index 78316aacca7..4e6a5489f71 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp
@@ -83,7 +83,7 @@ public:
std::vector<ChunkType> chunks = {ChunkType{
_sourceUUID,
ChunkRange{BSON(_currentShardKey << MINKEY), BSON(_currentShardKey << MAXKEY)},
- ChunkVersion(100, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 0}),
_someDonorId}};
auto rt = RoutingTableHistory::makeNew(_sourceNss,
@@ -136,7 +136,9 @@ public:
const BSONObj& query,
const BSONObj& update) override {}
- void clearFilteringMetadata(OperationContext* opCtx) override {}
+ void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) override {}
private:
RoutingTableHistoryValueHandle _makeStandaloneRoutingTableHistory(RoutingTableHistory rt) {
@@ -250,12 +252,12 @@ public:
NamespaceString sourceNss("sourcedb", "sourcecollection");
auto sourceUUID = UUID::gen();
- auto commonMetadata =
- CommonReshardingMetadata(UUID::gen(),
- sourceNss,
- sourceUUID,
- constructTemporaryReshardingNss(sourceNss.db(), sourceUUID),
- newShardKeyPattern());
+ auto commonMetadata = CommonReshardingMetadata(
+ UUID::gen(),
+ sourceNss,
+ sourceUUID,
+ resharding::constructTemporaryReshardingNss(sourceNss.db(), sourceUUID),
+ newShardKeyPattern());
commonMetadata.setStartTime(getServiceContext()->getFastClockSource()->now());
doc.setCommonReshardingMetadata(std::move(commonMetadata));
@@ -625,7 +627,8 @@ TEST_F(ReshardingRecipientServiceTest, WritesNoopOplogEntryOnReshardDoneCatchUp)
ErrorCodes::InterruptedDueToReplStateChange);
DBDirectClient client(opCtx.get());
- NamespaceString sourceNss = constructTemporaryReshardingNss("sourcedb", doc.getSourceUUID());
+ NamespaceString sourceNss =
+ resharding::constructTemporaryReshardingNss("sourcedb", doc.getSourceUUID());
FindCommandRequest findRequest{NamespaceString::kRsOplogNamespace};
findRequest.setFilter(
@@ -671,7 +674,8 @@ TEST_F(ReshardingRecipientServiceTest, WritesNoopOplogEntryForImplicitShardColle
ErrorCodes::InterruptedDueToReplStateChange);
DBDirectClient client(opCtx.get());
- NamespaceString sourceNss = constructTemporaryReshardingNss("sourcedb", doc.getSourceUUID());
+ NamespaceString sourceNss =
+ resharding::constructTemporaryReshardingNss("sourcedb", doc.getSourceUUID());
FindCommandRequest findRequest{NamespaceString::kRsOplogNamespace};
findRequest.setFilter(
@@ -739,7 +743,7 @@ TEST_F(ReshardingRecipientServiceTest, TruncatesXLErrorOnRecipientDocument) {
// to the primitive truncation algorithm - Check that the total size is less than
// kReshardErrorMaxBytes + a couple additional bytes to provide a buffer for the field
// name sizes.
- int maxReshardErrorBytesCeiling = kReshardErrorMaxBytes + 200;
+ int maxReshardErrorBytesCeiling = resharding::kReshardErrorMaxBytes + 200;
ASSERT_LT(persistedAbortReasonBSON->objsize(), maxReshardErrorBytesCeiling);
ASSERT_EQ(persistedAbortReasonBSON->getIntField("code"),
ErrorCodes::ReshardCollectionTruncatedError);
@@ -815,7 +819,8 @@ TEST_F(ReshardingRecipientServiceTest, RestoreMetricsAfterStepUp) {
for (const auto& donor : donorShards) {
// Setup oplogBuffer collection.
ReshardingDonorOplogId donorOplogId{{20, i}, {19, 0}};
- insertFn(getLocalOplogBufferNamespace(doc.getSourceUUID(), donor.getShardId()),
+ insertFn(resharding::getLocalOplogBufferNamespace(doc.getSourceUUID(),
+ donor.getShardId()),
InsertStatement{BSON("_id" << donorOplogId.toBSON())});
++i;
@@ -923,7 +928,7 @@ TEST_F(ReshardingRecipientServiceTest, RestoreMetricsAfterStepUpWithMissingProgr
// Setup oplogBuffer collection.
ReshardingDonorOplogId donorOplogId{{20, i}, {19, 0}};
- insertFn(getLocalOplogBufferNamespace(doc.getSourceUUID(), donor.getShardId()),
+ insertFn(resharding::getLocalOplogBufferNamespace(doc.getSourceUUID(), donor.getShardId()),
InsertStatement{BSON("_id" << donorOplogId.toBSON())});
// Setup reshardingApplierProgress collection.
diff --git a/src/mongo/db/s/resharding/resharding_txn_cloner.cpp b/src/mongo/db/s/resharding/resharding_txn_cloner.cpp
index 478c0272c7d..1f074af6f75 100644
--- a/src/mongo/db/s/resharding/resharding_txn_cloner.cpp
+++ b/src/mongo/db/s/resharding/resharding_txn_cloner.cpp
@@ -35,7 +35,6 @@
#include <vector>
#include "mongo/bson/bsonmisc.h"
-#include "mongo/client/query.h"
#include "mongo/client/read_preference.h"
#include "mongo/db/client.h"
#include "mongo/db/concurrency/d_concurrency.h"
diff --git a/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp b/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp
index 1e22bc5a4a7..24045678550 100644
--- a/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp
@@ -1004,6 +1004,16 @@ TEST_F(ReshardingTxnClonerTest,
retryableWriteTxnNumber);
TxnNumber internalTxnTxnNumber = 1;
+ // Make two in-progress transactions so the one started by resharding must block.
+ {
+ auto newClientOwned = getServiceContext()->makeClient("newClient");
+ AlternativeClientRegion acr(newClientOwned);
+ auto newOpCtx = cc().makeOperationContext();
+ makeInProgressTxn(newOpCtx.get(),
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(retryableWriteLsid,
+ retryableWriteTxnNumber),
+ internalTxnTxnNumber);
+ }
makeInProgressTxn(operationContext(), internalTxnLsid, internalTxnTxnNumber);
auto lastOplogTs = getLatestOplogTimestamp(operationContext());
@@ -1096,6 +1106,16 @@ TEST_F(ReshardingTxnClonerTest, CancelableWhileWaitingOnInProgressInternalTxnFor
retryableWriteTxnNumber);
TxnNumber internalTxnTxnNumber = 1;
+ // Make two in-progress transactions so the one started by resharding must block.
+ {
+ auto newClientOwned = getServiceContext()->makeClient("newClient");
+ AlternativeClientRegion acr(newClientOwned);
+ auto newOpCtx = cc().makeOperationContext();
+ makeInProgressTxn(newOpCtx.get(),
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(retryableWriteLsid,
+ retryableWriteTxnNumber),
+ internalTxnTxnNumber);
+ }
makeInProgressTxn(operationContext(), internalTxnLsid, internalTxnTxnNumber);
ON_BLOCK_EXIT([&] { abortTxn(operationContext(), internalTxnLsid, internalTxnTxnNumber); });
diff --git a/src/mongo/db/s/resharding/resharding_util.cpp b/src/mongo/db/s/resharding/resharding_util.cpp
index d9a04064d3c..873fc7ce5d5 100644
--- a/src/mongo/db/s/resharding/resharding_util.cpp
+++ b/src/mongo/db/s/resharding/resharding_util.cpp
@@ -48,7 +48,7 @@
#include "mongo/db/s/collection_sharding_state.h"
#include "mongo/db/s/resharding/document_source_resharding_add_resume_id.h"
#include "mongo/db/s/resharding/document_source_resharding_iterate_transaction.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/storage/write_unit_of_work.h"
#include "mongo/logv2/log.h"
@@ -63,6 +63,7 @@
namespace mongo {
+namespace resharding {
namespace {
/**
@@ -414,4 +415,5 @@ boost::optional<Milliseconds> estimateRemainingRecipientTime(bool applyingBegan,
return {};
}
+} // namespace resharding
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_util.h b/src/mongo/db/s/resharding/resharding_util.h
index 194381e7e78..0d8aacbe3f7 100644
--- a/src/mongo/db/s/resharding/resharding_util.h
+++ b/src/mongo/db/s/resharding/resharding_util.h
@@ -50,6 +50,7 @@
#include "mongo/util/str.h"
namespace mongo {
+namespace resharding {
constexpr auto kReshardFinalOpLogType = "reshardFinalOp"_sd;
constexpr auto kReshardProgressMark = "reshardProgressMark"_sd;
@@ -324,5 +325,6 @@ std::vector<std::shared_ptr<Instance>> getReshardingStateMachines(OperationConte
return result;
}
+} // namespace resharding
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_util_test.cpp b/src/mongo/db/s/resharding/resharding_util_test.cpp
index 5fd40fd86b7..12e5e15ddcd 100644
--- a/src/mongo/db/s/resharding/resharding_util_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_util_test.cpp
@@ -52,6 +52,7 @@
namespace mongo {
+namespace resharding {
namespace {
class ReshardingUtilTest : public ConfigServerTestFixture {
@@ -309,4 +310,7 @@ TEST_F(ReshardingTxnCloningPipelineTest, TxnPipelineAfterID) {
}
} // namespace
+
+} // namespace resharding
+
} // namespace mongo
diff --git a/src/mongo/db/s/resharding_test_commands.cpp b/src/mongo/db/s/resharding_test_commands.cpp
index 61fa835829f..74688928784 100644
--- a/src/mongo/db/s/resharding_test_commands.cpp
+++ b/src/mongo/db/s/resharding_test_commands.cpp
@@ -37,7 +37,7 @@
#include "mongo/db/commands.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/s/resharding/resharding_collection_cloner.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding_test_commands_gen.h"
#include "mongo/db/vector_clock_metadata_hook.h"
#include "mongo/executor/network_interface_factory.h"
@@ -79,11 +79,11 @@ public:
}
};
- auto metrics = ReshardingMetricsNew::makeInstance(
+ auto metrics = ReshardingMetrics::makeInstance(
request().getUuid(),
request().getShardKey(),
ns(),
- ReshardingMetricsNew::Role::kRecipient,
+ ReshardingMetrics::Role::kRecipient,
opCtx->getServiceContext()->getFastClockSource()->now(),
opCtx->getServiceContext());
diff --git a/src/mongo/db/s/sessions_collection_config_server.cpp b/src/mongo/db/s/sessions_collection_config_server.cpp
index 60c72dcab47..4376166a365 100644
--- a/src/mongo/db/s/sessions_collection_config_server.cpp
+++ b/src/mongo/db/s/sessions_collection_config_server.cpp
@@ -125,8 +125,10 @@ void SessionsCollectionConfigServer::setupSessionsCollection(OperationContext* o
auto filterQuery =
BSON("_id" << NamespaceString::kLogicalSessionsNamespace.ns()
<< CollectionType::kMaxChunkSizeBytesFieldName << BSON("$exists" << false));
- auto updateQuery =
- BSON("$set" << BSON(CollectionType::kMaxChunkSizeBytesFieldName << kMaxChunkSizeBytes));
+ auto updateQuery = BSON("$set" << BSON(CollectionType::kMaxChunkSizeBytesFieldName
+ << kMaxChunkSizeBytes
+ << CollectionType::kNoAutoSplitFieldName << true));
+
uassertStatusOK(Grid::get(opCtx)->catalogClient()->updateConfigDocument(
opCtx,
CollectionType::ConfigNS,
diff --git a/src/mongo/db/s/set_allow_migrations_coordinator.cpp b/src/mongo/db/s/set_allow_migrations_coordinator.cpp
index 3395aa7f465..d8cb15afb2e 100644
--- a/src/mongo/db/s/set_allow_migrations_coordinator.cpp
+++ b/src/mongo/db/s/set_allow_migrations_coordinator.cpp
@@ -50,14 +50,6 @@ bool isCollectionSharded(OperationContext* opCtx, const NamespaceString& nss) {
}
}
-SetAllowMigrationsCoordinator::SetAllowMigrationsCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(SetAllowMigrationsCoordinatorDocument::parse(
- IDLParserErrorContext("SetAllowMigrationsCoordinatorDocument"), initialState)),
- _allowMigrations(_doc.getAllowMigrations()) {}
-
-
void SetAllowMigrationsCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
// If we have two set allow migrations on the same namespace, then the arguments must be the
// same.
@@ -72,23 +64,9 @@ void SetAllowMigrationsCoordinator::checkIfOptionsConflict(const BSONObj& doc) c
otherDoc.getSetAllowMigrationsRequest().toBSON()));
}
-boost::optional<BSONObj> SetAllowMigrationsCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.appendElements(_doc.getSetAllowMigrationsRequest().toBSON());
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "SetAllowMigrationsCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("active", true);
- return bob.obj();
+void SetAllowMigrationsCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ stdx::lock_guard lk{_docMutex};
+ cmdInfoBuilder->appendElements(_doc.getSetAllowMigrationsRequest().toBSON());
}
ExecutorFuture<void> SetAllowMigrationsCoordinator::_runImpl(
diff --git a/src/mongo/db/s/set_allow_migrations_coordinator.h b/src/mongo/db/s/set_allow_migrations_coordinator.h
index cf8e14348d7..78d2e03696a 100644
--- a/src/mongo/db/s/set_allow_migrations_coordinator.h
+++ b/src/mongo/db/s/set_allow_migrations_coordinator.h
@@ -38,31 +38,27 @@
namespace mongo {
-class SetAllowMigrationsCoordinator final : public ShardingDDLCoordinator {
+class SetAllowMigrationsCoordinator final
+ : public ShardingDDLCoordinatorImpl<SetAllowMigrationsCoordinatorDocument> {
public:
SetAllowMigrationsCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState);
+ const BSONObj& initialState)
+ : ShardingDDLCoordinatorImpl(service, "SetAllowMigrationsCoordinator", initialState),
+ _allowMigrations(_doc.getAllowMigrations()) {}
void checkIfOptionsConflict(const BSONObj& coorDoc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
bool canAlwaysStartWhenUserWritesAreDisabled() const override {
return true;
}
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
- }
-
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- SetAllowMigrationsCoordinatorDocument _doc;
const bool _allowMigrations;
};
} // namespace mongo
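As an illustrative aside (not part of the patch): the refactor above replaces the coordinator's hand-written reportForCurrentOp() with a small appendCommandInfo() hook; the shared boilerplate ("type", "ns", "desc", "op", "active", the forwarded comment) now lives in ShardingDDLCoordinatorImpl::basicReportBuilder(), defined further down in sharding_ddl_coordinator.h. A minimal sketch of a coordinator built on the new base — MyCoordinator, MyCoordinatorDocument and getMyRequest() are hypothetical names:

    class MyCoordinator final : public ShardingDDLCoordinatorImpl<MyCoordinatorDocument> {
    public:
        MyCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState)
            : ShardingDDLCoordinatorImpl(service, "MyCoordinator", initialState) {}

    private:
        // Only the command-specific payload is appended here; the surrounding $currentOp
        // document is assembled by basicReportBuilder() in the base class.
        void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override {
            stdx::lock_guard lk{_docMutex};
            cmdInfoBuilder->appendElements(_doc.getMyRequest().toBSON());
        }

        // checkIfOptionsConflict() and _runImpl() omitted for brevity.
    };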
diff --git a/src/mongo/db/s/set_shard_version_command.cpp b/src/mongo/db/s/set_shard_version_command.cpp
deleted file mode 100644
index 0c8e2da5037..00000000000
--- a/src/mongo/db/s/set_shard_version_command.cpp
+++ /dev/null
@@ -1,340 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/auth/action_set.h"
-#include "mongo/db/auth/action_type.h"
-#include "mongo/db/auth/authorization_session.h"
-#include "mongo/db/auth/privilege.h"
-#include "mongo/db/catalog/collection_catalog.h"
-#include "mongo/db/catalog_raii.h"
-#include "mongo/db/client.h"
-#include "mongo/db/commands.h"
-#include "mongo/db/not_primary_error_tracker.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/repl/replication_coordinator.h"
-#include "mongo/db/s/collection_sharding_runtime.h"
-#include "mongo/db/s/shard_filtering_metadata_refresh.h"
-#include "mongo/db/s/sharding_state.h"
-#include "mongo/logv2/log.h"
-#include "mongo/s/client/shard_registry.h"
-#include "mongo/s/grid.h"
-#include "mongo/s/request_types/set_shard_version_request.h"
-#include "mongo/util/str.h"
-
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
-
-namespace mongo {
-namespace {
-
-class SetShardVersion : public ErrmsgCommandDeprecated {
-public:
- SetShardVersion() : ErrmsgCommandDeprecated("setShardVersion") {}
-
- std::string help() const override {
- return "internal";
- }
-
- bool adminOnly() const override {
- return true;
- }
-
- AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
- return AllowedOnSecondary::kAlways;
- }
-
- virtual bool supportsWriteConcern(const BSONObj& cmd) const override {
- return false;
- }
-
- void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) const override {
- ActionSet actions;
- actions.addAction(ActionType::internal);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
-
- bool errmsgRun(OperationContext* opCtx,
- const std::string&,
- const BSONObj& cmdObj,
- std::string& errmsg,
- BSONObjBuilder& result) {
- uassert(ErrorCodes::IllegalOperation,
- "can't issue setShardVersion from 'eval'",
- !opCtx->getClient()->isInDirectClient());
-
- auto const shardingState = ShardingState::get(opCtx);
- uassertStatusOK(shardingState->canAcceptShardedCommands());
-
- // Steps
- // 1. Set the `authoritative` variable from the command object.
- //
- // 2. Validate all command parameters against the info in our ShardingState, and return an
- // error if they do not match.
- //
- // 3. If the sent shardVersion is compatible with our shardVersion, return.
- //
- // 4. If the sent shardVersion indicates a drop, jump to step 6.
- //
- // 5. If the sent shardVersion is staler than ours, return a stale config error.
- //
- // 6. If the sent shardVersion is newer than ours (or indicates a drop), reload our metadata
- // and compare the sent shardVersion with what we reloaded. If the sent shardVersion is
- // staler than what we reloaded, return a stale config error, as in step 5.
-
- // Step 1
-
- Client* client = opCtx->getClient();
- NotPrimaryErrorTracker::get(client).disable();
-
- const bool authoritative = cmdObj.getBoolField("authoritative");
-
- // Step 2
-
- // Validate namespace parameter.
- const NamespaceString nss(cmdObj["setShardVersion"].String());
- uassert(ErrorCodes::InvalidNamespace,
- str::stream() << "Invalid namespace " << nss.ns(),
- nss.isValid());
-
- // Validate chunk version parameter.
- auto requestedVersion = ChunkVersion::parse(cmdObj[SetShardVersionRequest::kVersion]);
-
- // Step 3
-
- {
- boost::optional<AutoGetDb> autoDb;
- autoDb.emplace(opCtx, nss.db(), MODE_IS);
-
- // Secondary nodes cannot support set shard version
- uassert(ErrorCodes::NotWritablePrimary,
- str::stream() << "setShardVersion with collection version is only supported "
- "against primary nodes, but it was received for namespace "
- << nss.ns(),
- repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx,
- nss.db()));
-
- boost::optional<Lock::CollectionLock> collLock;
- collLock.emplace(opCtx, nss, MODE_IS);
-
- // Views do not require a shard version check. We do not care about invalid system views
- // for this check, only to validate if a view already exists for this namespace.
- if (autoDb->getDb() &&
- !CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss) &&
- CollectionCatalog::get(opCtx)->lookupViewWithoutValidatingDurable(opCtx, nss)) {
- return true;
- }
-
- auto* const csr = CollectionShardingRuntime::get(opCtx, nss);
- const ChunkVersion collectionShardVersion = [&] {
- auto optMetadata = csr->getCurrentMetadataIfKnown();
- return (optMetadata && optMetadata->isSharded()) ? optMetadata->getShardVersion()
- : ChunkVersion::UNSHARDED();
- }();
-
- if (requestedVersion.isWriteCompatibleWith(collectionShardVersion)) {
- return true;
- }
-
- // Step 4
-
- const bool isDropRequested =
- !requestedVersion.isSet() && collectionShardVersion.isSet();
-
- if (isDropRequested) {
- if (!authoritative) {
- result.appendBool("need_authoritative", true);
- result.append("ns", nss.ns());
- collectionShardVersion.appendLegacyWithField(&result, "globalVersion");
- errmsg = "dropping needs to be authoritative";
- return false;
- }
-
- // Fall through to metadata reload below
- } else {
- // Not Dropping
-
- // Step 5
-
- const auto kTenSeconds = Milliseconds(10000);
-
- if (requestedVersion.isOlderThan(collectionShardVersion)) {
- auto critSecSignal = csr->getCriticalSectionSignal(
- opCtx, ShardingMigrationCriticalSection::kWrite);
- if (critSecSignal) {
- collLock.reset();
- autoDb.reset();
- LOGV2(22056, "waiting till out of critical section");
- auto deadline = opCtx->getServiceContext()->getFastClockSource()->now() +
- std::min(opCtx->getRemainingMaxTimeMillis(), kTenSeconds);
-
- opCtx->runWithDeadline(deadline, ErrorCodes::ExceededTimeLimit, [&] {
- critSecSignal->wait(opCtx);
- });
- }
-
- errmsg = str::stream() << "shard global version for collection is higher "
- << "than trying to set to '" << nss.ns() << "'";
- result.append("ns", nss.ns());
- requestedVersion.appendLegacyWithField(&result, "version");
- collectionShardVersion.appendLegacyWithField(&result, "globalVersion");
- result.appendBool("reloadConfig", true);
- return false;
- }
-
- if (!collectionShardVersion.isSet() && !authoritative) {
- // Needed b/c when the last chunk is moved off a shard, the version gets reset
- // to zero, which should require a reload.
- auto critSecSignal = csr->getCriticalSectionSignal(
- opCtx, ShardingMigrationCriticalSection::kWrite);
- if (critSecSignal) {
- collLock.reset();
- autoDb.reset();
- LOGV2(22057, "waiting till out of critical section");
-
- auto deadline = opCtx->getServiceContext()->getFastClockSource()->now() +
- std::min(opCtx->getRemainingMaxTimeMillis(), kTenSeconds);
-
- opCtx->runWithDeadline(deadline, ErrorCodes::ExceededTimeLimit, [&] {
- critSecSignal->wait(opCtx);
- });
- }
-
- // need authoritative for first look
- result.append("ns", nss.ns());
- result.appendBool("need_authoritative", true);
- errmsg = str::stream() << "first time for collection '" << nss.ns() << "'";
- return false;
- }
-
- // Fall through to metadata reload below
- }
- }
-
- // Step 6
-
- const auto status = [&] {
- try {
- // TODO (SERVER-50812) remove this if-else: just call onShardVersionMismatch
- if (requestedVersion == ChunkVersion::UNSHARDED()) {
- forceShardFilteringMetadataRefresh(opCtx, nss);
- } else {
- onShardVersionMismatch(opCtx, nss, requestedVersion);
- }
- } catch (const DBException& ex) {
- return ex.toStatus();
- }
- return Status::OK();
- }();
-
- {
- // Avoid using AutoGetCollection() as it returns the InvalidViewDefinition error code
- // if an invalid view is in the 'system.views' collection.
- AutoGetDb autoDb(opCtx, nss.db(), MODE_IS);
- Lock::CollectionLock collLock(opCtx, nss, MODE_IS);
-
- const ChunkVersion currVersion = [&] {
- auto* const csr = CollectionShardingRuntime::get(opCtx, nss);
- auto optMetadata = csr->getCurrentMetadataIfKnown();
- return (optMetadata && optMetadata->isSharded()) ? optMetadata->getShardVersion()
- : ChunkVersion::UNSHARDED();
- }();
-
- if (!status.isOK()) {
- // The reload itself was interrupted or confused here
- LOGV2_WARNING(
- 22058,
- "Could not refresh metadata for the namespace {namespace} with the requested "
- "shard version {requestedShardVersion}; the current shard version is "
- "{currentShardVersion}: {error}",
- "Could not refresh metadata",
- "namespace"_attr = nss.ns(),
- "requestedShardVersion"_attr = requestedVersion,
- "currentShardVersion"_attr = currVersion,
- "error"_attr = redact(status));
-
- result.append("ns", nss.ns());
- status.serializeErrorToBSON(&result);
- requestedVersion.appendLegacyWithField(&result, "version");
- currVersion.appendLegacyWithField(&result, "globalVersion");
- result.appendBool("reloadConfig", true);
-
- return false;
- } else if (!requestedVersion.isWriteCompatibleWith(currVersion)) {
- // We reloaded a version that doesn't match the version mongos was trying to
- // set.
- static Occasionally sampler;
- if (sampler.tick()) {
- LOGV2_WARNING(
- 22059,
- "Requested shard version differs from the authoritative (current) shard "
- "version for the namespace {namespace}; the requested version is "
- "{requestedShardVersion}, but the current version is "
- "{currentShardVersion}",
- "Requested shard version differs from the authoritative (current) shard "
- "version for this namespace",
- "namespace"_attr = nss.ns(),
- "requestedShardVersion"_attr = requestedVersion,
- "currentShardVersion"_attr = currVersion);
- }
-
- // WARNING: the exact fields below are important for compatibility with mongos
- // version reload.
-
- result.append("ns", nss.ns());
- currVersion.appendLegacyWithField(&result, "globalVersion");
-
- // If this was a reset of a collection or the last chunk moved out, inform mongos to
- // do a full reload.
- if (currVersion.epoch() != requestedVersion.epoch() || !currVersion.isSet()) {
- result.appendBool("reloadConfig", true);
- // Zero-version also needed to trigger full mongos reload, sadly
- // TODO: Make this saner, and less impactful (full reload on last chunk is bad)
- ChunkVersion::UNSHARDED().appendLegacyWithField(&result, "version");
- // For debugging
- requestedVersion.appendLegacyWithField(&result, "origVersion");
- } else {
- requestedVersion.appendLegacyWithField(&result, "version");
- }
-
- return false;
- }
- }
-
- return true;
- }
-
-} setShardVersionCmd;
-
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/db/s/shard_key_index_util.cpp b/src/mongo/db/s/shard_key_index_util.cpp
index 9b3b6371a4a..1cdd4f99008 100644
--- a/src/mongo/db/s/shard_key_index_util.cpp
+++ b/src/mongo/db/s/shard_key_index_util.cpp
@@ -48,7 +48,8 @@ boost::optional<ShardKeyIndex> _findShardKeyPrefixedIndex(
const IndexCatalog* indexCatalog,
const boost::optional<std::string>& excludeName,
const BSONObj& shardKey,
- bool requireSingleKey) {
+ bool requireSingleKey,
+ std::string* errMsg = nullptr) {
if (collection->isClustered() &&
clustered_util::matchesClusterKey(shardKey, collection->getClusteredInfo())) {
auto clusteredIndexSpec = collection->getClusteredInfo()->getIndexSpec();
@@ -67,7 +68,8 @@ boost::optional<ShardKeyIndex> _findShardKeyPrefixedIndex(
continue;
}
- if (isCompatibleWithShardKey(opCtx, collection, indexEntry, shardKey, requireSingleKey)) {
+ if (isCompatibleWithShardKey(
+ opCtx, collection, indexEntry, shardKey, requireSingleKey, errMsg)) {
if (!indexEntry->isMultikey(opCtx, collection)) {
return ShardKeyIndex(indexDescriptor);
}
@@ -108,26 +110,72 @@ bool isCompatibleWithShardKey(OperationContext* opCtx,
const CollectionPtr& collection,
const IndexCatalogEntry* indexEntry,
const BSONObj& shardKey,
- bool requireSingleKey) {
+ bool requireSingleKey,
+ std::string* errMsg) {
+ // Return a descriptive error for each index that shares a prefix with shardKey but
+ // cannot be used for sharding.
+ const int kErrorPartial = 0x01;
+ const int kErrorSparse = 0x02;
+ const int kErrorMultikey = 0x04;
+ const int kErrorCollation = 0x08;
+ const int kErrorNotPrefix = 0x10;
+ int reasons = 0;
+
auto desc = indexEntry->descriptor();
bool hasSimpleCollation = desc->collation().isEmpty();
- if (desc->isPartial() || desc->isSparse()) {
- return false;
+ if (desc->isPartial()) {
+ reasons |= kErrorPartial;
+ }
+
+ if (desc->isSparse()) {
+ reasons |= kErrorSparse;
}
if (!shardKey.isPrefixOf(desc->keyPattern(), SimpleBSONElementComparator::kInstance)) {
- return false;
+ reasons |= kErrorNotPrefix;
}
- if (!indexEntry->isMultikey(opCtx, collection) && hasSimpleCollation) {
- return true;
+    if (reasons == 0) {  // i.e. not partial, not sparse, and the shard key is a prefix of the index key
+ if (!indexEntry->isMultikey(opCtx, collection)) {
+ if (hasSimpleCollation) {
+ return true;
+ }
+ } else {
+ reasons |= kErrorMultikey;
+ }
+ if (!requireSingleKey && hasSimpleCollation) {
+ return true;
+ }
}
- if (!requireSingleKey && hasSimpleCollation) {
- return true;
+ if (!hasSimpleCollation) {
+ reasons |= kErrorCollation;
}
+ if (errMsg && reasons != 0) {
+ std::string errors = "Index " + indexEntry->descriptor()->indexName() +
+ " cannot be used for sharding because:";
+ if (reasons & kErrorPartial) {
+ errors += " Index key is partial.";
+ }
+ if (reasons & kErrorSparse) {
+ errors += " Index key is sparse.";
+ }
+ if (reasons & kErrorMultikey) {
+ errors += " Index key is multikey.";
+ }
+ if (reasons & kErrorCollation) {
+ errors += " Index has a non-simple collation.";
+ }
+ if (reasons & kErrorNotPrefix) {
+ errors += " Shard key is not a prefix of index key.";
+ }
+ if (!errMsg->empty()) {
+ *errMsg += "\n";
+ }
+ *errMsg += errors;
+ }
return false;
}
@@ -145,9 +193,10 @@ boost::optional<ShardKeyIndex> findShardKeyPrefixedIndex(OperationContext* opCtx
const CollectionPtr& collection,
const IndexCatalog* indexCatalog,
const BSONObj& shardKey,
- bool requireSingleKey) {
+ bool requireSingleKey,
+ std::string* errMsg) {
return _findShardKeyPrefixedIndex(
- opCtx, collection, indexCatalog, boost::none, shardKey, requireSingleKey);
+ opCtx, collection, indexCatalog, boost::none, shardKey, requireSingleKey, errMsg);
}
} // namespace mongo
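The errMsg plumbing added above collects incompatibility reasons per index in a small bitmask and only renders them into text when the caller passed a non-null errMsg; _findShardKeyPrefixedIndex forwards the same pointer for every candidate index, so the reasons accumulate into one newline-separated message. A standalone sketch of that bitmask-to-message step (the flag names mirror the patch; everything else is simplified and hypothetical):

    #include <string>

    namespace {
    constexpr int kErrorPartial = 0x01;
    constexpr int kErrorSparse = 0x02;
    constexpr int kErrorMultikey = 0x04;

    // Render the accumulated reason flags for one index into a human-readable sentence.
    std::string describeReasons(const std::string& indexName, int reasons) {
        std::string errors = "Index " + indexName + " cannot be used for sharding because:";
        if (reasons & kErrorPartial)
            errors += " Index key is partial.";
        if (reasons & kErrorSparse)
            errors += " Index key is sparse.";
        if (reasons & kErrorMultikey)
            errors += " Index key is multikey.";
        return errors;
    }
    }  // namespace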
diff --git a/src/mongo/db/s/shard_key_index_util.h b/src/mongo/db/s/shard_key_index_util.h
index 515523b0803..c474363d8ac 100644
--- a/src/mongo/db/s/shard_key_index_util.h
+++ b/src/mongo/db/s/shard_key_index_util.h
@@ -67,12 +67,16 @@ private:
/**
* Returns true if the given index is compatible with the shard key pattern.
+ *
+ * If the return value is false and errMsg is non-null, the reasons the existing index is
+ * incompatible are appended to errMsg.
*/
bool isCompatibleWithShardKey(OperationContext* opCtx,
const CollectionPtr& collection,
const IndexCatalogEntry* indexEntry,
const BSONObj& shardKey,
- bool requireSingleKey);
+ bool requireSingleKey,
+ std::string* errMsg = nullptr);
/**
* Returns an index suitable for shard key range scans if it exists.
@@ -89,7 +93,8 @@ boost::optional<ShardKeyIndex> findShardKeyPrefixedIndex(OperationContext* opCtx
const CollectionPtr& collection,
const IndexCatalog* indexCatalog,
const BSONObj& shardKey,
- bool requireSingleKey);
+ bool requireSingleKey,
+ std::string* errMsg = nullptr);
/**
* Returns true if the given index name is the last remaining index that is compatible with the
diff --git a/src/mongo/db/s/shard_key_util.cpp b/src/mongo/db/s/shard_key_util.cpp
index 5a0acaeb2a4..a0363a907d4 100644
--- a/src/mongo/db/s/shard_key_util.cpp
+++ b/src/mongo/db/s/shard_key_util.cpp
@@ -107,7 +107,8 @@ bool validShardKeyIndexExists(OperationContext* opCtx,
const ShardKeyPattern& shardKeyPattern,
const boost::optional<BSONObj>& defaultCollation,
bool requiresUnique,
- const ShardKeyValidationBehaviors& behaviors) {
+ const ShardKeyValidationBehaviors& behaviors,
+ std::string* errMsg) {
auto indexes = behaviors.loadIndexes(nss);
// 1. Verify consistency with existing unique indexes
@@ -124,7 +125,9 @@ bool validShardKeyIndexExists(OperationContext* opCtx,
// 2. Check for a useful index
bool hasUsefulIndexForKey = false;
+ std::string allReasons;
for (const auto& idx : indexes) {
+ std::string reasons;
BSONObj currentKey = idx["key"].embeddedObject();
// Check 2.i. and 2.ii.
if (!idx["sparse"].trueValue() && idx["filter"].eoo() && idx["collation"].eoo() &&
@@ -143,6 +146,19 @@ bool validShardKeyIndexExists(OperationContext* opCtx,
idx["seed"].numberInt() == BSONElementHasher::DEFAULT_HASH_SEED);
hasUsefulIndexForKey = true;
}
+ if (idx["sparse"].trueValue()) {
+ reasons += " Index key is sparse.";
+ }
+ if (idx["filter"].ok()) {
+ reasons += " Index key is partial.";
+ }
+ if (idx["collation"].ok()) {
+ reasons += " Index has a non-simple collation.";
+ }
+ if (!reasons.empty()) {
+ allReasons =
+ " Index " + idx["name"] + " cannot be used for sharding because [" + reasons + " ]";
+ }
}
// 3. If proposed key is required to be unique, additionally check for exact match.
@@ -173,6 +189,10 @@ bool validShardKeyIndexExists(OperationContext* opCtx,
}
}
+ if (errMsg && !allReasons.empty()) {
+ *errMsg += allReasons;
+ }
+
if (hasUsefulIndexForKey) {
// Check 2.iii Make sure that there is a useful, non-multikey index available.
behaviors.verifyUsefulNonMultiKeyIndex(nss, shardKeyPattern.toBSON());
@@ -188,17 +208,19 @@ bool validateShardKeyIndexExistsOrCreateIfPossible(OperationContext* opCtx,
bool unique,
bool enforceUniquenessCheck,
const ShardKeyValidationBehaviors& behaviors) {
+ std::string errMsg;
if (validShardKeyIndexExists(opCtx,
nss,
shardKeyPattern,
defaultCollation,
unique && enforceUniquenessCheck,
- behaviors)) {
+ behaviors,
+ &errMsg)) {
return false;
}
// 4. If no useful index, verify we can create one.
- behaviors.verifyCanCreateShardKeyIndex(nss);
+ behaviors.verifyCanCreateShardKeyIndex(nss, &errMsg);
// 5. If no useful index exists and we can create one, create one on proposedKey. Only need
// to call ensureIndex on primary shard, since indexes get copied to receiving shard
@@ -271,11 +293,12 @@ void ValidationBehaviorsShardCollection::verifyUsefulNonMultiKeyIndex(
uassert(ErrorCodes::InvalidOptions, res["errmsg"].str(), success);
}
-void ValidationBehaviorsShardCollection::verifyCanCreateShardKeyIndex(
- const NamespaceString& nss) const {
+void ValidationBehaviorsShardCollection::verifyCanCreateShardKeyIndex(const NamespaceString& nss,
+ std::string* errMsg) const {
uassert(ErrorCodes::InvalidOptions,
- "Please create an index that starts with the proposed shard key before "
- "sharding the collection",
+ str::stream() << "Please create an index that starts with the proposed shard key before"
+ " sharding the collection. "
+ << *errMsg,
_localClient->findOne(nss, BSONObj{}).isEmpty());
}
@@ -334,11 +357,13 @@ void ValidationBehaviorsRefineShardKey::verifyUsefulNonMultiKeyIndex(
uassertStatusOK(checkShardingIndexRes.commandStatus);
}
-void ValidationBehaviorsRefineShardKey::verifyCanCreateShardKeyIndex(
- const NamespaceString& nss) const {
- uasserted(ErrorCodes::InvalidOptions,
- "Please create an index that starts with the proposed shard key before "
- "refining the shard key of the collection");
+void ValidationBehaviorsRefineShardKey::verifyCanCreateShardKeyIndex(const NamespaceString& nss,
+ std::string* errMsg) const {
+ uasserted(
+ ErrorCodes::InvalidOptions,
+ str::stream() << "Please create an index that starts with the proposed shard key before"
+                         " refining the shard key of the collection. "
+ << *errMsg);
}
void ValidationBehaviorsRefineShardKey::createShardKeyIndex(
diff --git a/src/mongo/db/s/shard_key_util.h b/src/mongo/db/s/shard_key_util.h
index 5d20a013bef..55905e7beb7 100644
--- a/src/mongo/db/s/shard_key_util.h
+++ b/src/mongo/db/s/shard_key_util.h
@@ -51,7 +51,8 @@ public:
virtual void verifyUsefulNonMultiKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey) const = 0;
- virtual void verifyCanCreateShardKeyIndex(const NamespaceString& nss) const = 0;
+ virtual void verifyCanCreateShardKeyIndex(const NamespaceString& nss,
+ std::string* errMsg) const = 0;
virtual void createShardKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey,
@@ -72,7 +73,8 @@ public:
void verifyUsefulNonMultiKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey) const override;
- void verifyCanCreateShardKeyIndex(const NamespaceString& nss) const override;
+ void verifyCanCreateShardKeyIndex(const NamespaceString& nss,
+ std::string* errMsg) const override;
void createShardKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey,
@@ -95,7 +97,8 @@ public:
void verifyUsefulNonMultiKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey) const override;
- void verifyCanCreateShardKeyIndex(const NamespaceString& nss) const override;
+ void verifyCanCreateShardKeyIndex(const NamespaceString& nss,
+ std::string* errMsg) const override;
void createShardKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey,
@@ -165,7 +168,8 @@ bool validShardKeyIndexExists(OperationContext* opCtx,
const ShardKeyPattern& shardKeyPattern,
const boost::optional<BSONObj>& defaultCollation,
bool requiresUnique,
- const ShardKeyValidationBehaviors& behaviors);
+ const ShardKeyValidationBehaviors& behaviors,
+ std::string* errMsg = nullptr);
void validateShardKeyIsNotEncrypted(OperationContext* opCtx,
const NamespaceString& nss,
diff --git a/src/mongo/db/s/shard_metadata_util.cpp b/src/mongo/db/s/shard_metadata_util.cpp
index 1651cfc167e..e52a5e28d1a 100644
--- a/src/mongo/db/s/shard_metadata_util.cpp
+++ b/src/mongo/db/s/shard_metadata_util.cpp
@@ -27,13 +27,8 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/shard_metadata_util.h"
-#include <memory>
-
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/ops/write_ops.h"
#include "mongo/db/s/type_shard_collection.h"
@@ -49,7 +44,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace shardmetadatautil {
namespace {
@@ -105,8 +99,9 @@ Status unsetPersistedRefreshFlags(OperationContext* opCtx,
// Set 'refreshing' to false and update the last refreshed collection version.
BSONObjBuilder updateBuilder;
updateBuilder.append(ShardCollectionType::kRefreshingFieldName, false);
- updateBuilder.appendTimestamp(ShardCollectionType::kLastRefreshedCollectionVersionFieldName,
- refreshedVersion.toLong());
+ updateBuilder.appendTimestamp(
+ ShardCollectionType::kLastRefreshedCollectionMajorMinorVersionFieldName,
+ refreshedVersion.toLong());
return updateShardCollectionsEntry(opCtx,
BSON(ShardCollectionType::kNssFieldName << nss.ns()),
@@ -141,12 +136,11 @@ StatusWith<RefreshState> getPersistedRefreshFlags(OperationContext* opCtx,
entry.getRefreshing() ? *entry.getRefreshing() : true,
entry.getLastRefreshedCollectionVersion()
? *entry.getLastRefreshedCollectionVersion()
- : ChunkVersion(0, 0, entry.getEpoch(), entry.getTimestamp())};
+ : ChunkVersion({entry.getEpoch(), entry.getTimestamp()}, {0, 0})};
}
StatusWith<ShardCollectionType> readShardCollectionsEntry(OperationContext* opCtx,
const NamespaceString& nss) {
-
try {
DBDirectClient client(opCtx);
FindCommandRequest findRequest{NamespaceString::kShardConfigCollectionsNamespace};
@@ -211,7 +205,8 @@ Status updateShardCollectionsEntry(OperationContext* opCtx,
if (upsert) {
// If upserting, this should be an update from the config server that does not have shard
// refresh / migration inc signal information.
- invariant(!update.hasField(ShardCollectionType::kLastRefreshedCollectionVersionFieldName));
+ invariant(!update.hasField(
+ ShardCollectionType::kLastRefreshedCollectionMajorMinorVersionFieldName));
}
try {
diff --git a/src/mongo/db/s/shard_metadata_util.h b/src/mongo/db/s/shard_metadata_util.h
index 52f043a0b9a..a23efa4b577 100644
--- a/src/mongo/db/s/shard_metadata_util.h
+++ b/src/mongo/db/s/shard_metadata_util.h
@@ -32,7 +32,7 @@
#include <string>
#include <vector>
-#include "mongo/base/status.h"
+#include "mongo/base/status_with.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/oid.h"
#include "mongo/s/chunk_version.h"
@@ -40,17 +40,11 @@
namespace mongo {
class ChunkType;
-class CollectionMetadata;
class NamespaceString;
class OperationContext;
class ShardCollectionType;
class ShardDatabaseType;
-template <typename T>
-class StatusWith;
-/**
- * Function helpers to locally, using a DBDirectClient, read and write sharding metadata on a shard.
- */
namespace shardmetadatautil {
/**
@@ -62,25 +56,6 @@ struct QueryAndSort {
};
/**
- * Subset of the shard's collections collection document that relates to refresh state.
- */
-struct RefreshState {
- bool operator==(const RefreshState& other) const;
-
- std::string toString() const;
-
- // The current generation of the collection.
- CollectionGeneration generation;
-
- // Whether a refresh is currently in progress.
- bool refreshing;
-
- // The collection version after the last complete refresh. Indicates change if refreshing has
- // started and finished since last loaded.
- ChunkVersion lastRefreshedCollectionVersion;
-};
-
-/**
* Returns the query needed to find incremental changes to the chunks collection on a shard server.
*
* The query has to find all the chunks $gte the current max version. Currently, any splits, merges
@@ -115,6 +90,26 @@ Status unsetPersistedRefreshFlags(OperationContext* opCtx,
const ChunkVersion& refreshedVersion);
/**
+ * Represents a subset of a collection's config.cache.collections entry that relates to refresh
+ * state.
+ */
+struct RefreshState {
+ bool operator==(const RefreshState& other) const;
+
+ std::string toString() const;
+
+ // The current generation of the collection.
+ CollectionGeneration generation;
+
+ // Whether a refresh is currently in progress.
+ bool refreshing;
+
+ // The collection version after the last complete refresh. Indicates change if refreshing has
+ // started and finished since last loaded.
+ ChunkVersion lastRefreshedCollectionVersion;
+};
+
+/**
* Reads the persisted refresh signal for 'nss' and returns those settings.
*/
StatusWith<RefreshState> getPersistedRefreshFlags(OperationContext* opCtx,
diff --git a/src/mongo/db/s/shard_metadata_util_test.cpp b/src/mongo/db/s/shard_metadata_util_test.cpp
index 6bad5d66ac1..af35cf373e8 100644
--- a/src/mongo/db/s/shard_metadata_util_test.cpp
+++ b/src/mongo/db/s/shard_metadata_util_test.cpp
@@ -27,14 +27,10 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/s/shard_metadata_util.h"
-
-#include "mongo/base/status.h"
#include "mongo/client/remote_command_targeter_mock.h"
#include "mongo/db/commands.h"
#include "mongo/db/dbdirectclient.h"
+#include "mongo/db/s/shard_metadata_util.h"
#include "mongo/db/s/shard_server_test_fixture.h"
#include "mongo/db/s/type_shard_collection.h"
#include "mongo/rpc/get_status_from_command_result.h"
@@ -159,7 +155,7 @@ struct ShardMetadataUtilTest : public ShardServerTestFixture {
}
}
- ChunkVersion maxCollVersion{0, 0, OID::gen(), Timestamp(1, 1)};
+ ChunkVersion maxCollVersion{{OID::gen(), Timestamp(1, 1)}, {0, 0}};
const KeyPattern keyPattern{BSON("a" << 1)};
const BSONObj defaultCollation{BSON("locale"
<< "fr_CA")};
@@ -216,7 +212,7 @@ TEST_F(ShardMetadataUtilTest, PersistedRefreshSignalStartAndFinish) {
ASSERT(state.generation.isSameCollection(maxCollVersion));
ASSERT_EQUALS(state.refreshing, true);
ASSERT_EQUALS(state.lastRefreshedCollectionVersion,
- ChunkVersion(0, 0, maxCollVersion.epoch(), maxCollVersion.getTimestamp()));
+ ChunkVersion({maxCollVersion.epoch(), maxCollVersion.getTimestamp()}, {0, 0}));
// Signal refresh finish
ASSERT_OK(unsetPersistedRefreshFlags(operationContext(), kNss, maxCollVersion));
@@ -235,7 +231,7 @@ TEST_F(ShardMetadataUtilTest, WriteAndReadChunks) {
// read all the chunks
QueryAndSort allChunkDiff = createShardChunkDiffQuery(
- ChunkVersion(0, 0, maxCollVersion.epoch(), maxCollVersion.getTimestamp()));
+ ChunkVersion({maxCollVersion.epoch(), maxCollVersion.getTimestamp()}, {0, 0}));
std::vector<ChunkType> readChunks = assertGet(readShardChunks(operationContext(),
kNss,
allChunkDiff.query,
diff --git a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
index e344e20b5e6..93a685475d4 100644
--- a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
+++ b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
@@ -27,17 +27,8 @@
* it in the license file.
*/
-
-#define LOGV2_FOR_CATALOG_REFRESH(ID, DLEVEL, MESSAGE, ...) \
- LOGV2_DEBUG_OPTIONS( \
- ID, DLEVEL, {logv2::LogComponent::kShardingCatalogRefresh}, MESSAGE, ##__VA_ARGS__)
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/shard_server_catalog_cache_loader.h"
-#include <memory>
-
#include "mongo/db/catalog/rename_collection.h"
#include "mongo/db/client.h"
#include "mongo/db/db_raii.h"
@@ -57,7 +48,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
using namespace shardmetadatautil;
@@ -66,7 +56,6 @@ using CollectionAndChangedChunks = CatalogCacheLoader::CollectionAndChangedChunk
namespace {
-MONGO_FAIL_POINT_DEFINE(hangPersistCollectionAndChangedChunksAfterDropChunks);
MONGO_FAIL_POINT_DEFINE(hangCollectionFlush);
AtomicWord<unsigned long long> taskIdGenerator{0};
@@ -85,11 +74,6 @@ void dropChunksIfEpochChanged(OperationContext* opCtx,
// Drop the 'config.cache.chunks.<ns>' collection
dropChunks(opCtx, nss);
- if (MONGO_unlikely(hangPersistCollectionAndChangedChunksAfterDropChunks.shouldFail())) {
- LOGV2(22093, "Hit hangPersistCollectionAndChangedChunksAfterDropChunks failpoint");
- hangPersistCollectionAndChangedChunksAfterDropChunks.pauseWhileSet(opCtx);
- }
-
LOGV2(5990400,
"Dropped persisted chunk metadata due to epoch change",
"namespace"_attr = nss,
@@ -131,7 +115,6 @@ Status persistCollectionAndChangedChunks(OperationContext* opCtx,
return status;
}
- // Update the chunk metadata.
try {
dropChunksIfEpochChanged(opCtx, maxLoaderVersion, collAndChunks.epoch, nss);
} catch (const DBException& ex) {
@@ -211,13 +194,13 @@ ChunkVersion getPersistedMaxChunkVersion(OperationContext* opCtx, const Namespac
return ChunkVersion::UNSHARDED();
}
- auto statusWithChunk = shardmetadatautil::readShardChunks(opCtx,
- nss,
- BSONObj(),
- BSON(ChunkType::lastmod() << -1),
- 1LL,
- cachedCollection.getEpoch(),
- cachedCollection.getTimestamp());
+ auto statusWithChunk = readShardChunks(opCtx,
+ nss,
+ BSONObj(),
+ BSON(ChunkType::lastmod() << -1),
+ 1LL,
+ cachedCollection.getEpoch(),
+ cachedCollection.getTimestamp());
uassertStatusOKWithContext(
statusWithChunk,
str::stream() << "Failed to read highest version persisted chunk for collection '"
@@ -247,11 +230,9 @@ CollectionAndChangedChunks getPersistedMetadataSinceVersion(OperationContext* op
// If the epochs are the same we can safely take the timestamp from the shard coll entry.
ChunkVersion startingVersion = version.isSameCollection({shardCollectionEntry.getEpoch(),
shardCollectionEntry.getTimestamp()})
- ? ChunkVersion(version.majorVersion(),
- version.minorVersion(),
- version.epoch(),
- shardCollectionEntry.getTimestamp())
- : ChunkVersion(0, 0, shardCollectionEntry.getEpoch(), shardCollectionEntry.getTimestamp());
+ ? version
+ : ChunkVersion({shardCollectionEntry.getEpoch(), shardCollectionEntry.getTimestamp()},
+ {0, 0});
QueryAndSort diff = createShardChunkDiffQuery(startingVersion);
@@ -647,7 +628,14 @@ StatusWith<CollectionAndChangedChunks> ShardServerCatalogCacheLoader::_runSecond
const NamespaceString& nss,
const ChunkVersion& catalogCacheSinceVersion) {
+ Timer t;
forcePrimaryCollectionRefreshAndWaitForReplication(opCtx, nss);
+ LOGV2_FOR_CATALOG_REFRESH(5965800,
+ 2,
+ "Cache loader on secondary successfully waited for primary refresh "
+ "and replication of collection",
+ "namespace"_attr = nss,
+ "duration"_attr = Milliseconds(t.millis()));
// Read the local metadata.
@@ -776,7 +764,14 @@ ShardServerCatalogCacheLoader::_schedulePrimaryGetChunksSince(
StatusWith<DatabaseType> ShardServerCatalogCacheLoader::_runSecondaryGetDatabase(
OperationContext* opCtx, StringData dbName) {
+ Timer t;
forcePrimaryDatabaseRefreshAndWaitForReplication(opCtx, dbName);
+ LOGV2_FOR_CATALOG_REFRESH(5965801,
+ 2,
+ "Cache loader on secondary successfully waited for primary refresh "
+ "and replication of database",
+ "db"_attr = dbName,
+ "duration"_attr = Milliseconds(t.millis()));
return readShardDatabasesEntry(opCtx, dbName);
}
@@ -1280,16 +1275,7 @@ ShardServerCatalogCacheLoader::CollAndChunkTask::CollAndChunkTask(
if (statusWithCollectionAndChangedChunks.isOK()) {
collectionAndChangedChunks = std::move(statusWithCollectionAndChangedChunks.getValue());
invariant(!collectionAndChangedChunks->changedChunks.empty());
- const auto highestVersion = collectionAndChangedChunks->changedChunks.back().getVersion();
- // Note that due to the way Phase 1 of the FCV upgrade writes timestamps to chunks
- // (non-atomically), it is possible that chunks exist with timestamps, but the
- // corresponding config.collections entry doesn't. In this case, the chunks timestamp
- // should be ignored when computing the max query version and we should use the
- // timestamp that comes from config.collections.
- maxQueryVersion = ChunkVersion(highestVersion.majorVersion(),
- highestVersion.minorVersion(),
- highestVersion.epoch(),
- collectionAndChangedChunks->timestamp);
+ maxQueryVersion = collectionAndChangedChunks->changedChunks.back().getVersion();
} else {
invariant(statusWithCollectionAndChangedChunks == ErrorCodes::NamespaceNotFound);
dropped = true;
diff --git a/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp b/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp
index 9f2f1ddf8d0..a111b9bf592 100644
--- a/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp
+++ b/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include <boost/optional/optional_io.hpp>
#include "mongo/db/s/shard_server_catalog_cache_loader.h"
@@ -203,7 +201,7 @@ CollectionType ShardServerCatalogCacheLoaderTest::makeCollectionType(
std::pair<CollectionType, vector<ChunkType>>
ShardServerCatalogCacheLoaderTest::setUpChunkLoaderWithFiveChunks() {
- ChunkVersion collectionVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ ChunkVersion collectionVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
CollectionType collectionType = makeCollectionType(collectionVersion);
vector<ChunkType> chunks = makeFiveChunks(collectionVersion);
@@ -371,7 +369,7 @@ TEST_F(ShardServerCatalogCacheLoaderTest, PrimaryLoadFromShardedAndFindNewEpoch)
// Then refresh again and find that the collection has been dropped and recreated.
- ChunkVersion collVersionWithNewEpoch(1, 0, OID::gen(), Timestamp(2, 0));
+ ChunkVersion collVersionWithNewEpoch({OID::gen(), Timestamp(2, 0)}, {1, 0});
CollectionType collectionTypeWithNewEpoch = makeCollectionType(collVersionWithNewEpoch);
vector<ChunkType> chunksWithNewEpoch = makeFiveChunks(collVersionWithNewEpoch);
_remoteLoaderMock->setCollectionRefreshReturnValue(collectionTypeWithNewEpoch);
@@ -398,7 +396,7 @@ TEST_F(ShardServerCatalogCacheLoaderTest, PrimaryLoadFromShardedAndFindMixedChun
    // Then refresh again and retrieve chunks from the config server that have mixed epochs, like
// as if the chunks read yielded around a drop and recreate of the collection.
- ChunkVersion collVersionWithNewEpoch(1, 0, OID::gen(), Timestamp(2, 0));
+ ChunkVersion collVersionWithNewEpoch({OID::gen(), Timestamp(2, 0)}, {1, 0});
CollectionType collectionTypeWithNewEpoch = makeCollectionType(collVersionWithNewEpoch);
vector<ChunkType> chunksWithNewEpoch = makeFiveChunks(collVersionWithNewEpoch);
vector<ChunkType> mixedChunks;
@@ -441,7 +439,7 @@ TEST_F(ShardServerCatalogCacheLoaderTest, PrimaryLoadFromShardedAndFindMixedChun
}
TEST_F(ShardServerCatalogCacheLoaderTest, TimeseriesFieldsAreProperlyPropagatedOnSSCCL) {
- ChunkVersion collectionVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ ChunkVersion collectionVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
CollectionType collectionType = makeCollectionType(collectionVersion);
vector<ChunkType> chunks = makeFiveChunks(collectionVersion);
@@ -483,7 +481,7 @@ TEST_F(ShardServerCatalogCacheLoaderTest, TimeseriesFieldsAreProperlyPropagatedO
}
void ShardServerCatalogCacheLoaderTest::refreshCollectionEpochOnRemoteLoader() {
- ChunkVersion collectionVersion(1, 2, OID::gen(), Timestamp(1, 1));
+ ChunkVersion collectionVersion({OID::gen(), Timestamp(1, 1)}, {1, 2});
CollectionType collectionType = makeCollectionType(collectionVersion);
vector<ChunkType> chunks = makeFiveChunks(collectionVersion);
_remoteLoaderMock->setCollectionRefreshReturnValue(collectionType);
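A number of hunks in this and the surrounding files switch ChunkVersion construction from the old flat (major, minor, epoch, timestamp) argument list to a grouped form. A minimal sketch of the new call shape, with throwaway values:

    // New shape used throughout this patch: the {epoch, timestamp} generation first,
    // then the {major, minor} pair.
    const ChunkVersion collectionVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
    // Old shape being replaced: ChunkVersion collectionVersion(1, 0, OID::gen(), Timestamp(1, 1));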
diff --git a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp
index add2ac6f728..dca4b07d7cf 100644
--- a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp
+++ b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp
@@ -50,8 +50,8 @@ constexpr auto kCountFailed = "countFailed";
constexpr auto kCountCanceled = "countCanceled";
constexpr auto kLastOpEndingChunkImbalance = "lastOpEndingChunkImbalance";
constexpr auto kActive = "active";
-constexpr auto kDocumentsProcessed = "documentsProcessed";
-constexpr auto kBytesWritten = "bytesWritten";
+constexpr auto kDocumentsCopied = "documentsCopied";
+constexpr auto kBytesCopied = "bytesCopied";
constexpr auto kOplogEntriesFetched = "oplogEntriesFetched";
constexpr auto kOplogEntriesApplied = "oplogEntriesApplied";
constexpr auto kInsertsApplied = "insertsApplied";
@@ -240,8 +240,8 @@ void ShardingDataTransformCumulativeMetrics::reportForServerStatus(BSONObjBuilde
void ShardingDataTransformCumulativeMetrics::reportActive(BSONObjBuilder* bob) const {
BSONObjBuilder s(bob->subobjStart(kActive));
- s.append(kDocumentsProcessed, _documentsProcessed.load());
- s.append(kBytesWritten, _bytesWritten.load());
+ s.append(kDocumentsCopied, _documentsCopied.load());
+ s.append(kBytesCopied, _bytesCopied.load());
s.append(kOplogEntriesFetched, _oplogEntriesFetched.load());
s.append(kOplogEntriesApplied, _oplogEntriesApplied.load());
s.append(kInsertsApplied, _insertsApplied.load());
@@ -422,8 +422,8 @@ const char* ShardingDataTransformCumulativeMetrics::fieldNameFor(
void ShardingDataTransformCumulativeMetrics::onInsertsDuringCloning(
int64_t count, int64_t bytes, const Milliseconds& elapsedTime) {
_collectionCloningTotalLocalBatchInserts.fetchAndAdd(1);
- _documentsProcessed.fetchAndAdd(count);
- _bytesWritten.fetchAndAdd(bytes);
+ _documentsCopied.fetchAndAdd(count);
+ _bytesCopied.fetchAndAdd(bytes);
_collectionCloningTotalLocalInsertTimeMillis.fetchAndAdd(
durationCount<Milliseconds>(elapsedTime));
}
diff --git a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h
index dfd8c989628..5e6949cf001 100644
--- a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h
+++ b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h
@@ -191,8 +191,8 @@ private:
AtomicWord<int64_t> _totalBatchRetrievedDuringCloneMillis{0};
AtomicWord<int64_t> _oplogBatchApplied{0};
AtomicWord<int64_t> _oplogBatchAppliedMillis{0};
- AtomicWord<int64_t> _documentsProcessed{0};
- AtomicWord<int64_t> _bytesWritten{0};
+ AtomicWord<int64_t> _documentsCopied{0};
+ AtomicWord<int64_t> _bytesCopied{0};
AtomicWord<int64_t> _lastOpEndingChunkImbalance{0};
AtomicWord<int64_t> _readsDuringCriticalSection{0};
diff --git a/src/mongo/db/s/sharding_data_transform_cumulative_metrics_test.cpp b/src/mongo/db/s/sharding_data_transform_cumulative_metrics_test.cpp
index 99a221b10ba..5d6603c954c 100644
--- a/src/mongo/db/s/sharding_data_transform_cumulative_metrics_test.cpp
+++ b/src/mongo/db/s/sharding_data_transform_cumulative_metrics_test.cpp
@@ -330,8 +330,8 @@ TEST_F(ShardingDataTransformCumulativeMetricsTest, ReportContainsInsertsDuringCl
ASSERT_EQ(latencySection.getIntField("collectionCloningTotalLocalInsertTimeMillis"), 0);
auto activeSection = getActiveSection(_cumulativeMetrics);
- ASSERT_EQ(activeSection.getIntField("documentsProcessed"), 0);
- ASSERT_EQ(activeSection.getIntField("bytesWritten"), 0);
+ ASSERT_EQ(activeSection.getIntField("documentsCopied"), 0);
+ ASSERT_EQ(activeSection.getIntField("bytesCopied"), 0);
_cumulativeMetrics.onInsertsDuringCloning(140, 20763, Milliseconds(15));
@@ -340,8 +340,8 @@ TEST_F(ShardingDataTransformCumulativeMetricsTest, ReportContainsInsertsDuringCl
ASSERT_EQ(latencySection.getIntField("collectionCloningTotalLocalInsertTimeMillis"), 15);
activeSection = getActiveSection(_cumulativeMetrics);
- ASSERT_EQ(activeSection.getIntField("documentsProcessed"), 140);
- ASSERT_EQ(activeSection.getIntField("bytesWritten"), 20763);
+ ASSERT_EQ(activeSection.getIntField("documentsCopied"), 140);
+ ASSERT_EQ(activeSection.getIntField("bytesCopied"), 20763);
}
TEST_F(ShardingDataTransformCumulativeMetricsTest, ReportContainsInsertsDuringFetching) {
diff --git a/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp b/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp
index e74155e374b..807195c689d 100644
--- a/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp
+++ b/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp
@@ -85,11 +85,11 @@ ShardingDataTransformInstanceMetrics::ShardingDataTransformInstanceMetrics(
_originalCommand{std::move(originalCommand)},
_sourceNs{std::move(sourceNs)},
_role{role},
+ _startTime{startTime},
_clockSource{clockSource},
_observer{std::move(observer)},
_cumulativeMetrics{cumulativeMetrics},
_deregister{_cumulativeMetrics->registerInstanceMetrics(_observer.get())},
- _startTime{startTime},
_copyingStartTime{kNoDate},
_copyingEndTime{kNoDate},
_approxDocumentsToCopy{0},
@@ -118,7 +118,8 @@ ShardingDataTransformInstanceMetrics::~ShardingDataTransformInstanceMetrics() {
Milliseconds ShardingDataTransformInstanceMetrics::getHighEstimateRemainingTimeMillis() const {
switch (_role) {
case Role::kRecipient: {
- auto estimate = estimateRemainingRecipientTime(_applyingStartTime.load() != kNoDate,
+ auto estimate =
+ resharding::estimateRemainingRecipientTime(_applyingStartTime.load() != kNoDate,
_bytesCopied.load(),
_approxBytesToCopy.load(),
getCopyingElapsedTimeSecs(),
diff --git a/src/mongo/db/s/sharding_data_transform_instance_metrics.h b/src/mongo/db/s/sharding_data_transform_instance_metrics.h
index 6c508bbafd8..dbf81eabffb 100644
--- a/src/mongo/db/s/sharding_data_transform_instance_metrics.h
+++ b/src/mongo/db/s/sharding_data_transform_instance_metrics.h
@@ -164,13 +164,13 @@ protected:
"allShardsHighestRemainingOperationTimeEstimatedSecs";
private:
+ const Date_t _startTime;
+
ClockSource* _clockSource;
ObserverPtr _observer;
ShardingDataTransformCumulativeMetrics* _cumulativeMetrics;
ShardingDataTransformCumulativeMetrics::DeregistrationFunction _deregister;
- const Date_t _startTime;
-
AtomicWord<Date_t> _copyingStartTime;
AtomicWord<Date_t> _copyingEndTime;
AtomicWord<int32_t> _approxDocumentsToCopy;
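As a brief aside on the reordering just above: the _startTime declaration moves ahead of _clockSource here, matching the reordered constructor initializer list in sharding_data_transform_instance_metrics.cpp, presumably because C++ initializes non-static data members in declaration order regardless of the order written in the initializer list. A tiny standalone illustration of why the two orders are kept in sync:

    // Members are initialized in declaration order, so _second(_first + 1) is safe here.
    // If _second were declared above _first, it would be initialized first and read an
    // indeterminate value (and compilers would emit a -Wreorder-style warning).
    struct Example {
        explicit Example(int v) : _first(v), _second(_first + 1) {}
        int _first;
        int _second;
    };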
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.h b/src/mongo/db/s/sharding_ddl_coordinator.h
index 5972c7ce9e6..51dcc023f60 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.h
+++ b/src/mongo/db/s/sharding_ddl_coordinator.h
@@ -40,8 +40,11 @@
#include "mongo/db/s/sharding_ddl_coordinator_gen.h"
#include "mongo/db/s/sharding_ddl_coordinator_service.h"
#include "mongo/executor/task_executor.h"
+#include "mongo/logv2/log.h"
#include "mongo/util/future.h"
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
+
namespace mongo {
ShardingDDLCoordinatorMetadata extractShardingDDLCoordinatorMetadata(const BSONObj& coorDoc);
@@ -117,74 +120,6 @@ protected:
virtual ShardingDDLCoordinatorMetadata const& metadata() const = 0;
- template <typename StateDoc>
- StateDoc _insertStateDocument(StateDoc&& newDoc) {
- auto copyMetadata = newDoc.getShardingDDLCoordinatorMetadata();
- copyMetadata.setRecoveredFromDisk(true);
- newDoc.setShardingDDLCoordinatorMetadata(copyMetadata);
-
- auto opCtx = cc().makeOperationContext();
- PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
- try {
- store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernNoTimeout);
- } catch (const ExceptionFor<ErrorCodes::DuplicateKey>&) {
- // A series of step-up and step-down events can cause a node to try and insert the
- // document when it has already been persisted locally, but we must still wait for
- // majority commit.
- const auto replCoord = repl::ReplicationCoordinator::get(opCtx.get());
- const auto lastLocalOpTime = replCoord->getMyLastAppliedOpTime();
- WaitForMajorityService::get(opCtx->getServiceContext())
- .waitUntilMajority(lastLocalOpTime, opCtx.get()->getCancellationToken())
- .get(opCtx.get());
- }
-
- return std::move(newDoc);
- }
-
- template <typename StateDoc>
- StateDoc _updateStateDocument(OperationContext* opCtx, StateDoc&& newDoc) {
- PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
- invariant(newDoc.getShardingDDLCoordinatorMetadata().getRecoveredFromDisk());
- store.update(opCtx,
- BSON(StateDoc::kIdFieldName << newDoc.getId().toBSON()),
- newDoc.toBSON(),
- WriteConcerns::kMajorityWriteConcernNoTimeout);
- return std::move(newDoc);
- }
-
- // lazily acqiure Logical Session ID and a txn number
- template <typename StateDoc>
- StateDoc _updateSession(OperationContext* opCtx, StateDoc const& doc) {
- auto newShardingDDLCoordinatorMetadata = doc.getShardingDDLCoordinatorMetadata();
-
- auto optSession = newShardingDDLCoordinatorMetadata.getSession();
- if (optSession) {
- auto txnNumber = optSession->getTxnNumber();
- optSession->setTxnNumber(++txnNumber);
- newShardingDDLCoordinatorMetadata.setSession(optSession);
- } else {
- auto session = InternalSessionPool::get(opCtx)->acquireSystemSession();
- newShardingDDLCoordinatorMetadata.setSession(
- ShardingDDLSession(session.getSessionId(), session.getTxnNumber()));
- }
-
- StateDoc newDoc(doc);
- newDoc.setShardingDDLCoordinatorMetadata(std::move(newShardingDDLCoordinatorMetadata));
- return _updateStateDocument(opCtx, std::move(newDoc));
- }
-
- template <typename StateDoc>
- OperationSessionInfo getCurrentSession(StateDoc const& doc) const {
- invariant(doc.getShardingDDLCoordinatorMetadata().getSession());
- ShardingDDLSession shardingDDLSession =
- *doc.getShardingDDLCoordinatorMetadata().getSession();
-
- OperationSessionInfo osi;
- osi.setSessionId(shardingDDLSession.getLsid());
- osi.setTxnNumber(shardingDDLSession.getTxnNumber());
- return osi;
- }
-
/*
* Performs a noop write on all shards and the configsvr using the sessionId and txnNumber
* specified in 'osi'.
@@ -237,4 +172,204 @@ private:
std::stack<DistLockManager::ScopedLock> _scopedLocks;
};
+template <class StateDoc>
+class ShardingDDLCoordinatorImpl : public ShardingDDLCoordinator {
+public:
+ boost::optional<BSONObj> reportForCurrentOp(
+ MongoProcessInterface::CurrentOpConnectionsMode connMode,
+ MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override {
+ return basicReportBuilder().obj();
+ }
+
+protected:
+ ShardingDDLCoordinatorImpl(ShardingDDLCoordinatorService* service,
+ const std::string& name,
+ const BSONObj& initialStateDoc)
+ : ShardingDDLCoordinator(service, initialStateDoc),
+ _coordinatorName(name),
+ _initialState(initialStateDoc.getOwned()),
+ _doc(StateDoc::parse(IDLParserErrorContext("CoordinatorDocument"), _initialState)) {}
+
+ ShardingDDLCoordinatorMetadata const& metadata() const override {
+ return _doc.getShardingDDLCoordinatorMetadata();
+ }
+
+
+ virtual void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {};
+
+ virtual BSONObjBuilder basicReportBuilder() const noexcept {
+ BSONObjBuilder bob;
+
+ // Append static info
+ bob.append("type", "op");
+ bob.append("ns", nss().toString());
+ bob.append("desc", _coordinatorName);
+ bob.append("op", "command");
+ bob.append("active", true);
+
+ // Create command description
+ BSONObjBuilder cmdInfoBuilder;
+ {
+ stdx::lock_guard lk{_docMutex};
+ if (const auto& optComment = getForwardableOpMetadata().getComment()) {
+ cmdInfoBuilder.append(optComment.get().firstElement());
+ }
+ }
+ appendCommandInfo(&cmdInfoBuilder);
+ bob.append("command", cmdInfoBuilder.obj());
+
+ return bob;
+ }
+
+ const std::string _coordinatorName;
+ const BSONObj _initialState;
+ mutable Mutex _docMutex = MONGO_MAKE_LATCH("ShardingDDLCoordinator::_docMutex");
+ StateDoc _doc;
+};
+
+template <class StateDoc, class Phase>
+class RecoverableShardingDDLCoordinator : public ShardingDDLCoordinatorImpl<StateDoc> {
+protected:
+ using ShardingDDLCoordinatorImpl<StateDoc>::_doc;
+ using ShardingDDLCoordinatorImpl<StateDoc>::_docMutex;
+
+ RecoverableShardingDDLCoordinator(ShardingDDLCoordinatorService* service,
+ const std::string& name,
+ const BSONObj& initialStateDoc)
+ : ShardingDDLCoordinatorImpl<StateDoc>(service, name, initialStateDoc) {}
+
+ virtual StringData serializePhase(const Phase& phase) const = 0;
+
+ template <typename Func>
+ auto _executePhase(const Phase& newPhase, Func&& func) {
+ return [=] {
+ const auto& currPhase = _doc.getPhase();
+
+ if (currPhase > newPhase) {
+ // Do not execute this phase if we already reached a subsequent one.
+ return;
+ }
+ if (currPhase < newPhase) {
+ // Persist the new phase if this is the first time we are executing it.
+ _enterPhase(newPhase);
+ }
+ return func();
+ };
+ }
+
+ void _enterPhase(const Phase& newPhase) {
+ auto newDoc = [&] {
+ stdx::lock_guard lk{_docMutex};
+ return _doc;
+ }();
+
+ newDoc.setPhase(newPhase);
+
+ LOGV2_DEBUG(5390501,
+ 2,
+ "DDL coordinator phase transition",
+ "coordinatorId"_attr = _doc.getId(),
+ "newPhase"_attr = serializePhase(newDoc.getPhase()),
+ "oldPhase"_attr = serializePhase(_doc.getPhase()));
+
+ auto opCtx = cc().makeOperationContext();
+
+ if (_doc.getPhase() == Phase::kUnset) {
+ _insertStateDocument(opCtx.get(), std::move(newDoc));
+ } else {
+ _updateStateDocument(opCtx.get(), std::move(newDoc));
+ }
+ }
+
+ BSONObjBuilder basicReportBuilder() const noexcept override {
+ auto baseReportBuilder = ShardingDDLCoordinatorImpl<StateDoc>::basicReportBuilder();
+
+ const auto currPhase = [&]() {
+ stdx::lock_guard l{_docMutex};
+ return _doc.getPhase();
+ }();
+
+ baseReportBuilder.append("currentPhase", serializePhase(currPhase));
+ return baseReportBuilder;
+ }
+
+ void _insertStateDocument(OperationContext* opCtx, StateDoc&& newDoc) {
+ auto copyMetadata = newDoc.getShardingDDLCoordinatorMetadata();
+ copyMetadata.setRecoveredFromDisk(true);
+ newDoc.setShardingDDLCoordinatorMetadata(copyMetadata);
+
+ PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
+ try {
+ store.add(opCtx, newDoc, WriteConcerns::kMajorityWriteConcernNoTimeout);
+ } catch (const ExceptionFor<ErrorCodes::DuplicateKey>&) {
+ // A series of step-up and step-down events can cause a node to try and insert the
+ // document when it has already been persisted locally, but we must still wait for
+ // majority commit.
+ const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ const auto lastLocalOpTime = replCoord->getMyLastAppliedOpTime();
+ WaitForMajorityService::get(opCtx->getServiceContext())
+ .waitUntilMajority(lastLocalOpTime, opCtx->getCancellationToken())
+ .get(opCtx);
+ }
+
+ {
+ stdx::lock_guard lk{_docMutex};
+ _doc = std::move(newDoc);
+ }
+ }
+
+ void _updateStateDocument(OperationContext* opCtx, StateDoc&& newDoc) {
+ PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
+ invariant(newDoc.getShardingDDLCoordinatorMetadata().getRecoveredFromDisk());
+ store.update(opCtx,
+ BSON(StateDoc::kIdFieldName << newDoc.getId().toBSON()),
+ newDoc.toBSON(),
+ WriteConcerns::kMajorityWriteConcernNoTimeout);
+
+ {
+ stdx::lock_guard lk{_docMutex};
+ _doc = std::move(newDoc);
+ }
+ }
+
+    // Lazily acquire a logical session ID and a txn number
+ void _updateSession(OperationContext* opCtx) {
+ auto newDoc = [&] {
+ stdx::lock_guard lk{_docMutex};
+ return _doc;
+ }();
+ auto newShardingDDLCoordinatorMetadata = newDoc.getShardingDDLCoordinatorMetadata();
+
+ auto optSession = newShardingDDLCoordinatorMetadata.getSession();
+ if (optSession) {
+ auto txnNumber = optSession->getTxnNumber();
+ optSession->setTxnNumber(++txnNumber);
+ newShardingDDLCoordinatorMetadata.setSession(optSession);
+ } else {
+ auto session = InternalSessionPool::get(opCtx)->acquireSystemSession();
+ newShardingDDLCoordinatorMetadata.setSession(
+ ShardingDDLSession(session.getSessionId(), session.getTxnNumber()));
+ }
+
+ newDoc.setShardingDDLCoordinatorMetadata(std::move(newShardingDDLCoordinatorMetadata));
+ _updateStateDocument(opCtx, std::move(newDoc));
+ }
+
+ OperationSessionInfo getCurrentSession() const {
+ auto optSession = [&] {
+ stdx::lock_guard lk{_docMutex};
+ return _doc.getShardingDDLCoordinatorMetadata().getSession();
+ }();
+
+ invariant(optSession);
+
+ OperationSessionInfo osi;
+ osi.setSessionId(optSession->getLsid());
+ osi.setTxnNumber(optSession->getTxnNumber());
+ return osi;
+ }
+};
+
+#undef MONGO_LOGV2_DEFAULT_COMPONENT
+
} // namespace mongo
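As an illustrative aside (not part of the patch): RecoverableShardingDDLCoordinator::_executePhase() wraps a phase body in a lambda that skips phases the recovered document has already passed and persists the new phase on first entry. A sketch of how a derived coordinator might chain phases inside _runImpl — MyCoordinator, the Phase values, and the _do*Phase() helpers are hypothetical, while the chaining style mirrors existing coordinators in this directory:

    ExecutorFuture<void> MyCoordinator::_runImpl(
        std::shared_ptr<executor::ScopedTaskExecutor> executor,
        const CancellationToken& token) noexcept {
        return ExecutorFuture<void>(**executor)
            .then(_executePhase(Phase::kFirstPhase,
                                [this, anchor = shared_from_this()] { _doFirstPhase(); }))
            .then(_executePhase(Phase::kSecondPhase,
                                [this, anchor = shared_from_this()] { _doSecondPhase(); }));
    }

On resume after a crash or step-down, the persisted phase makes the earlier .then() stages no-ops, so only the not-yet-completed phases run again.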
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.idl b/src/mongo/db/s/sharding_ddl_coordinator.idl
index ce42c66a6e4..3a6b35e3eb4 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.idl
+++ b/src/mongo/db/s/sharding_ddl_coordinator.idl
@@ -47,13 +47,9 @@ enums:
kDropDatabase: "dropDatabase"
kDropCollection: "dropCollection"
kRenameCollection: "renameCollection"
- # TODO SERVER-64720 remove once 6.0 becomes last LTS
- kCreateCollectionPre60Compatible: "createCollection"
kCreateCollection: "createCollection_V2"
kRefineCollectionShardKey: "refineCollectionShardKey"
kSetAllowMigrations: "setAllowMigrations"
- # TODO (SERVER-62325): Remove pre60 compatible collMod coordinator after 6.0 branching.
- kCollModPre60Compatible: "collMod"
kCollMod: "collMod_V2"
kReshardCollection: "reshardCollection"
kReshardCollectionNoResilient: "reshardCollectionNoResilient"
diff --git a/src/mongo/db/s/sharding_ddl_coordinator_service.cpp b/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
index 4073c70fc58..f4494ace7eb 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
+++ b/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
@@ -38,7 +38,6 @@
#include "mongo/db/pipeline/document_source_count.h"
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/s/collmod_coordinator.h"
-#include "mongo/db/s/collmod_coordinator_pre60_compatible.h"
#include "mongo/db/s/compact_structured_encryption_data_coordinator.h"
#include "mongo/db/s/create_collection_coordinator.h"
#include "mongo/db/s/database_sharding_state.h"
@@ -76,10 +75,6 @@ std::shared_ptr<ShardingDDLCoordinator> constructShardingDDLCoordinatorInstance(
break;
case DDLCoordinatorTypeEnum::kRenameCollection:
return std::make_shared<RenameCollectionCoordinator>(service, std::move(initialState));
- case DDLCoordinatorTypeEnum::kCreateCollectionPre60Compatible:
- return std::make_shared<CreateCollectionCoordinatorPre60Compatible>(
- service, std::move(initialState));
- break;
case DDLCoordinatorTypeEnum::kCreateCollection:
return std::make_shared<CreateCollectionCoordinator>(service, std::move(initialState));
break;
@@ -94,10 +89,6 @@ std::shared_ptr<ShardingDDLCoordinator> constructShardingDDLCoordinatorInstance(
case DDLCoordinatorTypeEnum::kCollMod:
return std::make_shared<CollModCoordinator>(service, std::move(initialState));
break;
- case DDLCoordinatorTypeEnum::kCollModPre60Compatible:
- return std::make_shared<CollModCoordinatorPre60Compatible>(service,
- std::move(initialState));
- break;
case DDLCoordinatorTypeEnum::kReshardCollection:
return std::make_shared<ReshardCollectionCoordinator>(service, std::move(initialState));
break;
diff --git a/src/mongo/db/s/sharding_ddl_util.cpp b/src/mongo/db/s/sharding_ddl_util.cpp
index 8b5c2113adf..89eb4107f60 100644
--- a/src/mongo/db/s/sharding_ddl_util.cpp
+++ b/src/mongo/db/s/sharding_ddl_util.cpp
@@ -340,14 +340,7 @@ void shardedRenameMetadata(OperationContext* opCtx,
auto now = VectorClock::get(opCtx)->getTime();
auto newTimestamp = now.clusterTime().asTimestamp();
fromCollType.setTimestamp(newTimestamp);
- {
- // Only bump the epoch if the whole cluster is in FCV 5.0, so chunks do not contain epochs.
- FixedFCVRegion fixedFCVRegion(opCtx);
- if (serverGlobalParams.featureCompatibility.isGreaterThanOrEqualTo(
- multiversion::FeatureCompatibilityVersion::kFullyDowngradedTo_5_0)) {
- fromCollType.setEpoch(OID::gen());
- }
- }
+ fromCollType.setEpoch(OID::gen());
// Insert the TO collection entry
uassertStatusOK(catalogClient->insertConfigDocument(
@@ -506,16 +499,8 @@ void sendDropCollectionParticipantCommandToShards(OperationContext* opCtx,
const auto cmdObj =
CommandHelpers::appendMajorityWriteConcern(dropCollectionParticipant.toBSON({}));
- try {
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx, nss.db(), cmdObj.addFields(osi.toBSON()), shardIds, executor);
- } catch (const ExceptionFor<ErrorCodes::NotARetryableWriteCommand>&) {
- // Older 5.0 binaries don't support running the _shardsvrDropCollectionParticipant
- // command as a retryable write yet. In that case, retry without attaching session
- // info.
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx, nss.db(), cmdObj, shardIds, executor);
- }
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx, nss.db(), cmdObj.addFields(osi.toBSON()), shardIds, executor);
}
} // namespace sharding_ddl_util
diff --git a/src/mongo/db/s/sharding_ddl_util_test.cpp b/src/mongo/db/s/sharding_ddl_util_test.cpp
index fd4e3905980..2ff3925c53e 100644
--- a/src/mongo/db/s/sharding_ddl_util_test.cpp
+++ b/src/mongo/db/s/sharding_ddl_util_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/logical_session_cache_noop.h"
#include "mongo/db/namespace_string.h"
@@ -47,7 +44,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace {
@@ -119,7 +115,7 @@ TEST_F(ShardingDDLUtilTest, ShardedRenameMetadata) {
const int nChunks = 10;
std::vector<ChunkType> chunks;
for (int i = 0; i < nChunks; i++) {
- ChunkVersion chunkVersion(1, i, fromEpoch, collTimestamp);
+ ChunkVersion chunkVersion({fromEpoch, collTimestamp}, {1, uint32_t(i)});
ChunkType chunk;
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUUID);
@@ -138,7 +134,7 @@ TEST_F(ShardingDDLUtilTest, ShardedRenameMetadata) {
const auto toEpoch = OID::gen();
const auto toUUID = UUID::gen();
for (int i = 0; i < nChunks; i++) {
- ChunkVersion chunkVersion(1, i, toEpoch, Timestamp(2));
+ ChunkVersion chunkVersion({toEpoch, Timestamp(2)}, {1, uint32_t(i)});
ChunkType chunk;
chunk.setName(OID::gen());
chunk.setCollectionUUID(toUUID);
@@ -215,7 +211,7 @@ TEST_F(ShardingDDLUtilTest, RenamePreconditionsAreMet) {
opCtx, false /* sourceIsSharded */, kToNss, false /* dropTarget */);
// Initialize a chunk
- ChunkVersion chunkVersion(1, 1, OID::gen(), Timestamp(2, 1));
+ ChunkVersion chunkVersion({OID::gen(), Timestamp(2, 1)}, {1, 1});
ChunkType chunk;
chunk.setName(OID::gen());
chunk.setCollectionUUID(UUID::gen());
@@ -256,7 +252,7 @@ TEST_F(ShardingDDLUtilTest, RenamePreconditionsTargetCollectionExists) {
auto opCtx = operationContext();
// Initialize a chunk
- ChunkVersion chunkVersion(1, 1, OID::gen(), Timestamp(2, 1));
+ ChunkVersion chunkVersion({OID::gen(), Timestamp(2, 1)}, {1, 1});
ChunkType chunk;
chunk.setName(OID::gen());
chunk.setCollectionUUID(UUID::gen());
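
For readers following the test changes above: this patch switches every ChunkVersion construction from the old positional form to a new two-argument form. The sketch below shows the mapping as it appears in these hunks; the interpretation of the brace-initialized arguments is inferred from the call sites.

// Old (positional):   ChunkVersion chunkVersion(1, i, epoch, timestamp);
// New (two groups):   ChunkVersion chunkVersion({epoch, timestamp}, {1, uint32_t(i)});
// The first group identifies the collection incarnation (epoch + creation timestamp);
// the second carries the {major, minor} version pair.
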
diff --git a/src/mongo/db/s/sharding_mongod_test_fixture.cpp b/src/mongo/db/s/sharding_mongod_test_fixture.cpp
index a05fddaa213..c7b078c89e0 100644
--- a/src/mongo/db/s/sharding_mongod_test_fixture.cpp
+++ b/src/mongo/db/s/sharding_mongod_test_fixture.cpp
@@ -71,7 +71,6 @@
#include "mongo/s/client/shard_remote.h"
#include "mongo/s/grid.h"
#include "mongo/s/query/cluster_cursor_manager.h"
-#include "mongo/s/request_types/set_shard_version_request.h"
#include "mongo/util/clock_source_mock.h"
#include "mongo/util/tick_source_mock.h"
diff --git a/src/mongo/db/s/sharding_server_status.cpp b/src/mongo/db/s/sharding_server_status.cpp
index 8d560454382..82de4cfc5c9 100644
--- a/src/mongo/db/s/sharding_server_status.cpp
+++ b/src/mongo/db/s/sharding_server_status.cpp
@@ -73,14 +73,20 @@ public:
result.append("configsvrConnectionString",
shardRegistry->getConfigServerConnectionString().toString());
+ const auto vcTime = VectorClock::get(opCtx)->getTime();
+
const auto configOpTime = [&]() {
- const auto vcTime = VectorClock::get(opCtx)->getTime();
const auto vcConfigTimeTs = vcTime.configTime().asTimestamp();
return mongo::repl::OpTime(vcConfigTimeTs, mongo::repl::OpTime::kUninitializedTerm);
}();
-
configOpTime.append(&result, "lastSeenConfigServerOpTime");
+ const auto topologyOpTime = [&]() {
+ const auto vcTopologyTimeTs = vcTime.topologyTime().asTimestamp();
+ return mongo::repl::OpTime(vcTopologyTimeTs, mongo::repl::OpTime::kUninitializedTerm);
+ }();
+ topologyOpTime.append(&result, "lastSeenTopologyOpTime");
+
const long long maxChunkSizeInBytes =
grid->getBalancerConfiguration()->getMaxChunkSizeBytes();
result.append("maxChunkSizeInBytes", maxChunkSizeInBytes);
diff --git a/src/mongo/db/s/sharding_util.cpp b/src/mongo/db/s/sharding_util.cpp
index fde594f35cb..c082038d714 100644
--- a/src/mongo/db/s/sharding_util.cpp
+++ b/src/mongo/db/s/sharding_util.cpp
@@ -28,18 +28,12 @@
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/sharding_util.h"
#include <fmt/format.h>
#include "mongo/db/commands.h"
-#include "mongo/db/dbdirectclient.h"
-#include "mongo/db/repl/repl_client_info.h"
-#include "mongo/db/s/type_shard_collection.h"
#include "mongo/logv2/log.h"
-#include "mongo/s/catalog/type_collection.h"
#include "mongo/s/request_types/flush_routing_table_cache_updates_gen.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
@@ -113,45 +107,5 @@ std::vector<AsyncRequestsSender::Response> sendCommandToShards(
return responses;
}
-void downgradeCollectionBalancingFieldsToPre53(OperationContext* opCtx) {
- const NamespaceString collNss = [&]() {
- if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
- return NamespaceString::kShardConfigCollectionsNamespace;
- } else if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- return CollectionType::ConfigNS;
- }
- MONGO_UNREACHABLE;
- }();
-
- write_ops::UpdateCommandRequest updateOp(collNss);
- updateOp.setUpdates({[&] {
- write_ops::UpdateOpEntry entry;
- BSONObjBuilder updateCmd;
- BSONObjBuilder unsetBuilder(updateCmd.subobjStart("$unset"));
- unsetBuilder.append(CollectionType::kMaxChunkSizeBytesFieldName, 0);
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- unsetBuilder.append(CollectionType::kNoAutoSplitFieldName, 0);
- } else {
- unsetBuilder.append(ShardCollectionTypeBase::kAllowAutoSplitFieldName, 0);
- }
- unsetBuilder.doneFast();
- entry.setQ({});
- const BSONObj update = updateCmd.obj();
- entry.setU(write_ops::UpdateModification::parseFromClassicUpdate(update));
- entry.setUpsert(false);
- entry.setMulti(true);
- return entry;
- }()});
-
- DBDirectClient client(opCtx);
- client.update(updateOp);
-
- const WriteConcernOptions majorityWC{
- WriteConcernOptions::kMajority, WriteConcernOptions::SyncMode::UNSET, Seconds(0)};
- WriteConcernResult ignoreResult;
- auto latestOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
- uassertStatusOK(waitForWriteConcern(opCtx, latestOpTime, majorityWC, &ignoreResult));
-}
-
} // namespace sharding_util
} // namespace mongo
diff --git a/src/mongo/db/s/sharding_util.h b/src/mongo/db/s/sharding_util.h
index c5021b4d46f..783c6703138 100644
--- a/src/mongo/db/s/sharding_util.h
+++ b/src/mongo/db/s/sharding_util.h
@@ -61,14 +61,5 @@ std::vector<AsyncRequestsSender::Response> sendCommandToShards(
const std::shared_ptr<executor::TaskExecutor>& executor,
bool throwOnError = true);
-/**
- * Unset the `noAutosplit` and `maxChunkSizeBytes` fields from:
- * - `config.collections` on the CSRS
- * - `config.cache.collections` on shards
- *
- * TODO SERVER-62693 remove this method and all its usages once 6.0 branches out
- */
-void downgradeCollectionBalancingFieldsToPre53(OperationContext* opCtx);
-
} // namespace sharding_util
} // namespace mongo
diff --git a/src/mongo/db/s/sharding_write_router_bm.cpp b/src/mongo/db/s/sharding_write_router_bm.cpp
index 7a47c6eed21..6d20ad82215 100644
--- a/src/mongo/db/s/sharding_write_router_bm.cpp
+++ b/src/mongo/db/s/sharding_write_router_bm.cpp
@@ -103,7 +103,7 @@ std::pair<std::vector<mongo::ChunkType>, mongo::ChunkManager> createChunks(
for (uint32_t i = 0; i < nChunks; ++i) {
chunks.emplace_back(collIdentifier,
getRangeForChunk(i, nChunks),
- ChunkVersion{i + 1, 0, collEpoch, collTimestamp},
+ ChunkVersion({collEpoch, collTimestamp}, {i + 1, 0}),
pessimalShardSelector(i, nShards, nChunks));
}
diff --git a/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp b/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp
index 4e95395faaa..f0918cc5766 100644
--- a/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp
+++ b/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp
@@ -99,7 +99,7 @@ public:
// If abort actually went through, the resharding documents should be cleaned up.
// If they still exist, it could be because the abort was interrupted or the node is no
// longer primary.
- doNoopWrite(opCtx, "_shardsvrAbortReshardCollection no-op", ns());
+ resharding::doNoopWrite(opCtx, "_shardsvrAbortReshardCollection no-op", ns());
PersistentTaskStore<CommonReshardingMetadata> donorReshardingOpStore(
NamespaceString::kDonorReshardingOperationsNamespace);
uassert(5563802,
diff --git a/src/mongo/db/s/shardsvr_collmod_command.cpp b/src/mongo/db/s/shardsvr_collmod_command.cpp
index f0564913aa1..3df3e521579 100644
--- a/src/mongo/db/s/shardsvr_collmod_command.cpp
+++ b/src/mongo/db/s/shardsvr_collmod_command.cpp
@@ -33,19 +33,12 @@
#include "mongo/db/coll_mod_gen.h"
#include "mongo/db/coll_mod_reply_validation.h"
#include "mongo/db/commands.h"
-#include "mongo/db/commands/feature_compatibility_version.h"
#include "mongo/db/curop.h"
#include "mongo/db/s/collmod_coordinator.h"
-#include "mongo/db/s/collmod_coordinator_pre60_compatible.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/s/sharding_util.h"
#include "mongo/db/timeseries/catalog_helper.h"
-#include "mongo/db/timeseries/timeseries_commands_conversion_helper.h"
#include "mongo/logv2/log.h"
-#include "mongo/s/chunk_manager_targeter.h"
-#include "mongo/s/cluster_commands_helpers.h"
-#include "mongo/s/grid.h"
-#include "mongo/util/fail_point.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
@@ -53,9 +46,6 @@
namespace mongo {
namespace {
-MONGO_FAIL_POINT_DEFINE(collModPrimaryDispatching);
-MONGO_FAIL_POINT_DEFINE(collModCoordinatorPre60Compatible);
-
class ShardsvrCollModCommand final : public BasicCommandWithRequestParser<ShardsvrCollModCommand> {
public:
using Request = ShardsvrCollMod;
@@ -112,29 +102,6 @@ public:
CurOp::get(opCtx)->raiseDbProfileLevel(
CollectionCatalog::get(opCtx)->getDatabaseProfileLevel(cmd.getNamespace().dbName()));
- boost::optional<FixedFCVRegion> fcvRegion;
- fcvRegion.emplace(opCtx);
-
- if (MONGO_unlikely(collModPrimaryDispatching.shouldFail())) {
- return runWithDispatchingCommands(opCtx, result, cmd);
- } else if (MONGO_unlikely(collModCoordinatorPre60Compatible.shouldFail())) {
- return runWithDDLCoordinatorPre60Compatible(opCtx, result, cmd, fcvRegion);
- }
-
- if (fcvRegion.get()->isLessThan(multiversion::FeatureCompatibilityVersion::kVersion_5_3)) {
- return runWithDispatchingCommands(opCtx, result, cmd);
- } else if (fcvRegion.get()->isLessThan(
- multiversion::FeatureCompatibilityVersion::kVersion_6_0)) {
- return runWithDDLCoordinatorPre60Compatible(opCtx, result, cmd, fcvRegion);
- } else {
- return runWithDDLCoordinator(opCtx, result, cmd, fcvRegion);
- }
- }
-
- bool runWithDDLCoordinator(OperationContext* opCtx,
- BSONObjBuilder& result,
- const ShardsvrCollMod& cmd,
- boost::optional<FixedFCVRegion>& fcvRegion) {
auto coordinatorDoc = CollModCoordinatorDocument();
coordinatorDoc.setCollModRequest(cmd.getCollModRequest());
coordinatorDoc.setShardingDDLCoordinatorMetadata(
@@ -142,73 +109,10 @@ public:
auto service = ShardingDDLCoordinatorService::getService(opCtx);
auto collModCoordinator = checked_pointer_cast<CollModCoordinator>(
service->getOrCreateInstance(opCtx, coordinatorDoc.toBSON()));
- fcvRegion = boost::none;
- result.appendElements(collModCoordinator->getResult(opCtx));
- return true;
- }
-
- bool runWithDDLCoordinatorPre60Compatible(OperationContext* opCtx,
- BSONObjBuilder& result,
- const ShardsvrCollMod& cmd,
- boost::optional<FixedFCVRegion>& fcvRegion) {
- auto coordinatorDoc = CollModCoordinatorDocument();
- coordinatorDoc.setCollModRequest(cmd.getCollModRequest());
- coordinatorDoc.setShardingDDLCoordinatorMetadata(
- {{cmd.getNamespace(), DDLCoordinatorTypeEnum::kCollModPre60Compatible}});
- auto service = ShardingDDLCoordinatorService::getService(opCtx);
- auto collModCoordinator = checked_pointer_cast<CollModCoordinatorPre60Compatible>(
- service->getOrCreateInstance(opCtx, coordinatorDoc.toBSON()));
- fcvRegion = boost::none;
result.appendElements(collModCoordinator->getResult(opCtx));
return true;
}
- bool runWithDispatchingCommands(OperationContext* opCtx,
- BSONObjBuilder& result,
- const ShardsvrCollMod& cmd) {
- const auto& nss = cmd.getNamespace();
- auto collModCmd = CollMod(nss);
- collModCmd.setCollModRequest(cmd.getCollModRequest());
- auto collModCmdObj = collModCmd.toBSON({});
-
- const auto targeter = ChunkManagerTargeter(opCtx, nss);
- const auto& routingInfo = targeter.getRoutingInfo();
- if (targeter.timeseriesNamespaceNeedsRewrite(nss)) {
- collModCmdObj =
- timeseries::makeTimeseriesCommand(collModCmdObj,
- nss,
- CollMod::kCommandName,
- CollMod::kIsTimeseriesNamespaceFieldName);
- }
-
- std::set<ShardId> participants;
- if (routingInfo.isSharded()) {
- std::unique_ptr<CollatorInterface> collator;
- const auto expCtx =
- make_intrusive<ExpressionContext>(opCtx, std::move(collator), targeter.getNS());
- routingInfo.getShardIdsForQuery(
- expCtx, {} /* query */, {} /* collation */, &participants);
- } else {
- participants.insert(routingInfo.dbPrimary());
- }
-
- auto executor = Grid::get(opCtx)->getExecutorPool()->getFixedExecutor();
- const auto& responses = sharding_util::sendCommandToShards(
- opCtx,
- targeter.getNS().db(),
- CommandHelpers::appendMajorityWriteConcern(collModCmdObj, opCtx->getWriteConcern()),
- {std::make_move_iterator(participants.begin()),
- std::make_move_iterator(participants.end())},
- executor);
-
- std::string errmsg;
- auto ok = appendRawResponses(opCtx, &errmsg, &result, std::move(responses)).responseOK;
- if (!errmsg.empty()) {
- CommandHelpers::appendSimpleCommandStatus(result, ok, errmsg);
- }
- return ok;
- }
-
void validateResult(const BSONObj& resultObj) final {
StringDataSet ignorableFields({"raw", "ok", "errmsg"});
auto reply = Response::parse(IDLParserErrorContext("CollModReply"),
diff --git a/src/mongo/db/s/shardsvr_collmod_participant_command.cpp b/src/mongo/db/s/shardsvr_collmod_participant_command.cpp
index b321236caf1..2a7e78886b2 100644
--- a/src/mongo/db/s/shardsvr_collmod_participant_command.cpp
+++ b/src/mongo/db/s/shardsvr_collmod_participant_command.cpp
@@ -69,6 +69,10 @@ public:
return Command::AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
diff --git a/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp b/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp
index f4240c1eb0a..3d9be030fcb 100644
--- a/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp
+++ b/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp
@@ -107,7 +107,7 @@ public:
// If commit actually went through, the resharding documents will be cleaned up. If
// documents still exist, it could be because that commit was interrupted or that the
// underlying replica set node is no longer primary.
- doNoopWrite(opCtx, "_shardsvrCommitReshardCollection no-op", ns());
+ resharding::doNoopWrite(opCtx, "_shardsvrCommitReshardCollection no-op", ns());
PersistentTaskStore<CommonReshardingMetadata> donorReshardingOpStore(
NamespaceString::kDonorReshardingOperationsNamespace);
uassert(5795302,
diff --git a/src/mongo/db/s/shardsvr_create_collection_command.cpp b/src/mongo/db/s/shardsvr_create_collection_command.cpp
index bcc2e17a9fd..3769e253b7b 100644
--- a/src/mongo/db/s/shardsvr_create_collection_command.cpp
+++ b/src/mongo/db/s/shardsvr_create_collection_command.cpp
@@ -144,21 +144,11 @@ public:
FixedFCVRegion fixedFcvRegion(opCtx);
auto coordinatorDoc = [&] {
- if (serverGlobalParams.featureCompatibility.isLessThan(
- multiversion::FeatureCompatibilityVersion::kVersion_6_0)) {
- auto doc = CreateCollectionCoordinatorDocumentPre60Compatible();
- doc.setShardingDDLCoordinatorMetadata(
- {{std::move(nss),
- DDLCoordinatorTypeEnum::kCreateCollectionPre60Compatible}});
- doc.setCreateCollectionRequest(std::move(createCmdRequest));
- return doc.toBSON();
- } else {
- auto doc = CreateCollectionCoordinatorDocument();
- doc.setShardingDDLCoordinatorMetadata(
- {{std::move(nss), DDLCoordinatorTypeEnum::kCreateCollection}});
- doc.setCreateCollectionRequest(std::move(createCmdRequest));
- return doc.toBSON();
- }
+ auto doc = CreateCollectionCoordinatorDocument();
+ doc.setShardingDDLCoordinatorMetadata(
+ {{std::move(nss), DDLCoordinatorTypeEnum::kCreateCollection}});
+ doc.setCreateCollectionRequest(std::move(createCmdRequest));
+ return doc.toBSON();
}();
auto service = ShardingDDLCoordinatorService::getService(opCtx);
diff --git a/src/mongo/db/s/shardsvr_create_collection_participant_command.cpp b/src/mongo/db/s/shardsvr_create_collection_participant_command.cpp
index fd7c8217403..4157f1145f8 100644
--- a/src/mongo/db/s/shardsvr_create_collection_participant_command.cpp
+++ b/src/mongo/db/s/shardsvr_create_collection_participant_command.cpp
@@ -65,6 +65,10 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
@@ -76,6 +80,11 @@ public:
CommandHelpers::uassertCommandRunWithMajority(Request::kCommandName,
opCtx->getWriteConcern());
+ const auto txnParticipant = TransactionParticipant::get(opCtx);
+ uassert(6077300,
+ str::stream() << Request::kCommandName << " must be run as a retryable write",
+ txnParticipant);
+
opCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE();
MigrationDestinationManager::cloneCollectionIndexesAndOptions(
@@ -86,23 +95,15 @@ public:
request().getIdIndex(),
request().getOptions()});
- // The txnParticipant will only be missing when the command was sent from a coordinator
- // running an old 5.0.0 binary that didn't attach a sessionId & txnNumber.
- // TODO SERVER-60773: Once 6.0 has branched out, txnParticipant must always exist. Add a
- // uassert for that.
- auto txnParticipant = TransactionParticipant::get(opCtx);
- if (txnParticipant) {
- // Since no write that generated a retryable write oplog entry with this sessionId
- // and txnNumber happened, we need to make a dummy write so that the session gets
- // durably persisted on the oplog. This must be the last operation done on this
- // command.
- DBDirectClient client(opCtx);
- client.update(NamespaceString::kServerConfigurationNamespace.ns(),
- BSON("_id" << Request::kCommandName),
- BSON("$inc" << BSON("count" << 1)),
- true /* upsert */,
- false /* multi */);
- }
+ // Since no write that generated a retryable write oplog entry with this sessionId and
+ // txnNumber happened, we need to make a dummy write so that the session gets durably
+ // persisted on the oplog. This must be the last operation done on this command.
+ DBDirectClient client(opCtx);
+ client.update(NamespaceString::kServerConfigurationNamespace.ns(),
+ BSON("_id" << Request::kCommandName),
+ BSON("$inc" << BSON("count" << 1)),
+ true /* upsert */,
+ false /* multi */);
}
private:
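
A short reading aid (not code from the patch) on the pattern repeated across the participant commands touched here, now that the coordinator always attaches a session:

// 1. The coordinator attaches {lsid, txnNumber} via _updateSession()/getCurrentSession(),
//    so TransactionParticipant::get(opCtx) is non-null; the new uasserts (6077300-6077303)
//    reject requests that arrive without one.
// 2. The trailing DBDirectClient upsert is a dummy write: it produces a retryable-write
//    oplog entry for this {lsid, txnNumber}, so a retried command is recognized as already
//    executed instead of being re-applied.
// 3. It is performed last so that an interrupted attempt cannot be marked as executed
//    before the command's real work has completed.
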
diff --git a/src/mongo/db/s/shardsvr_drop_collection_participant_command.cpp b/src/mongo/db/s/shardsvr_drop_collection_participant_command.cpp
index 658c894a209..31c19139c38 100644
--- a/src/mongo/db/s/shardsvr_drop_collection_participant_command.cpp
+++ b/src/mongo/db/s/shardsvr_drop_collection_participant_command.cpp
@@ -64,6 +64,10 @@ public:
"directly. Participates in droping a collection.";
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
using Request = ShardsvrDropCollectionParticipant;
class Invocation final : public InvocationBase {
@@ -75,6 +79,11 @@ public:
CommandHelpers::uassertCommandRunWithMajority(Request::kCommandName,
opCtx->getWriteConcern());
+ const auto txnParticipant = TransactionParticipant::get(opCtx);
+ uassert(6077301,
+ str::stream() << Request::kCommandName << " must be run as a retryable write",
+ txnParticipant);
+
opCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE();
try {
@@ -86,23 +95,16 @@ public:
"namespace"_attr = ns());
}
- // The txnParticipant will only be missing when the command was sent from a coordinator
- // running an old 5.0.0 binary that didn't attach a sessionId & txnNumber.
- // TODO SERVER-60773: Once 6.0 has branched out, txnParticipant must always exist. Add a
- // uassert for that.
- auto txnParticipant = TransactionParticipant::get(opCtx);
- if (txnParticipant) {
- // Since no write that generated a retryable write oplog entry with this sessionId
- // and txnNumber happened, we need to make a dummy write so that the session gets
- // durably persisted on the oplog. This must be the last operation done on this
- // command.
- DBDirectClient client(opCtx);
- client.update(NamespaceString::kServerConfigurationNamespace.ns(),
- BSON("_id" << Request::kCommandName),
- BSON("$inc" << BSON("count" << 1)),
- true /* upsert */,
- false /* multi */);
- }
+
+ // Since no write that generated a retryable write oplog entry with this sessionId and
+ // txnNumber happened, we need to make a dummy write so that the session gets durably
+ // persisted on the oplog. This must be the last operation done on this command.
+ DBDirectClient client(opCtx);
+ client.update(NamespaceString::kServerConfigurationNamespace.ns(),
+ BSON("_id" << Request::kCommandName),
+ BSON("$inc" << BSON("count" << 1)),
+ true /* upsert */,
+ false /* multi */);
}
private:
diff --git a/src/mongo/db/s/shardsvr_merge_chunks_command.cpp b/src/mongo/db/s/shardsvr_merge_chunks_command.cpp
index c3971e7afd6..8b3892a907b 100644
--- a/src/mongo/db/s/shardsvr_merge_chunks_command.cpp
+++ b/src/mongo/db/s/shardsvr_merge_chunks_command.cpp
@@ -149,8 +149,7 @@ void mergeChunks(OperationContext* opCtx,
auto shardVersionReceived = [&]() -> boost::optional<ChunkVersion> {
// Old versions might not have the shardVersion field
if (cmdResponse.response[ChunkVersion::kShardVersionField]) {
- return ChunkVersion::fromBSONPositionalOrNewerFormat(
- cmdResponse.response[ChunkVersion::kShardVersionField]);
+ return ChunkVersion::parse(cmdResponse.response[ChunkVersion::kShardVersionField]);
}
return boost::none;
}();
diff --git a/src/mongo/db/s/shardsvr_participant_block_command.cpp b/src/mongo/db/s/shardsvr_participant_block_command.cpp
index 9ff5f58127c..c6774bd7bec 100644
--- a/src/mongo/db/s/shardsvr_participant_block_command.cpp
+++ b/src/mongo/db/s/shardsvr_participant_block_command.cpp
@@ -62,6 +62,10 @@ public:
return Command::AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
diff --git a/src/mongo/db/s/shardsvr_rename_collection_participant_command.cpp b/src/mongo/db/s/shardsvr_rename_collection_participant_command.cpp
index 73a182754e5..16d75a2bfb9 100644
--- a/src/mongo/db/s/shardsvr_rename_collection_participant_command.cpp
+++ b/src/mongo/db/s/shardsvr_rename_collection_participant_command.cpp
@@ -68,6 +68,10 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
@@ -76,6 +80,11 @@ public:
CommandHelpers::uassertCommandRunWithMajority(Request::kCommandName,
opCtx->getWriteConcern());
+ const auto txnParticipant = TransactionParticipant::get(opCtx);
+ uassert(6077302,
+ str::stream() << Request::kCommandName << " must be run as a retryable write",
+ txnParticipant);
+
auto const shardingState = ShardingState::get(opCtx);
uassertStatusOK(shardingState->canAcceptShardedCommands());
auto const& req = request();
@@ -100,23 +109,15 @@ public:
renameCollectionParticipant->getBlockCRUDAndRenameCompletionFuture().get(opCtx);
- // The txnParticipant will only be missing when the command was sent from a coordinator
- // running an old 5.0.0 binary that didn't attach a sessionId & txnNumber.
- // TODO SERVER-60773: Once 6.0 has branched out, txnParticipant must always exist. Add a
- // uassert for that.
- auto txnParticipant = TransactionParticipant::get(opCtx);
- if (txnParticipant) {
- // Since no write that generated a retryable write oplog entry with this sessionId
- // and txnNumber happened, we need to make a dummy write so that the session gets
- // durably persisted on the oplog. This must be the last operation done on this
- // command.
- DBDirectClient client(opCtx);
- client.update(NamespaceString::kServerConfigurationNamespace.ns(),
- BSON("_id" << Request::kCommandName),
- BSON("$inc" << BSON("count" << 1)),
- true /* upsert */,
- false /* multi */);
- }
+ // Since no write that generated a retryable write oplog entry with this sessionId and
+ // txnNumber happened, we need to make a dummy write so that the session gets durably
+ // persisted on the oplog. This must be the last operation done on this command.
+ DBDirectClient client(opCtx);
+ client.update(NamespaceString::kServerConfigurationNamespace.ns(),
+ BSON("_id" << Request::kCommandName),
+ BSON("$inc" << BSON("count" << 1)),
+ true /* upsert */,
+ false /* multi */);
}
private:
@@ -162,6 +163,10 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
@@ -170,6 +175,11 @@ public:
CommandHelpers::uassertCommandRunWithMajority(Request::kCommandName,
opCtx->getWriteConcern());
+ const auto txnParticipant = TransactionParticipant::get(opCtx);
+ uassert(6077303,
+ str::stream() << Request::kCommandName << " must be run as a retryable write",
+ txnParticipant);
+
auto const shardingState = ShardingState::get(opCtx);
uassertStatusOK(shardingState->canAcceptShardedCommands());
@@ -187,23 +197,16 @@ public:
optRenameCollectionParticipant.get()->getUnblockCrudFuture().get(opCtx);
}
- // The txnParticipant will only be missing when the command was sent from a coordinator
- // running an old 5.0.0 binary that didn't attach a sessionId & txnNumber.
- // TODO SERVER-60773: Once 6.0 has branched out, txnParticipant must always exist. Add a
- // uassert for that.
- auto txnParticipant = TransactionParticipant::get(opCtx);
- if (txnParticipant) {
- // Since no write that generated a retryable write oplog entry with this sessionId
- // and txnNumber happened, we need to make a dummy write so that the session gets
- // durably persisted on the oplog. This must be the last operation done on this
- // command.
- DBDirectClient client(opCtx);
- client.update(NamespaceString::kServerConfigurationNamespace.ns(),
- BSON("_id" << Request::kCommandName),
- BSON("$inc" << BSON("count" << 1)),
- true /* upsert */,
- false /* multi */);
- }
+ // Since no write that generated a retryable write oplog entry with this sessionId
+ // and txnNumber happened, we need to make a dummy write so that the session gets
+ // durably persisted on the oplog. This must be the last operation done on this
+ // command.
+ DBDirectClient client(opCtx);
+ client.update(NamespaceString::kServerConfigurationNamespace.ns(),
+ BSON("_id" << Request::kCommandName),
+ BSON("$inc" << BSON("count" << 1)),
+ true /* upsert */,
+ false /* multi */);
}
private:
diff --git a/src/mongo/db/s/shardsvr_resharding_operation_time_command.cpp b/src/mongo/db/s/shardsvr_resharding_operation_time_command.cpp
index 4c3e05a7879..56bf7b644f3 100644
--- a/src/mongo/db/s/shardsvr_resharding_operation_time_command.cpp
+++ b/src/mongo/db/s/shardsvr_resharding_operation_time_command.cpp
@@ -108,10 +108,9 @@ public:
}
Response typedRun(OperationContext* opCtx) {
- auto instances =
- getReshardingStateMachines<ReshardingRecipientService,
- ReshardingRecipientService::RecipientStateMachine>(opCtx,
- ns());
+ auto instances = resharding::getReshardingStateMachines<
+ ReshardingRecipientService,
+ ReshardingRecipientService::RecipientStateMachine>(opCtx, ns());
if (instances.empty()) {
return Response{boost::none, boost::none};
}
diff --git a/src/mongo/db/s/shardsvr_set_cluster_parameter_command.cpp b/src/mongo/db/s/shardsvr_set_cluster_parameter_command.cpp
index d5d2593bdf2..e8ed9e14277 100644
--- a/src/mongo/db/s/shardsvr_set_cluster_parameter_command.cpp
+++ b/src/mongo/db/s/shardsvr_set_cluster_parameter_command.cpp
@@ -127,6 +127,10 @@ public:
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
return AllowedOnSecondary::kNever;
}
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
} shardsvrSetClusterParameterCmd;
} // namespace
diff --git a/src/mongo/db/s/shardsvr_set_user_write_block_mode_command.cpp b/src/mongo/db/s/shardsvr_set_user_write_block_mode_command.cpp
index 49bdc1b90bb..ceecece4027 100644
--- a/src/mongo/db/s/shardsvr_set_user_write_block_mode_command.cpp
+++ b/src/mongo/db/s/shardsvr_set_user_write_block_mode_command.cpp
@@ -198,6 +198,10 @@ public:
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
return AllowedOnSecondary::kNever;
}
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
} shardsvrSetUserWriteBlockModeCmd;
} // namespace
diff --git a/src/mongo/db/s/split_chunk.cpp b/src/mongo/db/s/split_chunk.cpp
index 062f5b47752..964871d3740 100644
--- a/src/mongo/db/s/split_chunk.cpp
+++ b/src/mongo/db/s/split_chunk.cpp
@@ -255,8 +255,7 @@ StatusWith<boost::optional<ChunkRange>> splitChunk(
boost::optional<ChunkVersion> shardVersionReceived = [&]() -> boost::optional<ChunkVersion> {
// old versions might not have the shardVersion field
if (cmdResponse.response[ChunkVersion::kShardVersionField]) {
- return ChunkVersion::fromBSONPositionalOrNewerFormat(
- cmdResponse.response[ChunkVersion::kShardVersionField]);
+ return ChunkVersion::parse(cmdResponse.response[ChunkVersion::kShardVersionField]);
}
return boost::none;
}();
diff --git a/src/mongo/db/s/transaction_coordinator_service.cpp b/src/mongo/db/s/transaction_coordinator_service.cpp
index 41b758cffec..c317922c251 100644
--- a/src/mongo/db/s/transaction_coordinator_service.cpp
+++ b/src/mongo/db/s/transaction_coordinator_service.cpp
@@ -379,6 +379,10 @@ TransactionCoordinatorService::getAllRemovalFuturesForCoordinatorsForInternalTra
std::shared_ptr<CatalogAndScheduler> cas = _getCatalogAndScheduler(opCtx);
auto& catalog = cas->catalog;
+ // On step up, we want to wait until the catalog has recovered all active transaction
+ // coordinators before getting the removal futures.
+ cas->recoveryTaskCompleted->get(opCtx);
+
auto predicate = [](const LogicalSessionId lsid,
const TxnNumberAndRetryCounter txnNumberAndRetryCounter,
const std::shared_ptr<TransactionCoordinator> transactionCoordinator) {
diff --git a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
index e60c2ad339e..dd4b94aae1c 100644
--- a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
+++ b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
@@ -59,6 +59,14 @@ public:
return true;
}
+ bool isTransactionCommand() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class PrepareTimestamp {
public:
PrepareTimestamp(Timestamp timestamp) : _timestamp(std::move(timestamp)) {}
@@ -383,6 +391,18 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool isTransactionCommand() const final {
+ return true;
+ }
+
+ bool shouldCheckoutSession() const final {
+ return false;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
} coordinateCommitTransactionCmd;
} // namespace
diff --git a/src/mongo/db/s/type_shard_collection.cpp b/src/mongo/db/s/type_shard_collection.cpp
index 4dbb0b1c2f8..2628297e0d7 100644
--- a/src/mongo/db/s/type_shard_collection.cpp
+++ b/src/mongo/db/s/type_shard_collection.cpp
@@ -52,15 +52,6 @@ ShardCollectionType::ShardCollectionType(const BSONObj& obj) {
uassert(ErrorCodes::ShardKeyNotFound,
str::stream() << "Empty shard key. Failed to parse: " << obj.toString(),
!getKeyPattern().toBSON().isEmpty());
-
- // Last refreshed collection version is stored as a timestamp in the BSON representation of
- // shard collection type for legacy reasons. We therefore explicitly convert this timestamp, if
- // it exists, into a chunk version.
- if (getLastRefreshedCollectionVersion()) {
- ChunkVersion version = *getLastRefreshedCollectionVersion();
- setLastRefreshedCollectionVersion(ChunkVersion(
- version.majorVersion(), version.minorVersion(), getEpoch(), getTimestamp()));
- }
}
BSONObj ShardCollectionType::toBSON() const {
@@ -83,4 +74,15 @@ void ShardCollectionType::setAllowMigrations(bool allowMigrations) {
setPre50CompatibleAllowMigrations(false);
}
+boost::optional<ChunkVersion> ShardCollectionType::getLastRefreshedCollectionVersion() const {
+ // Last refreshed collection version is stored as a timestamp in the BSON representation of
+ // shard collection type for legacy reasons. We therefore explicitly convert this timestamp, if
+ // it exists, into a chunk version.
+ if (!getLastRefreshedCollectionMajorMinorVersion())
+ return boost::none;
+
+ Timestamp majorMinor = *getLastRefreshedCollectionMajorMinorVersion();
+ return ChunkVersion({getEpoch(), getTimestamp()}, {majorMinor.getSecs(), majorMinor.getInc()});
+}
+
} // namespace mongo
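
A standalone sketch of the legacy encoding this accessor unpacks (epoch and timestamp values are placeholders): the persisted BSON timestamp carries the major version in its seconds field and the minor version in its increment field, matching the updated unit test below.

// Sketch only: mapping the legacy lastRefreshedCollectionVersion encoding to a ChunkVersion.
const Timestamp legacyMajorMinor(123, 45);   // secs = major version, inc = minor version
const OID epoch = OID::gen();                // placeholder collection epoch
const Timestamp collTimestamp(1, 1);         // placeholder collection timestamp
const ChunkVersion version({epoch, collTimestamp},
                           {legacyMajorMinor.getSecs(), legacyMajorMinor.getInc()});
// version now reports majorVersion() == 123 and minorVersion() == 45.
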
diff --git a/src/mongo/db/s/type_shard_collection.h b/src/mongo/db/s/type_shard_collection.h
index 8180358174a..de6e56eb784 100644
--- a/src/mongo/db/s/type_shard_collection.h
+++ b/src/mongo/db/s/type_shard_collection.h
@@ -42,7 +42,7 @@ public:
using ShardCollectionTypeBase::kEnterCriticalSectionCounterFieldName;
using ShardCollectionTypeBase::kEpochFieldName;
using ShardCollectionTypeBase::kKeyPatternFieldName;
- using ShardCollectionTypeBase::kLastRefreshedCollectionVersionFieldName;
+ using ShardCollectionTypeBase::kLastRefreshedCollectionMajorMinorVersionFieldName;
using ShardCollectionTypeBase::kNssFieldName;
using ShardCollectionTypeBase::kRefreshingFieldName;
using ShardCollectionTypeBase::kReshardingFieldsFieldName;
@@ -57,7 +57,6 @@ public:
using ShardCollectionTypeBase::getEnterCriticalSectionCounter;
using ShardCollectionTypeBase::getEpoch;
using ShardCollectionTypeBase::getKeyPattern;
- using ShardCollectionTypeBase::getLastRefreshedCollectionVersion;
using ShardCollectionTypeBase::getMaxChunkSizeBytes;
using ShardCollectionTypeBase::getNss;
using ShardCollectionTypeBase::getRefreshing;
@@ -94,6 +93,8 @@ public:
return getPre50CompatibleAllowMigrations().value_or(true);
}
void setAllowMigrations(bool allowMigrations);
+
+ boost::optional<ChunkVersion> getLastRefreshedCollectionVersion() const;
};
} // namespace mongo
diff --git a/src/mongo/db/s/type_shard_collection.idl b/src/mongo/db/s/type_shard_collection.idl
index 051a6de35d3..d56b231e302 100644
--- a/src/mongo/db/s/type_shard_collection.idl
+++ b/src/mongo/db/s/type_shard_collection.idl
@@ -80,15 +80,6 @@ imports:
- "mongo/s/resharding/type_collection_fields.idl"
- "mongo/s/type_collection_common_types.idl"
-types:
- ChunkVersionLegacy:
- bson_serialization_type: any
- description: "An object representing a chunk version for a collection. Ignores the
- component in the chunk version for legacy reasons."
- cpp_type: ChunkVersion
- serializer: ChunkVersion::serialiseMajorMinorVersionOnlyForShardCollectionType
- deserializer: ChunkVersion::parseMajorMinorVersionOnlyFromShardCollectionType
-
structs:
ShardCollectionTypeBase:
description: "Represents the layout and contents of documents contained in the shard
@@ -102,11 +93,12 @@ structs:
optional: false
epoch:
type: objectid
+ optional: false
description: "Uniquely identifies this instance of the collection, in case of
drop/create or shard key refine."
- optional: false
timestamp:
type: timestamp
+ optional: false
description: "Uniquely identifies this incarnation of the collection. Only changes
in case of drop and create, or shard key refine.
This field will store the ClusterTime of the Config Server when the
@@ -141,7 +133,8 @@ structs:
chunk metadata."
optional: true
lastRefreshedCollectionVersion:
- type: ChunkVersionLegacy
+ type: timestamp
+ cpp_name: lastRefreshedCollectionMajorMinorVersion
description: "Set by primaries and used by shard secondaries to safely refresh chunk
metadata. Indicates the collection version of the last complete chunk
metadata refresh, and is used to indicate if a refresh occurred if the
diff --git a/src/mongo/db/s/type_shard_collection_test.cpp b/src/mongo/db/s/type_shard_collection_test.cpp
index 59a85b1e13c..f21418cc206 100644
--- a/src/mongo/db/s/type_shard_collection_test.cpp
+++ b/src/mongo/db/s/type_shard_collection_test.cpp
@@ -67,25 +67,12 @@ TEST(ShardCollectionType, FromBSONEpochMatchesLastRefreshedCollectionVersionWhen
<< ShardCollectionType::kUuidFieldName << UUID::gen()
<< ShardCollectionType::kKeyPatternFieldName << kKeyPattern
<< ShardCollectionType::kUniqueFieldName << true
- << ShardCollectionType::kLastRefreshedCollectionVersionFieldName << Timestamp(1, 1)));
- ASSERT_EQ(epoch, shardCollType.getLastRefreshedCollectionVersion()->epoch());
- ASSERT_EQ(timestamp, shardCollType.getLastRefreshedCollectionVersion()->getTimestamp());
-}
-
-TEST(ShardCollectionType, FromBSONEpochMatchesLastRefreshedCollectionVersionWhenDate) {
- OID epoch = OID::gen();
- Timestamp timestamp(1, 1);
-
- ShardCollectionType shardCollType(
- BSON(ShardCollectionType::kNssFieldName
- << kNss.ns() << ShardCollectionType::kEpochFieldName << epoch
- << ShardCollectionType::kUuidFieldName << UUID::gen()
- << ShardCollectionType::kTimestampFieldName << timestamp
- << ShardCollectionType::kKeyPatternFieldName << kKeyPattern
- << ShardCollectionType::kUniqueFieldName << true
- << ShardCollectionType::kLastRefreshedCollectionVersionFieldName << Date_t()));
+ << ShardCollectionType::kLastRefreshedCollectionMajorMinorVersionFieldName
+ << Timestamp(123, 45)));
ASSERT_EQ(epoch, shardCollType.getLastRefreshedCollectionVersion()->epoch());
ASSERT_EQ(timestamp, shardCollType.getLastRefreshedCollectionVersion()->getTimestamp());
+ ASSERT_EQ(Timestamp(123, 45),
+ Timestamp(shardCollType.getLastRefreshedCollectionVersion()->toLong()));
}
TEST(ShardCollectionType, ToBSONEmptyDefaultCollationNotIncluded) {
diff --git a/src/mongo/db/server_options.h b/src/mongo/db/server_options.h
index 643fe7a46a1..25a69ffa995 100644
--- a/src/mongo/db/server_options.h
+++ b/src/mongo/db/server_options.h
@@ -201,15 +201,6 @@ struct ServerGlobalParams {
version != multiversion::GenericFCV::kLastLTS;
}
- bool isFCVUpgradingToOrAlreadyLatest() const {
- auto currentVersion = getVersion();
-
- // (Generic FCV reference): This FCV reference should exist across LTS binary versions.
- return currentVersion == multiversion::GenericFCV::kUpgradingFromLastLTSToLatest ||
- isGreaterThanOrEqualTo(
- multiversion::GenericFCV::kUpgradingFromLastContinuousToLatest);
- }
-
bool isFCVDowngradingOrAlreadyDowngradedFromLatest() const {
auto currentVersion = getVersion();
diff --git a/src/mongo/db/serverless/shard_split_donor_op_observer.cpp b/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
index ce1d0e55ddf..9de2da2e33d 100644
--- a/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
@@ -42,6 +42,10 @@ bool isSecondary(const OperationContext* opCtx) {
return !opCtx->writesAreReplicated();
}
+bool isPrimary(const OperationContext* opCtx) {
+ return opCtx->writesAreReplicated();
+}
+
const auto tenantIdsToDeleteDecoration =
OperationContext::declareDecoration<boost::optional<std::vector<std::string>>>();
@@ -50,6 +54,13 @@ ShardSplitDonorDocument parseAndValidateDonorDocument(const BSONObj& doc) {
ShardSplitDonorDocument::parse(IDLParserErrorContext("donorStateDoc"), doc);
const std::string errmsg = "Invalid donor state doc, {}: {}";
+ if (donorStateDoc.getExpireAt()) {
+ uassert(ErrorCodes::BadValue,
+ "Contains 'expireAt' but the split has not committed or aborted",
+ donorStateDoc.getState() == ShardSplitDonorStateEnum::kCommitted ||
+ donorStateDoc.getState() == ShardSplitDonorStateEnum::kAborted);
+ }
+
switch (donorStateDoc.getState()) {
case ShardSplitDonorStateEnum::kUninitialized:
uassert(ErrorCodes::BadValue,
@@ -68,6 +79,12 @@ ShardSplitDonorDocument parseAndValidateDonorDocument(const BSONObj& doc) {
doc.toString()),
!donorStateDoc.getAbortReason());
break;
+ case ShardSplitDonorStateEnum::kAbortingIndexBuilds:
+ uassert(ErrorCodes::BadValue,
+ errmsg,
+ !donorStateDoc.getBlockTimestamp() && !donorStateDoc.getCommitOrAbortOpTime() &&
+ !donorStateDoc.getAbortReason());
+ break;
case ShardSplitDonorStateEnum::kBlocking:
uassert(ErrorCodes::BadValue,
fmt::format(errmsg,
@@ -125,54 +142,61 @@ ShardSplitDonorDocument parseAndValidateDonorDocument(const BSONObj& doc) {
* Initializes the TenantMigrationDonorAccessBlocker for the tenant migration denoted by the given
* state doc.
*/
-void onBlockerInitialization(OperationContext* opCtx,
- const ShardSplitDonorDocument& donorStateDoc) {
- invariant(donorStateDoc.getState() == ShardSplitDonorStateEnum::kBlocking);
- invariant(donorStateDoc.getBlockTimestamp());
-
- auto optionalTenants = donorStateDoc.getTenantIds();
- invariant(optionalTenants);
-
- const auto& tenantIds = optionalTenants.get();
+void onTransitionToAbortingIndexBuilds(OperationContext* opCtx,
+ const ShardSplitDonorDocument& donorStateDoc) {
+ invariant(donorStateDoc.getState() == ShardSplitDonorStateEnum::kAbortingIndexBuilds);
+ invariant(donorStateDoc.getTenantIds());
+ invariant(donorStateDoc.getRecipientConnectionString());
+
+ auto tenantIds = *donorStateDoc.getTenantIds();
+ auto recipientConnectionString = *donorStateDoc.getRecipientConnectionString();
+ for (const auto& tenantId : tenantIds) {
+ auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>(
+ opCtx->getServiceContext(),
+ donorStateDoc.getId(),
+ tenantId.toString(),
+ MigrationProtocolEnum::kMultitenantMigrations,
+ recipientConnectionString.toString());
+
+ TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()).add(tenantId, mtab);
+ }
- // The primary create and sets the tenant access blocker to blocking within the
- // ShardSplitDonorService.
- if (isSecondary(opCtx)) {
- auto recipientConnectionString = [stateDoc = donorStateDoc]() {
- if (stateDoc.getRecipientConnectionString()) {
- return *stateDoc.getRecipientConnectionString();
+ if (isPrimary(opCtx)) {
+ // onRollback is not registered on secondaries since secondaries should not fail to
+ // apply the write.
+ opCtx->recoveryUnit()->onRollback([opCtx, tenantIds] {
+ for (const auto& tenantId : tenantIds) {
+ TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
+ .remove(tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
}
-
- auto recipientTagName = stateDoc.getRecipientTagName();
- invariant(recipientTagName);
- auto recipientSetName = stateDoc.getRecipientSetName();
- invariant(recipientSetName);
- auto config = repl::ReplicationCoordinator::get(cc().getServiceContext())->getConfig();
- return serverless::makeRecipientConnectionString(
- config, *recipientTagName, *recipientSetName);
- }();
-
- for (const auto& tenantId : tenantIds) {
- auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>(
- opCtx->getServiceContext(),
- donorStateDoc.getId(),
- tenantId.toString(),
- MigrationProtocolEnum::kMultitenantMigrations,
- recipientConnectionString.toString());
-
- TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
- .add(tenantId, mtab);
-
- // No rollback handler is necessary as the write should not fail on secondaries.
- mtab->startBlockingWrites();
- }
+ });
}
+}
- for (const auto& tenantId : tenantIds) {
+/**
+ * Transitions the TenantMigrationDonorAccessBlocker to the blocking state.
+ */
+void onTransitionToBlocking(OperationContext* opCtx, const ShardSplitDonorDocument& donorStateDoc) {
+ invariant(donorStateDoc.getState() == ShardSplitDonorStateEnum::kBlocking);
+ invariant(donorStateDoc.getBlockTimestamp());
+ invariant(donorStateDoc.getTenantIds());
+
+ auto tenantIds = *donorStateDoc.getTenantIds();
+ for (auto tenantId : tenantIds) {
auto mtab = tenant_migration_access_blocker::getTenantMigrationDonorAccessBlocker(
opCtx->getServiceContext(), tenantId);
invariant(mtab);
+ if (isSecondary(opCtx)) {
+ // A primary calls startBlockingWrites on the TenantMigrationDonorAccessBlocker before
+ // reserving the OpTime for the "start blocking" write, so only secondaries call
+ // startBlockingWrites on the TenantMigrationDonorAccessBlocker in the op observer.
+ mtab->startBlockingWrites();
+ }
+
+ // Both primaries and secondaries call startBlockingReadsAfter in the op observer, since
+ // startBlockingReadsAfter just needs to be called before the "start blocking" write's oplog
+ // hole is filled.
mtab->startBlockingReadsAfter(donorStateDoc.getBlockTimestamp().get());
}
}
@@ -206,9 +230,9 @@ void onTransitionToAborted(OperationContext* opCtx, const ShardSplitDonorDocumen
auto tenants = donorStateDoc.getTenantIds();
if (!tenants) {
- // The only case where there can be no tenants is when the instance is created by the abort
- // command. In that case, no tenant migration blockers are created and the state will go
- // straight to abort.
+ // The only case where there can be no tenants is when the instance is created by the
+ // abort command. In that case, no tenant migration blockers are created and the state
+ // will go straight to abort.
invariant(donorStateDoc.getState() == ShardSplitDonorStateEnum::kUninitialized);
return;
}
@@ -242,34 +266,35 @@ public:
_opCtx->getServiceContext(), tenantId);
if (!mtab) {
- // The state doc and TenantMigrationDonorAccessBlocker for this migration
- // were removed immediately after expireAt was set. This is unlikely to
- // occur in production where the garbage collection delay should be
- // sufficiently large.
+ // The state doc and TenantMigrationDonorAccessBlocker for this
+ // migration were removed immediately after expireAt was set. This is
+ // unlikely to occur in production where the garbage collection delay
+ // should be sufficiently large.
continue;
}
- if (!_opCtx->writesAreReplicated()) {
- // Setting expireAt implies that the TenantMigrationDonorAccessBlocker for
- // this migration will be removed shortly after this. However, a lagged
- // secondary might not manage to advance its majority commit point past the
- // migration commit or abort opTime and consequently transition out of the
- // blocking state before the TenantMigrationDonorAccessBlocker is removed.
- // When this occurs, blocked reads or writes will be left waiting for the
- // migration decision indefinitely. To avoid that, notify the
- // TenantMigrationDonorAccessBlocker here that the commit or abort opTime
- // has been majority committed (guaranteed to be true since by design the
- // donor never marks its state doc as garbage collectable before the
- // migration decision is majority committed).
+ if (isSecondary(_opCtx)) {
+ // Setting expireAt implies that the TenantMigrationDonorAccessBlocker
+ // for this migration will be removed shortly after this. However, a
+ // lagged secondary might not manage to advance its majority commit
+ // point past the migration commit or abort opTime and consequently
+ // transition out of the blocking state before the
+ // TenantMigrationDonorAccessBlocker is removed. When this occurs,
+ // blocked reads or writes will be left waiting for the migration
+ // decision indefinitely. To avoid that, notify the
+ // TenantMigrationDonorAccessBlocker here that the commit or abort
+ // opTime has been majority committed (guaranteed to be true since by
+ // design the donor never marks its state doc as garbage collectable
+ // before the migration decision is majority committed).
mtab->onMajorityCommitPointUpdate(
_donorStateDoc.getCommitOrAbortOpTime().get());
}
if (_donorStateDoc.getState() == ShardSplitDonorStateEnum::kAborted) {
invariant(mtab->inStateAborted());
- // The migration durably aborted and is now marked as garbage collectable,
- // remove its TenantMigrationDonorAccessBlocker right away to allow
- // back-to-back migration retries.
+ // The migration durably aborted and is now marked as garbage
+ // collectable, remove its TenantMigrationDonorAccessBlocker right away
+ // to allow back-to-back migration retries.
TenantMigrationAccessBlockerRegistry::get(_opCtx->getServiceContext())
.remove(tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
}
@@ -305,7 +330,7 @@ void ShardSplitDonorOpObserver::onInserts(OperationContext* opCtx,
std::vector<InsertStatement>::const_iterator first,
std::vector<InsertStatement>::const_iterator last,
bool fromMigrate) {
- if (nss != NamespaceString::kTenantSplitDonorsNamespace ||
+ if (nss != NamespaceString::kShardSplitDonorsNamespace ||
tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
return;
}
@@ -313,45 +338,41 @@ void ShardSplitDonorOpObserver::onInserts(OperationContext* opCtx,
for (auto it = first; it != last; it++) {
auto donorStateDoc = parseAndValidateDonorDocument(it->doc);
switch (donorStateDoc.getState()) {
- case ShardSplitDonorStateEnum::kBlocking:
- onBlockerInitialization(opCtx, donorStateDoc);
+ case ShardSplitDonorStateEnum::kAbortingIndexBuilds:
+ onTransitionToAbortingIndexBuilds(opCtx, donorStateDoc);
break;
case ShardSplitDonorStateEnum::kAborted:
// If the operation starts aborted, do not do anything.
break;
- case ShardSplitDonorStateEnum::kUninitialized:
- case ShardSplitDonorStateEnum::kCommitted:
- uasserted(ErrorCodes::IllegalOperation,
- "cannot insert a donor's state doc with 'state' other than 'kAborted' or "
- "'kBlocking'");
- break;
default:
- MONGO_UNREACHABLE;
+ uasserted(ErrorCodes::IllegalOperation,
+ "Cannot insert donor's state document with state other than 'aborted' or "
+ "'aborting index builds'.");
}
}
}
void ShardSplitDonorOpObserver::onUpdate(OperationContext* opCtx,
const OplogUpdateEntryArgs& args) {
- if (args.nss != NamespaceString::kTenantSplitDonorsNamespace ||
+ if (args.nss != NamespaceString::kShardSplitDonorsNamespace ||
tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
return;
}
auto donorStateDoc = parseAndValidateDonorDocument(args.updateArgs->updatedDoc);
switch (donorStateDoc.getState()) {
+ case ShardSplitDonorStateEnum::kBlocking:
+ onTransitionToBlocking(opCtx, donorStateDoc);
+ break;
case ShardSplitDonorStateEnum::kCommitted:
case ShardSplitDonorStateEnum::kAborted:
opCtx->recoveryUnit()->registerChange(
std::make_unique<TenantMigrationDonorCommitOrAbortHandler>(opCtx, donorStateDoc));
break;
- case ShardSplitDonorStateEnum::kBlocking:
- uasserted(ErrorCodes::IllegalOperation,
- "The state document should be inserted as blocking and never transition to "
- "blocking");
- break;
default:
- MONGO_UNREACHABLE;
+ uasserted(ErrorCodes::IllegalOperation,
+ "Cannot update donor's state document with state other than 'aborted', "
+ "'committed', or 'aborted'");
}
}
@@ -359,13 +380,12 @@ void ShardSplitDonorOpObserver::aboutToDelete(OperationContext* opCtx,
NamespaceString const& nss,
const UUID& uuid,
BSONObj const& doc) {
- if (nss != NamespaceString::kTenantSplitDonorsNamespace ||
+ if (nss != NamespaceString::kShardSplitDonorsNamespace ||
tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
return;
}
auto donorStateDoc = parseAndValidateDonorDocument(doc);
-
uassert(ErrorCodes::IllegalOperation,
str::stream() << "cannot delete a donor's state document " << doc
<< " since it has not been marked as garbage collectable and is not a"
@@ -390,8 +410,7 @@ void ShardSplitDonorOpObserver::onDelete(OperationContext* opCtx,
const UUID& uuid,
StmtId stmtId,
const OplogDeleteEntryArgs& args) {
- if (nss != NamespaceString::kTenantSplitDonorsNamespace ||
- !tenantIdsToDeleteDecoration(opCtx) ||
+ if (nss != NamespaceString::kShardSplitDonorsNamespace || !tenantIdsToDeleteDecoration(opCtx) ||
tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
return;
}
@@ -414,7 +433,7 @@ repl::OpTime ShardSplitDonorOpObserver::onDropCollection(OperationContext* opCtx
const UUID& uuid,
std::uint64_t numRecords,
const CollectionDropType dropType) {
- if (collectionName == NamespaceString::kTenantSplitDonorsNamespace) {
+ if (collectionName == NamespaceString::kShardSplitDonorsNamespace) {
opCtx->recoveryUnit()->onCommit([opCtx](boost::optional<Timestamp>) {
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.removeAll(TenantMigrationAccessBlocker::BlockerType::kDonor);
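
A compact summary of the donor state handling after this change, derived from the observer code above (a reading aid, not part of the patch):

// onInserts : kAbortingIndexBuilds -> create a TenantMigrationDonorAccessBlocker per tenant
//                                     (with an onRollback cleanup handler on primaries)
//             kAborted             -> no-op (instance created directly by the abort command)
// onUpdate  : kBlocking            -> startBlockingWrites (secondaries only) and
//                                     startBlockingReadsAfter(blockTimestamp) on all nodes
//             kCommitted/kAborted  -> register TenantMigrationDonorCommitOrAbortHandler
// onDelete  : only allowed once 'expireAt' marks the document garbage collectable
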
diff --git a/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp b/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp
index c52868126e6..6f2e376de47 100644
--- a/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp
@@ -35,6 +35,7 @@
#include "mongo/db/serverless/shard_split_donor_op_observer.h"
#include "mongo/db/serverless/shard_split_state_machine_gen.h"
#include "mongo/db/serverless/shard_split_test_utils.h"
+#include "mongo/db/serverless/shard_split_utils.h"
#include "mongo/db/service_context_d_test_fixture.h"
#include "mongo/dbtests/mock/mock_replica_set.h"
@@ -129,7 +130,8 @@ protected:
std::vector<std::shared_ptr<TenantMigrationDonorAccessBlocker>>
createBlockersAndStartBlockingWrites(const std::vector<std::string>& tenants,
OperationContext* opCtx,
- const std::string& connectionStr) {
+ const std::string& connectionStr,
+ bool isSecondary = false) {
auto uuid = UUID::gen();
std::vector<std::shared_ptr<TenantMigrationDonorAccessBlocker>> blockers;
for (const auto& tenant : tenants) {
@@ -141,7 +143,10 @@ protected:
_connectionStr);
blockers.push_back(mtab);
- mtab->startBlockingWrites();
+ if (!isSecondary) {
+ mtab->startBlockingWrites();
+ }
+
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()).add(tenant, mtab);
}
@@ -160,7 +165,7 @@ protected:
MockReplicaSet("donorSet", 3, true /* hasPrimary */, true /* dollarPrefixHosts */);
MockReplicaSet _recipientReplSet =
MockReplicaSet("recipientSet", 3, true /* hasPrimary */, true /* dollarPrefixHosts */);
- const NamespaceString _nss = NamespaceString::kTenantSplitDonorsNamespace;
+ const NamespaceString _nss = NamespaceString::kShardSplitDonorsNamespace;
std::vector<std::string> _tenantIds = {"tenant1", "tenantAB"};
std::string _connectionStr = _replSet.getConnectionString();
UUID _uuid = UUID::gen();
@@ -253,7 +258,30 @@ TEST_F(ShardSplitDonorOpObserverTest, InsertValidAbortedDocument) {
}
}
-TEST_F(ShardSplitDonorOpObserverTest, InsertBlockingDocumentPrimary) {
+TEST_F(ShardSplitDonorOpObserverTest, InsertAbortingIndexDocumentPrimary) {
+ test::shard_split::reconfigToAddRecipientNodes(
+ getServiceContext(), _recipientTagName, _replSet.getHosts(), _recipientReplSet.getHosts());
+
+ auto stateDocument = defaultStateDocument();
+ stateDocument.setState(ShardSplitDonorStateEnum::kAbortingIndexBuilds);
+ stateDocument.setRecipientConnectionString(mongo::serverless::makeRecipientConnectionString(
+ repl::ReplicationCoordinator::get(_opCtx.get())->getConfig(),
+ _recipientTagName,
+ _recipientSetName));
+
+ auto mtabVerifier = [opCtx = _opCtx.get()](std::shared_ptr<TenantMigrationAccessBlocker> mtab) {
+ ASSERT_TRUE(mtab);
+ // The OpObserver does not set the mtab to blocking for primaries.
+ ASSERT_OK(mtab->checkIfCanWrite(Timestamp(1, 1)));
+ ASSERT_OK(mtab->checkIfCanWrite(Timestamp(1, 3)));
+ ASSERT_OK(mtab->checkIfLinearizableReadWasAllowed(opCtx));
+ ASSERT_EQ(mtab->checkIfCanBuildIndex().code(), ErrorCodes::TenantMigrationConflict);
+ };
+
+ runInsertTestCase(stateDocument, _tenantIds, mtabVerifier);
+}
+
+TEST_F(ShardSplitDonorOpObserverTest, UpdateBlockingDocumentPrimary) {
test::shard_split::reconfigToAddRecipientNodes(
getServiceContext(), _recipientTagName, _replSet.getHosts(), _recipientReplSet.getHosts());
@@ -274,15 +302,16 @@ TEST_F(ShardSplitDonorOpObserverTest, InsertBlockingDocumentPrimary) {
ASSERT_EQ(mtab->checkIfCanBuildIndex().code(), ErrorCodes::TenantMigrationConflict);
};
- runInsertTestCase(stateDocument, _tenantIds, mtabVerifier);
+ runUpdateTestCase(stateDocument, _tenantIds, mtabVerifier);
}
-TEST_F(ShardSplitDonorOpObserverTest, InsertBlockingDocumentSecondary) {
+TEST_F(ShardSplitDonorOpObserverTest, UpdateBlockingDocumentSecondary) {
test::shard_split::reconfigToAddRecipientNodes(
getServiceContext(), _recipientTagName, _replSet.getHosts(), _recipientReplSet.getHosts());
// This indicates the instance is secondary for the OpObserver.
repl::UnreplicatedWritesBlock setSecondary(_opCtx.get());
+ createBlockersAndStartBlockingWrites(_tenantIds, _opCtx.get(), _connectionStr, true);
auto stateDocument = defaultStateDocument();
stateDocument.setState(ShardSplitDonorStateEnum::kBlocking);
@@ -299,18 +328,15 @@ TEST_F(ShardSplitDonorOpObserverTest, InsertBlockingDocumentSecondary) {
ASSERT_EQ(mtab->checkIfCanBuildIndex().code(), ErrorCodes::TenantMigrationConflict);
};
- runInsertTestCase(stateDocument, _tenantIds, mtabVerifier);
+ runUpdateTestCase(stateDocument, _tenantIds, mtabVerifier);
}
-
-TEST_F(ShardSplitDonorOpObserverTest, TransitionToBlockingFail) {
+TEST_F(ShardSplitDonorOpObserverTest, TransitionToAbortingIndexBuildsFail) {
// This indicates the instance is secondary for the OpObserver.
repl::UnreplicatedWritesBlock setSecondary(_opCtx.get());
auto stateDocument = defaultStateDocument();
- stateDocument.setState(ShardSplitDonorStateEnum::kBlocking);
- stateDocument.setBlockTimestamp(Timestamp(1, 1));
-
+ stateDocument.setState(ShardSplitDonorStateEnum::kAbortingIndexBuilds);
CollectionUpdateArgs updateArgs;
updateArgs.stmtIds = {};
diff --git a/src/mongo/db/serverless/shard_split_donor_service.cpp b/src/mongo/db/serverless/shard_split_donor_service.cpp
index f37a9416f5e..deb78f1779b 100644
--- a/src/mongo/db/serverless/shard_split_donor_service.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_service.cpp
@@ -68,54 +68,16 @@ MONGO_FAIL_POINT_DEFINE(pauseShardSplitAfterMarkingStateGarbageCollectable);
MONGO_FAIL_POINT_DEFINE(pauseShardSplitBeforeSplitConfigRemoval);
MONGO_FAIL_POINT_DEFINE(skipShardSplitRecipientCleanup);
MONGO_FAIL_POINT_DEFINE(pauseShardSplitBeforeLeavingBlockingState);
+MONGO_FAIL_POINT_DEFINE(pauseShardSplitAfterUpdatingToCommittedState);
+MONGO_FAIL_POINT_DEFINE(pauseShardSplitBeforeSendingStepUpToRecipients);
+MONGO_FAIL_POINT_DEFINE(pauseShardSplitAfterReceivingAbortCmd);
const Backoff kExponentialBackoff(Seconds(1), Milliseconds::max());
-bool shouldStopInsertingDonorStateDoc(Status status) {
- return status.isOK() || status == ErrorCodes::ConflictingOperationInProgress;
-}
-
-void setStateDocTimestamps(WithLock,
- ShardSplitDonorStateEnum nextState,
- repl::OpTime time,
- ShardSplitDonorDocument& stateDoc) {
- switch (nextState) {
- case ShardSplitDonorStateEnum::kUninitialized:
- break;
- case ShardSplitDonorStateEnum::kBlocking:
- stateDoc.setBlockTimestamp(time.getTimestamp());
- break;
- case ShardSplitDonorStateEnum::kAborted:
- stateDoc.setCommitOrAbortOpTime(time);
- break;
- case ShardSplitDonorStateEnum::kCommitted:
- stateDoc.setCommitOrAbortOpTime(time);
- break;
- default:
- MONGO_UNREACHABLE;
- }
-}
-
bool isAbortedDocumentPersistent(WithLock, ShardSplitDonorDocument& stateDoc) {
return !!stateDoc.getAbortReason();
}
-void setMtabToBlockingForTenants(ServiceContext* context,
- OperationContext* opCtx,
- const std::vector<StringData>& tenantIds) {
- // Start blocking writes before getting an oplog slot to guarantee no
- // writes to the tenant's data can commit with a timestamp after the
- // block timestamp.
- for (const auto& tenantId : tenantIds) {
- auto mtab = tenant_migration_access_blocker::getTenantMigrationDonorAccessBlocker(context,
- tenantId);
- invariant(mtab);
- mtab->startBlockingWrites();
-
- opCtx->recoveryUnit()->onRollback([mtab] { mtab->rollBackStartBlocking(); });
- }
-}
-
void checkForTokenInterrupt(const CancellationToken& token) {
uassert(ErrorCodes::CallbackCanceled, "Donor service interrupted", !token.isCanceled());
}
@@ -304,11 +266,14 @@ ShardSplitDonorService::DonorStateMachine::DonorStateMachine(
void ShardSplitDonorService::DonorStateMachine::tryAbort() {
LOGV2(6086502, "Received 'abortShardSplit' command.", "id"_attr = _migrationId);
- stdx::lock_guard<Latch> lg(_mutex);
- _abortRequested = true;
- if (_abortSource) {
- _abortSource->cancel();
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ _abortRequested = true;
+ if (_abortSource) {
+ _abortSource->cancel();
+ }
}
+ pauseShardSplitAfterReceivingAbortCmd.pauseWhileSet();
}
void ShardSplitDonorService::DonorStateMachine::tryForget() {
@@ -417,19 +382,16 @@ SemiFuture<void> ShardSplitDonorService::DonorStateMachine::run(
// Note we do not use the abort split token here because the abortShardSplit
// command waits for a decision to be persisted which will not happen if
// inserting the initial state document fails.
- if (MONGO_unlikely(pauseShardSplitBeforeBlockingState.shouldFail())) {
- pauseShardSplitBeforeBlockingState.pauseWhileSet();
- }
- return _enterBlockingOrAbortedState(executor, primaryToken, abortToken);
+ return _enterAbortIndexBuildsOrAbortedState(executor, primaryToken, abortToken);
+ })
+ .then([this, executor, abortToken] {
+ // Start tracking the abortToken for killing operation contexts
+ _cancelableOpCtxFactory.emplace(abortToken, _markKilledExecutor);
+ return _abortIndexBuildsAndEnterBlockingState(executor, abortToken);
})
.then([this, executor, abortToken, criticalSectionTimer] {
criticalSectionTimer->reset();
- checkForTokenInterrupt(abortToken);
- _cancelableOpCtxFactory.emplace(abortToken, _markKilledExecutor);
- _abortIndexBuilds(abortToken);
- })
- .then([this, executor, abortToken] {
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
pauseShardSplitAfterBlocking.pauseWhileSet(opCtx.get());
@@ -440,7 +402,12 @@ SemiFuture<void> ShardSplitDonorService::DonorStateMachine::run(
return _applySplitConfigToDonor(executor, abortToken);
})
.then([this, executor, abortToken] {
- return _waitForRecipientToAcceptSplitAndTriggerElection(executor, abortToken);
+ return _waitForRecipientToAcceptSplit(executor, abortToken);
+ })
+ .then([this, executor, primaryToken] {
+ // Only cancel operations on stepdown from here on out.
+ _cancelableOpCtxFactory.emplace(primaryToken, _markKilledExecutor);
+ return _triggerElectionAndEnterCommitedState(executor, primaryToken);
})
// anchor ensures the instance will still exists even if the primary stepped down
.onCompletion([this,
@@ -572,6 +539,143 @@ bool ShardSplitDonorService::DonorStateMachine::_hasInstalledSplitConfig(WithLoc
config.getRecipientConfig()->getReplSetName() == *_stateDoc.getRecipientSetName();
}
+ConnectionString ShardSplitDonorService::DonorStateMachine::_setupAcceptanceMonitoring(
+ WithLock lock, const CancellationToken& abortToken) {
+ auto recipientConnectionString = [stateDoc = _stateDoc]() {
+ if (stateDoc.getRecipientConnectionString()) {
+ return *stateDoc.getRecipientConnectionString();
+ }
+
+ auto recipientTagName = stateDoc.getRecipientTagName();
+ invariant(recipientTagName);
+ auto recipientSetName = stateDoc.getRecipientSetName();
+ invariant(recipientSetName);
+ auto config = repl::ReplicationCoordinator::get(cc().getServiceContext())->getConfig();
+ return serverless::makeRecipientConnectionString(
+ config, *recipientTagName, *recipientSetName);
+ }();
+
+ // Always start the replica set monitor if we haven't reached a decision yet
+ _splitAcceptancePromise.setWith([&]() -> Future<void> {
+ if (_stateDoc.getState() > ShardSplitDonorStateEnum::kBlocking ||
+ MONGO_unlikely(skipShardSplitWaitForSplitAcceptance.shouldFail())) {
+ return SemiFuture<void>::makeReady().unsafeToInlineFuture();
+ }
+
+ // Optionally select a task executor for unit testing
+ auto executor = _splitAcceptanceTaskExecutorForTest
+ ? *_splitAcceptanceTaskExecutorForTest
+ : _shardSplitService->getInstanceCleanupExecutor();
+
+ LOGV2(6142508,
+ "Monitoring recipient nodes for split acceptance.",
+ "id"_attr = _migrationId,
+ "recipientConnectionString"_attr = recipientConnectionString);
+
+ return detail::makeRecipientAcceptSplitFuture(
+ executor, abortToken, recipientConnectionString, _migrationId)
+ .unsafeToInlineFuture();
+ });
+
+ return recipientConnectionString;
+}
+
+ExecutorFuture<void>
+ShardSplitDonorService::DonorStateMachine::_enterAbortIndexBuildsOrAbortedState(
+ const ScopedTaskExecutorPtr& executor,
+ const CancellationToken& primaryToken,
+ const CancellationToken& abortToken) {
+ ShardSplitDonorStateEnum nextState;
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ if (_stateDoc.getState() == ShardSplitDonorStateEnum::kAborted) {
+ if (isAbortedDocumentPersistent(lg, _stateDoc)) {
+ // Node has stepped up and created an instance using a document in aborted state.
+ // No need to write the document as it already exists.
+ return ExecutorFuture(**executor);
+ }
+
+ _abortReason =
+ Status(ErrorCodes::TenantMigrationAborted, "Aborted due to 'abortShardSplit'.");
+ BSONObjBuilder bob;
+ _abortReason->serializeErrorToBSON(&bob);
+ _stateDoc.setAbortReason(bob.obj());
+ _stateDoc.setExpireAt(_serviceContext->getFastClockSource()->now() +
+ Milliseconds{repl::shardSplitGarbageCollectionDelayMS.load()});
+ nextState = ShardSplitDonorStateEnum::kAborted;
+
+ LOGV2(6670500, "Entering 'aborted' state.", "id"_attr = _stateDoc.getId());
+ } else {
+ // Always set up acceptance monitoring.
+ auto recipientConnectionString = _setupAcceptanceMonitoring(lg, abortToken);
+
+ if (_stateDoc.getState() > ShardSplitDonorStateEnum::kUninitialized) {
+ // Node has stepped up and resumed a shard split. No need to write the document as
+ // it already exists.
+ return ExecutorFuture(**executor);
+ }
+
+ _stateDoc.setRecipientConnectionString(recipientConnectionString);
+ nextState = ShardSplitDonorStateEnum::kAbortingIndexBuilds;
+
+ LOGV2(
+ 6670501, "Entering 'aborting index builds' state.", "id"_attr = _stateDoc.getId());
+ }
+ }
+
+ return _updateStateDocument(executor, primaryToken, nextState)
+ .then([this, executor, primaryToken](repl::OpTime opTime) {
+ return _waitForMajorityWriteConcern(executor, std::move(opTime), primaryToken);
+ })
+ .then([this, executor, nextState]() {
+ uassert(ErrorCodes::TenantMigrationAborted,
+ "Shard split operation aborted.",
+ nextState != ShardSplitDonorStateEnum::kAborted);
+ });
+}
+
+ExecutorFuture<void>
+ShardSplitDonorService::DonorStateMachine::_abortIndexBuildsAndEnterBlockingState(
+ const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken) {
+ checkForTokenInterrupt(abortToken);
+
+ boost::optional<std::vector<StringData>> tenantIds;
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ if (_stateDoc.getState() > ShardSplitDonorStateEnum::kAbortingIndexBuilds) {
+ return ExecutorFuture(**executor);
+ }
+
+ tenantIds = _stateDoc.getTenantIds();
+ invariant(tenantIds);
+ }
+
+ LOGV2(6436100, "Aborting index builds for shard split.", "id"_attr = _migrationId);
+
+ // Abort any in-progress index builds. No new index builds can start while we are doing this
+ // because the mtab prevents it.
+ auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
+ auto* indexBuildsCoordinator = IndexBuildsCoordinator::get(opCtx.get());
+ for (const auto& tenantId : *tenantIds) {
+ indexBuildsCoordinator->abortTenantIndexBuilds(
+ opCtx.get(), MigrationProtocolEnum::kMultitenantMigrations, tenantId, "shard split");
+ }
+
+ if (MONGO_unlikely(pauseShardSplitBeforeBlockingState.shouldFail())) {
+ pauseShardSplitBeforeBlockingState.pauseWhileSet();
+ }
+
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ LOGV2(8423358, "Entering 'blocking' state.", "id"_attr = _stateDoc.getId());
+ }
+
+ return _updateStateDocument(executor, abortToken, ShardSplitDonorStateEnum::kBlocking)
+ .then([this, self = shared_from_this(), executor, abortToken](repl::OpTime opTime) {
+ return _waitForMajorityWriteConcern(executor, std::move(opTime), abortToken);
+ });
+}
+
ExecutorFuture<void>
ShardSplitDonorService::DonorStateMachine::_waitForRecipientToReachBlockTimestamp(
const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken) {
@@ -661,7 +765,7 @@ ExecutorFuture<void> sendStepUpToRecipient(const HostAndPort recipient,
return AsyncTry([executor, recipient, token] {
executor::RemoteCommandRequest request(
recipient, "admin", BSON("replSetStepUp" << 1 << "skipDryRun" << true), nullptr);
-
+ pauseShardSplitBeforeSendingStepUpToRecipients.pauseWhileSet();
return executor->scheduleRemoteCommand(request, token)
.then([](const auto& response) {
return getStatusFromCommandResult(response.data);
@@ -676,10 +780,26 @@ ExecutorFuture<void> sendStepUpToRecipient(const HostAndPort recipient,
.on(executor, token);
}
-ExecutorFuture<void>
-ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTriggerElection(
+ExecutorFuture<void> ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplit(
const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken) {
+
checkForTokenInterrupt(abortToken);
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ if (_stateDoc.getState() > ShardSplitDonorStateEnum::kBlocking) {
+ return ExecutorFuture(**executor);
+ }
+ }
+
+ LOGV2(6142501, "Waiting for recipient to accept the split.", "id"_attr = _migrationId);
+
+ return ExecutorFuture(**executor).then([&]() { return _splitAcceptancePromise.getFuture(); });
+}
+
+ExecutorFuture<void>
+ShardSplitDonorService::DonorStateMachine::_triggerElectionAndEnterCommitedState(
+ const ScopedTaskExecutorPtr& executor, const CancellationToken& primaryToken) {
+ checkForTokenInterrupt(primaryToken);
std::vector<HostAndPort> recipients;
{
@@ -699,10 +819,7 @@ ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTrig
auto remoteCommandExecutor =
_splitAcceptanceTaskExecutorForTest ? *_splitAcceptanceTaskExecutorForTest : **executor;
- LOGV2(6142501, "Waiting for recipient to accept the split.", "id"_attr = _migrationId);
-
return ExecutorFuture(**executor)
- .then([&]() { return _splitAcceptancePromise.getFuture(); })
.then([this] {
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
if (MONGO_unlikely(pauseShardSplitBeforeLeavingBlockingState.shouldFail())) {
@@ -723,7 +840,7 @@ ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTrig
uasserted(ErrorCodes::InternalError, "simulate a shard split error");
}
})
- .then([this, recipients, abortToken, remoteCommandExecutor] {
+ .then([this, recipients, primaryToken, remoteCommandExecutor] {
LOGV2(6493901,
"Triggering an election after recipient has accepted the split.",
"id"_attr = _migrationId);
@@ -733,14 +850,16 @@ ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTrig
// succeed). Selecting a random node has a 2/3 chance to succeed for replSetStepUp. If
// the first command fail, we know this node is the most out-of-date. Therefore we
// select the next node and we know the first node selected will vote for the second.
- return sendStepUpToRecipient(recipients[0], remoteCommandExecutor, abortToken)
- .onCompletion([this, recipients, remoteCommandExecutor, abortToken](Status status) {
- if (status.isOK()) {
- return ExecutorFuture<void>(remoteCommandExecutor, status);
- }
+ return sendStepUpToRecipient(recipients[0], remoteCommandExecutor, primaryToken)
+ .onCompletion(
+ [this, recipients, remoteCommandExecutor, primaryToken](Status status) {
+ if (status.isOK()) {
+ return ExecutorFuture<void>(remoteCommandExecutor, status);
+ }
- return sendStepUpToRecipient(recipients[1], remoteCommandExecutor, abortToken);
- })
+ return sendStepUpToRecipient(
+ recipients[1], remoteCommandExecutor, primaryToken);
+ })
.onCompletion([this](Status replSetStepUpStatus) {
if (!replSetStepUpStatus.isOK()) {
LOGV2(6493904,
@@ -756,180 +875,93 @@ ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTrig
});
})
.thenRunOn(**executor)
- .then([this, executor, abortToken]() {
+ .then([this, executor, primaryToken]() {
LOGV2(6142503, "Entering 'committed' state.", "id"_attr = _stateDoc.getId());
+ auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
+ pauseShardSplitAfterUpdatingToCommittedState.pauseWhileSet(opCtx.get());
- return _updateStateDocument(executor, abortToken, ShardSplitDonorStateEnum::kCommitted)
- .then([this, executor, abortToken](repl::OpTime opTime) {
- return _waitForMajorityWriteConcern(executor, std::move(opTime), abortToken);
+ return _updateStateDocument(
+ executor, primaryToken, ShardSplitDonorStateEnum::kCommitted)
+ .then([this, executor, primaryToken](repl::OpTime opTime) {
+ return _waitForMajorityWriteConcern(executor, std::move(opTime), primaryToken);
});
});
}
-ExecutorFuture<void> ShardSplitDonorService::DonorStateMachine::_enterBlockingOrAbortedState(
- const ScopedTaskExecutorPtr& executor,
- const CancellationToken& primaryToken,
- const CancellationToken& abortToken) {
- ShardSplitDonorStateEnum nextState;
- {
- stdx::lock_guard<Latch> lg(_mutex);
- if (_stateDoc.getState() == ShardSplitDonorStateEnum::kAborted) {
- if (isAbortedDocumentPersistent(lg, _stateDoc)) {
- // Node has step up and created an instance using a document in abort state. No
- // need to write the document as it already exists.
- return ExecutorFuture(**executor);
- }
-
- _abortReason =
- Status(ErrorCodes::TenantMigrationAborted, "Aborted due to 'abortShardSplit'.");
- BSONObjBuilder bob;
- _abortReason->serializeErrorToBSON(&bob);
- _stateDoc.setAbortReason(bob.obj());
- _stateDoc.setExpireAt(_serviceContext->getFastClockSource()->now() +
- Milliseconds{repl::shardSplitGarbageCollectionDelayMS.load()});
- nextState = ShardSplitDonorStateEnum::kAborted;
-
- LOGV2(8423355, "Entering 'aborted' state.", "id"_attr = _stateDoc.getId());
- } else {
- auto recipientConnectionString = [stateDoc = _stateDoc]() {
- if (stateDoc.getRecipientConnectionString()) {
- return *stateDoc.getRecipientConnectionString();
- }
-
- auto recipientTagName = stateDoc.getRecipientTagName();
- invariant(recipientTagName);
- auto recipientSetName = stateDoc.getRecipientSetName();
- invariant(recipientSetName);
- auto config =
- repl::ReplicationCoordinator::get(cc().getServiceContext())->getConfig();
- return serverless::makeRecipientConnectionString(
- config, *recipientTagName, *recipientSetName);
- }();
-
- // Always start the replica set monitor if we haven't reached a decision yet
- _splitAcceptancePromise.setWith([&]() -> Future<void> {
- if (_stateDoc.getState() > ShardSplitDonorStateEnum::kBlocking ||
- MONGO_unlikely(skipShardSplitWaitForSplitAcceptance.shouldFail())) {
- return SemiFuture<void>::makeReady().unsafeToInlineFuture();
- }
-
- // Optionally select a task executor for unit testing
- auto executor = _splitAcceptanceTaskExecutorForTest
- ? *_splitAcceptanceTaskExecutorForTest
- : _shardSplitService->getInstanceCleanupExecutor();
-
- LOGV2(6142508,
- "Monitoring recipient nodes for split acceptance.",
- "id"_attr = _migrationId,
- "recipientConnectionString"_attr = recipientConnectionString);
-
- return detail::makeRecipientAcceptSplitFuture(
- executor, abortToken, recipientConnectionString, _migrationId)
- .unsafeToInlineFuture();
- });
-
- if (_stateDoc.getState() > ShardSplitDonorStateEnum::kUninitialized) {
- // Node has step up and resumed a shard split. No need to write the document as
- // it already exists.
- return ExecutorFuture(**executor);
- }
-
- // Otherwise, record the recipient connection string
- _stateDoc.setRecipientConnectionString(recipientConnectionString);
- _stateDoc.setState(ShardSplitDonorStateEnum::kBlocking);
- nextState = ShardSplitDonorStateEnum::kBlocking;
-
- LOGV2(8423358, "Entering 'blocking' state.", "id"_attr = _stateDoc.getId());
- }
- }
-
- return AsyncTry([this, nextState, uuid = _migrationId]() {
- auto opCtxHolder = _cancelableOpCtxFactory->makeOperationContext(&cc());
- auto opCtx = opCtxHolder.get();
-
- AutoGetCollection collection(opCtx, _stateDocumentsNS, MODE_IX);
-
- writeConflictRetry(
- opCtx, "ShardSplitDonorInsertStateDoc", _stateDocumentsNS.ns(), [&] {
- const auto filter = BSON(ShardSplitDonorDocument::kIdFieldName << uuid);
- const auto getUpdatedStateDocBson = [&]() {
- stdx::lock_guard<Latch> lg(_mutex);
- return _stateDoc.toBSON();
- };
-
- WriteUnitOfWork wuow(opCtx);
- if (nextState == ShardSplitDonorStateEnum::kBlocking) {
- stdx::lock_guard<Latch> lg(_mutex);
-
- insertTenantAccessBlocker(lg, opCtx, _stateDoc);
-
- auto tenantIds = _stateDoc.getTenantIds();
- invariant(tenantIds);
- setMtabToBlockingForTenants(_serviceContext, opCtx, tenantIds.get());
- }
-
- // Reserve an opTime for the write.
- auto oplogSlot = LocalOplogInfo::get(opCtx)->getNextOpTimes(opCtx, 1U)[0];
- setStateDocTimestamps(
- stdx::lock_guard<Latch>{_mutex}, nextState, oplogSlot, _stateDoc);
-
- auto updateResult = Helpers::upsert(opCtx,
- _stateDocumentsNS.ns(),
- filter,
- getUpdatedStateDocBson(),
- /*fromMigrate=*/false);
-
-
- // We only want to insert, not modify, document
- invariant(updateResult.numMatched == 0);
- wuow.commit();
- });
-
- return repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
- })
- .until([](StatusWith<repl::OpTime> swOpTime) {
- return shouldStopInsertingDonorStateDoc(swOpTime.getStatus());
- })
- .withBackoffBetweenIterations(kExponentialBackoff)
- .on(**executor, primaryToken)
- .then([this, executor, primaryToken](repl::OpTime opTime) {
- return _waitForMajorityWriteConcern(executor, std::move(opTime), primaryToken);
- })
- .then([this, executor, nextState]() {
- uassert(ErrorCodes::TenantMigrationAborted,
- "Shard split operation aborted.",
- nextState != ShardSplitDonorStateEnum::kAborted);
- });
-}
-
ExecutorFuture<repl::OpTime> ShardSplitDonorService::DonorStateMachine::_updateStateDocument(
const ScopedTaskExecutorPtr& executor,
const CancellationToken& token,
ShardSplitDonorStateEnum nextState) {
- auto tenantIds = [&]() {
+ auto [tenantIds, isInsert] = [&]() {
stdx::lock_guard<Latch> lg(_mutex);
- _stateDoc.setState(nextState);
-
- return _stateDoc.getTenantIds();
+ auto isInsert = _stateDoc.getState() == ShardSplitDonorStateEnum::kUninitialized ||
+ _stateDoc.getState() == ShardSplitDonorStateEnum::kAborted;
+ return std::make_pair(_stateDoc.getTenantIds(), isInsert);
}();
- return AsyncTry([this, tenantIds = std::move(tenantIds), uuid = _migrationId, nextState] {
+ return AsyncTry([this,
+ tenantIds = std::move(tenantIds),
+ isInsert = isInsert,
+ uuid = _migrationId,
+ nextState] {
auto opCtxHolder = _cancelableOpCtxFactory->makeOperationContext(&cc());
auto opCtx = opCtxHolder.get();
AutoGetCollection collection(opCtx, _stateDocumentsNS, MODE_IX);
- uassert(ErrorCodes::NamespaceNotFound,
- str::stream() << _stateDocumentsNS.ns() << " does not exist",
- collection);
+
+ if (!isInsert) {
+ uassert(ErrorCodes::NamespaceNotFound,
+ str::stream() << _stateDocumentsNS.ns() << " does not exist",
+ collection);
+ }
writeConflictRetry(
- opCtx, "ShardSplitDonorUpdateStateDoc", _stateDocumentsNS.ns(), [&] {
+ opCtx, "ShardSplitDonorUpdateStateDoc", _stateDocumentsNS.ns(), [&]() {
WriteUnitOfWork wuow(opCtx);
+ if (nextState == ShardSplitDonorStateEnum::kBlocking) {
+ // Start blocking writes before getting an oplog slot to guarantee no
+ // writes to the tenant's data can commit with a timestamp after the
+ // block timestamp.
+ for (const auto& tenantId : *tenantIds) {
+ auto mtab = tenant_migration_access_blocker::
+ getTenantMigrationDonorAccessBlocker(_serviceContext, tenantId);
+ invariant(mtab);
+ mtab->startBlockingWrites();
+
+ opCtx->recoveryUnit()->onRollback(
+ [mtab] { mtab->rollBackStartBlocking(); });
+ }
+ }
+
// Reserve an opTime for the write.
auto oplogSlot = LocalOplogInfo::get(opCtx)->getNextOpTimes(opCtx, 1U)[0];
- setStateDocTimestamps(
- stdx::lock_guard<Latch>{_mutex}, nextState, oplogSlot, _stateDoc);
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ _stateDoc.setState(nextState);
+ switch (nextState) {
+ case ShardSplitDonorStateEnum::kUninitialized:
+ case ShardSplitDonorStateEnum::kAbortingIndexBuilds:
+ break;
+ case ShardSplitDonorStateEnum::kBlocking:
+ _stateDoc.setBlockTimestamp(oplogSlot.getTimestamp());
+ break;
+ case ShardSplitDonorStateEnum::kCommitted:
+ _stateDoc.setCommitOrAbortOpTime(oplogSlot);
+ break;
+ case ShardSplitDonorStateEnum::kAborted: {
+ _stateDoc.setCommitOrAbortOpTime(oplogSlot);
+
+ invariant(_abortReason);
+ BSONObjBuilder bob;
+ _abortReason.get().serializeErrorToBSON(&bob);
+ _stateDoc.setAbortReason(bob.obj());
+ break;
+ }
+ default:
+ MONGO_UNREACHABLE;
+ }
+ }
const auto filter = BSON(ShardSplitDonorDocument::kIdFieldName << uuid);
const auto updatedStateDocBson = [&]() {
@@ -942,15 +974,19 @@ ExecutorFuture<repl::OpTime> ShardSplitDonorService::DonorStateMachine::_updateS
updatedStateDocBson,
/*fromMigrate=*/false);
- invariant(updateResult.numDocsModified == 1);
+ if (isInsert) {
+ invariant(!updateResult.existing);
+ invariant(!updateResult.upsertedId.isEmpty());
+ } else {
+ invariant(updateResult.numDocsModified == 1);
+ }
+
wuow.commit();
});
return repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
})
- .until([](StatusWith<repl::OpTime> swOpTime) {
- return shouldStopInsertingDonorStateDoc(swOpTime.getStatus());
- })
+ .until([](StatusWith<repl::OpTime> swOpTime) { return swOpTime.getStatus().isOK(); })
.withBackoffBetweenIterations(kExponentialBackoff)
.on(**executor, token);
}
@@ -1148,30 +1184,4 @@ ExecutorFuture<void> ShardSplitDonorService::DonorStateMachine::_cleanRecipientS
.on(**executor, primaryToken)
.ignoreValue();
}
-
-void ShardSplitDonorService::DonorStateMachine::_abortIndexBuilds(
- const CancellationToken& abortToken) {
- checkForTokenInterrupt(abortToken);
-
- boost::optional<std::vector<StringData>> tenantIds;
- {
- stdx::lock_guard<Latch> lg(_mutex);
- if (_stateDoc.getState() > ShardSplitDonorStateEnum::kBlocking) {
- return;
- }
- tenantIds = _stateDoc.getTenantIds();
- invariant(tenantIds);
- }
-
- LOGV2(6436100, "Aborting index build for shard split.", "id"_attr = _migrationId);
-
- // Before applying the split config, abort any in-progress index builds. No new index builds
- // can start while we are doing this because the mtab prevents it.
- auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
- auto* indexBuildsCoordinator = IndexBuildsCoordinator::get(opCtx.get());
- for (const auto& tenantId : *tenantIds) {
- indexBuildsCoordinator->abortTenantIndexBuilds(
- opCtx.get(), MigrationProtocolEnum::kMultitenantMigrations, tenantId, "shard split");
- }
-}
} // namespace mongo
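
Editorial note: the refactored _updateStateDocument above now handles both inserts and updates of the donor state document, deciding which validation to apply from the in-memory state it starts in. Below is a minimal, hypothetical standalone sketch of that decision; names are simplified and the UpsertResult fields are assumed from the Helpers::upsert usage in the hunk, so this is not the real DonorStateMachine API.

// Hypothetical condensed view of the insert-vs-update decision in
// _updateStateDocument; types and helpers are simplified for illustration.
#include <cassert>

enum class DonorState { kUninitialized, kAbortingIndexBuilds, kBlocking, kCommitted, kAborted };

struct UpsertResult {
    bool existing;         // an existing document was matched
    long numDocsModified;  // documents modified in place
};

// The state doc is written as an insert only when the instance has not
// persisted anything yet: a brand new split, or an abort received before the
// initial document was written.
bool isInsert(DonorState current) {
    return current == DonorState::kUninitialized || current == DonorState::kAborted;
}

void validateUpsert(bool insert, const UpsertResult& result) {
    if (insert) {
        assert(!result.existing);             // the upsert must have created the doc
    } else {
        assert(result.numDocsModified == 1);  // the upsert must have matched the doc
    }
}

int main() {
    validateUpsert(isInsert(DonorState::kUninitialized), UpsertResult{false, 0});
    validateUpsert(isInsert(DonorState::kBlocking), UpsertResult{true, 1});
    return 0;
}
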
diff --git a/src/mongo/db/serverless/shard_split_donor_service.h b/src/mongo/db/serverless/shard_split_donor_service.h
index 9c6c3645de2..bf1548527dc 100644
--- a/src/mongo/db/serverless/shard_split_donor_service.h
+++ b/src/mongo/db/serverless/shard_split_donor_service.h
@@ -56,7 +56,7 @@ public:
}
NamespaceString getStateDocumentsNS() const override {
- return NamespaceString::kTenantSplitDonorsNamespace;
+ return NamespaceString::kShardSplitDonorsNamespace;
}
ThreadPool::Limits getThreadPoolLimits() const override;
@@ -156,9 +156,12 @@ public:
private:
// Tasks
- ExecutorFuture<void> _enterBlockingOrAbortedState(const ScopedTaskExecutorPtr& executor,
- const CancellationToken& primaryToken,
- const CancellationToken& abortToken);
+ ExecutorFuture<void> _enterAbortIndexBuildsOrAbortedState(const ScopedTaskExecutorPtr& executor,
+ const CancellationToken& primaryToken,
+ const CancellationToken& abortToken);
+
+ ExecutorFuture<void> _abortIndexBuildsAndEnterBlockingState(
+ const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken);
ExecutorFuture<void> _waitForRecipientToReachBlockTimestamp(
const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken);
@@ -166,8 +169,11 @@ private:
ExecutorFuture<void> _applySplitConfigToDonor(const ScopedTaskExecutorPtr& executor,
const CancellationToken& abortToken);
- ExecutorFuture<void> _waitForRecipientToAcceptSplitAndTriggerElection(
- const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken);
+ ExecutorFuture<void> _waitForRecipientToAcceptSplit(const ScopedTaskExecutorPtr& executor,
+ const CancellationToken& primaryToken);
+
+ ExecutorFuture<void> _triggerElectionAndEnterCommitedState(
+ const ScopedTaskExecutorPtr& executor, const CancellationToken& primaryToken);
ExecutorFuture<void> _waitForForgetCmdThenMarkGarbageCollectable(
const ScopedTaskExecutorPtr& executor, const CancellationToken& primaryToken);
@@ -192,7 +198,7 @@ private:
void _initiateTimeout(const ScopedTaskExecutorPtr& executor,
const CancellationToken& abortToken);
-
+ ConnectionString _setupAcceptanceMonitoring(WithLock lock, const CancellationToken& abortToken);
bool _hasInstalledSplitConfig(WithLock lock);
/*
@@ -202,10 +208,8 @@ private:
ExecutorFuture<void> _cleanRecipientStateDoc(const ScopedTaskExecutorPtr& executor,
const CancellationToken& token);
- void _abortIndexBuilds(const CancellationToken& abortToken);
-
private:
- const NamespaceString _stateDocumentsNS = NamespaceString::kTenantSplitDonorsNamespace;
+ const NamespaceString _stateDocumentsNS = NamespaceString::kShardSplitDonorsNamespace;
mutable Mutex _mutex = MONGO_MAKE_LATCH("ShardSplitDonorService::_mutex");
const UUID _migrationId;
diff --git a/src/mongo/db/serverless/shard_split_donor_service_test.cpp b/src/mongo/db/serverless/shard_split_donor_service_test.cpp
index f061e686c13..5824029d097 100644
--- a/src/mongo/db/serverless/shard_split_donor_service_test.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_service_test.cpp
@@ -90,11 +90,11 @@ StatusWith<ShardSplitDonorDocument> getStateDocument(OperationContext* opCtx,
const UUID& shardSplitId) {
// Use kLastApplied so that we can read the state document as a secondary.
ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kLastApplied);
- AutoGetCollectionForRead collection(opCtx, NamespaceString::kTenantSplitDonorsNamespace);
+ AutoGetCollectionForRead collection(opCtx, NamespaceString::kShardSplitDonorsNamespace);
if (!collection) {
return Status(ErrorCodes::NamespaceNotFound,
str::stream() << "Collection not found looking for state document: "
- << NamespaceString::kTenantSplitDonorsNamespace.ns());
+ << NamespaceString::kShardSplitDonorsNamespace.ns());
}
BSONObj result;
@@ -192,6 +192,9 @@ std::ostringstream& operator<<(std::ostringstream& builder,
case mongo::ShardSplitDonorStateEnum::kUninitialized:
builder << "kUninitialized";
break;
+ case mongo::ShardSplitDonorStateEnum::kAbortingIndexBuilds:
+ builder << "kAbortingIndexBuilds";
+ break;
case mongo::ShardSplitDonorStateEnum::kAborted:
builder << "kAborted";
break;
@@ -348,8 +351,7 @@ public:
// The database needs to be open before using shard split donor service.
{
auto opCtx = cc().makeOperationContext();
- AutoGetDb autoDb(
- opCtx.get(), NamespaceString::kTenantSplitDonorsNamespace.db(), MODE_X);
+ AutoGetDb autoDb(opCtx.get(), NamespaceString::kShardSplitDonorsNamespace.db(), MODE_X);
auto db = autoDb.ensureDbExists(opCtx.get());
ASSERT_TRUE(db);
}
@@ -484,18 +486,14 @@ TEST_F(ShardSplitDonorServiceTest, BasicShardSplitDonorServiceInstanceCreation)
ASSERT_EQ(_uuid, serviceInstance->getId());
waitForMonitorAndProcessHello();
-
waitForReplSetStepUp(Status(ErrorCodes::OK, ""));
auto result = serviceInstance->decisionFuture().get();
-
ASSERT_TRUE(hasActiveSplitForTenants(opCtx.get(), _tenantIds));
-
ASSERT(!result.abortReason);
ASSERT_EQ(result.state, mongo::ShardSplitDonorStateEnum::kCommitted);
serviceInstance->tryForget();
-
auto completionFuture = serviceInstance->completionFuture();
completionFuture.wait();
@@ -692,7 +690,7 @@ TEST_F(ShardSplitDonorServiceTest, ReconfigToRemoveSplitConfig) {
}
// Abort scenario : abortSplit called before startSplit.
-TEST_F(ShardSplitDonorServiceTest, CreateInstanceInAbortState) {
+TEST_F(ShardSplitDonorServiceTest, CreateInstanceInAbortedState) {
auto opCtx = makeOperationContext();
auto serviceContext = getServiceContext();
@@ -1067,4 +1065,49 @@ TEST_F(ShardSplitRecipientCleanupTest, ShardSplitRecipientCleanup) {
ErrorCodes::NoMatchingDocument);
}
+class ShardSplitAbortedStepUpTest : public ShardSplitPersistenceTest {
+public:
+ repl::ReplSetConfig initialDonorConfig() override {
+ BSONArrayBuilder members;
+ members.append(BSON("_id" << 1 << "host"
+ << "node1"));
+
+ return repl::ReplSetConfig::parse(BSON("_id"
+ << "donorSetName"
+ << "version" << 1 << "protocolVersion" << 1
+ << "members" << members.arr()));
+ }
+
+ ShardSplitDonorDocument initialStateDocument() override {
+
+ auto stateDocument = defaultStateDocument();
+
+ stateDocument.setState(mongo::ShardSplitDonorStateEnum::kAborted);
+ stateDocument.setBlockTimestamp(Timestamp(1, 1));
+ stateDocument.setCommitOrAbortOpTime(repl::OpTime(Timestamp(1, 1), 1));
+
+ Status status(ErrorCodes::InternalError, abortReason);
+ BSONObjBuilder bob;
+ status.serializeErrorToBSON(&bob);
+ stateDocument.setAbortReason(bob.obj());
+
+ return stateDocument;
+ }
+
+ std::string abortReason{"Testing simulated error"};
+};
+
+TEST_F(ShardSplitAbortedStepUpTest, ShardSplitAbortedStepUp) {
+ auto opCtx = makeOperationContext();
+ auto splitService = repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
+ ->lookupServiceByName(ShardSplitDonorService::kServiceName);
+ auto optionalDonor = ShardSplitDonorService::DonorStateMachine::lookup(
+ opCtx.get(), splitService, BSON("_id" << _uuid));
+
+ ASSERT(optionalDonor);
+ auto result = optionalDonor->get()->decisionFuture().get();
+
+ ASSERT_EQ(result.state, mongo::ShardSplitDonorStateEnum::kAborted);
+}
+
} // namespace mongo
diff --git a/src/mongo/db/serverless/shard_split_state_machine.idl b/src/mongo/db/serverless/shard_split_state_machine.idl
index 8aa65017c1b..ee3462f5a05 100644
--- a/src/mongo/db/serverless/shard_split_state_machine.idl
+++ b/src/mongo/db/serverless/shard_split_state_machine.idl
@@ -40,6 +40,7 @@ enums:
type: string
values:
kUninitialized: "uninitialized"
+ kAbortingIndexBuilds: "aborting index builds"
kBlocking: "blocking"
kCommitted: "committed"
kAborted: "aborted"
diff --git a/src/mongo/db/serverless/shard_split_utils.cpp b/src/mongo/db/serverless/shard_split_utils.cpp
index b9bb407220d..041c133b02b 100644
--- a/src/mongo/db/serverless/shard_split_utils.cpp
+++ b/src/mongo/db/serverless/shard_split_utils.cpp
@@ -149,7 +149,7 @@ repl::ReplSetConfig makeSplitConfig(const repl::ReplSetConfig& config,
}
Status insertStateDoc(OperationContext* opCtx, const ShardSplitDonorDocument& stateDoc) {
- const auto nss = NamespaceString::kTenantSplitDonorsNamespace;
+ const auto nss = NamespaceString::kShardSplitDonorsNamespace;
AutoGetCollection collection(opCtx, nss, MODE_IX);
uassert(ErrorCodes::PrimarySteppedDown,
@@ -176,7 +176,7 @@ Status insertStateDoc(OperationContext* opCtx, const ShardSplitDonorDocument& st
}
Status updateStateDoc(OperationContext* opCtx, const ShardSplitDonorDocument& stateDoc) {
- const auto nss = NamespaceString::kTenantSplitDonorsNamespace;
+ const auto nss = NamespaceString::kShardSplitDonorsNamespace;
AutoGetCollection collection(opCtx, nss, MODE_IX);
if (!collection) {
@@ -198,7 +198,7 @@ Status updateStateDoc(OperationContext* opCtx, const ShardSplitDonorDocument& st
}
StatusWith<bool> deleteStateDoc(OperationContext* opCtx, const UUID& shardSplitId) {
- const auto nss = NamespaceString::kTenantSplitDonorsNamespace;
+ const auto nss = NamespaceString::kShardSplitDonorsNamespace;
AutoGetCollection collection(opCtx, nss, MODE_IX);
if (!collection) {
diff --git a/src/mongo/db/serverless/shard_split_utils.h b/src/mongo/db/serverless/shard_split_utils.h
index b58f24b5a1a..2d9ab8402e7 100644
--- a/src/mongo/db/serverless/shard_split_utils.h
+++ b/src/mongo/db/serverless/shard_split_utils.h
@@ -64,7 +64,7 @@ repl::ReplSetConfig makeSplitConfig(const repl::ReplSetConfig& config,
/**
* Inserts the shard split state document 'stateDoc' into
- * 'config.tenantSplitDonors' collection. Also, creates the collection if not present
+ * 'config.shardSplitDonors' collection. Also, creates the collection if not present
* before inserting the document.
*
* NOTE: A state doc might get inserted based on a decision made out of a stale read within a
diff --git a/src/mongo/db/service_context_d_test_fixture.cpp b/src/mongo/db/service_context_d_test_fixture.cpp
index a6d84a4cd40..9eda18d4812 100644
--- a/src/mongo/db/service_context_d_test_fixture.cpp
+++ b/src/mongo/db/service_context_d_test_fixture.cpp
@@ -124,6 +124,8 @@ ServiceContextMongoDTest::ServiceContextMongoDTest(Options options)
storageGlobalParams.dbpath = _tempDir.path();
+ storageGlobalParams.ephemeral = options._ephemeral;
+
// Since unit tests start in their own directories, by default skip lock file and metadata file
// for faster startup.
auto opCtx = serviceContext->makeOperationContext(getClient());
diff --git a/src/mongo/db/service_context_d_test_fixture.h b/src/mongo/db/service_context_d_test_fixture.h
index 8b21eaf0b2e..3973a344ffd 100644
--- a/src/mongo/db/service_context_d_test_fixture.h
+++ b/src/mongo/db/service_context_d_test_fixture.h
@@ -82,8 +82,16 @@ protected:
return std::move(*this);
}
+ Options ephemeral(bool ephemeral) {
+ _ephemeral = ephemeral;
+ return std::move(*this);
+ }
+
private:
std::string _engine = "wiredTiger";
+ // We use ephemeral instances by default to advise storage engines (in particular
+ // WiredTiger) not to perform disk I/O.
+ bool _ephemeral = true;
RepairAction _repair = RepairAction::kNoRepair;
StorageEngineInitFlags _initFlags = kDefaultStorageEngineInitFlags;
bool _useReplSettings = false;
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index 7c0c30e58e7..bc167fba693 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -41,7 +41,7 @@
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/auth/impersonation_session.h"
#include "mongo/db/auth/ldap_cumulative_operation_stats.h"
-#include "mongo/db/auth/security_token.h"
+#include "mongo/db/auth/security_token_authentication_guard.h"
#include "mongo/db/client.h"
#include "mongo/db/command_can_run_here.h"
#include "mongo/db/commands.h"
@@ -654,6 +654,7 @@ private:
CommandHelpers::uassertShouldAttemptParse(opCtx, command, request);
_startOperationTime = getClientOperationTime(opCtx);
+ rpc::readRequestMetadata(opCtx, request, command->requiresAuth());
_invocation = command->parse(opCtx, request);
CommandInvocation::set(opCtx, _invocation);
@@ -1244,7 +1245,7 @@ Future<void> RunCommandImpl::_runImpl() {
Future<void> RunCommandImpl::_runCommand() {
auto shouldCheckoutSession = _ecd->getSessionOptions().getTxnNumber() &&
- !shouldCommandSkipSessionCheckout(_ecd->getInvocation()->definition()->getName());
+ _ecd->getInvocation()->definition()->shouldCheckoutSession();
if (shouldCheckoutSession) {
return future_util::makeState<CheckoutSessionAndInvokeCommand>(_ecd).thenWithState(
[](auto* path) { return path->run(); });
@@ -1276,8 +1277,7 @@ void RunCommandAndWaitForWriteConcern::_waitForWriteConcern(BSONObjBuilder& bb)
}
CurOp::get(opCtx)->debug().writeConcern.emplace(opCtx->getWriteConcern());
- _execContext->behaviors->waitForWriteConcern(
- opCtx, invocation, repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp(), bb);
+ _execContext->behaviors->waitForWriteConcern(opCtx, invocation, _ecd->getLastOpBeforeRun(), bb);
}
Future<void> RunCommandAndWaitForWriteConcern::_runImpl() {
@@ -1309,7 +1309,7 @@ void RunCommandAndWaitForWriteConcern::_setup() {
// server defaults. So, warn if the operation has not specified writeConcern and is on
// a shard/config server.
if (!opCtx->getClient()->isInDirectClient() &&
- (!opCtx->inMultiDocumentTransaction() || isTransactionCommand(command->getName()))) {
+ (!opCtx->inMultiDocumentTransaction() || command->isTransactionCommand())) {
if (_isInternalClient()) {
// WriteConcern should always be explicitly specified by operations received
// from internal clients (ie. from a mongos or mongod), even if it is empty
@@ -1406,6 +1406,14 @@ void ExecCommandDatabase::_initiateCommand() {
Client* client = opCtx->getClient();
+ if (auto scope = request.validatedTenancyScope; scope && scope->hasAuthenticatedUser()) {
+ uassert(ErrorCodes::Unauthorized,
+ str::stream() << "Command " << command->getName()
+ << " is not supported in multitenancy mode",
+ command->allowedWithSecurityToken());
+ _tokenAuthorizationSessionGuard.emplace(opCtx, request.validatedTenancyScope.get());
+ }
+
if (isHello()) {
// Preload generic ClientMetadata ahead of our first hello request. After the first
// request, metaElement should always be empty.
@@ -1429,13 +1437,6 @@ void ExecCommandDatabase::_initiateCommand() {
}
});
- rpc::readRequestMetadata(opCtx, request, command->requiresAuth());
- uassert(ErrorCodes::Unauthorized,
- str::stream() << "Command " << command->getName()
- << " is not supported in multitenancy mode",
- command->allowedWithSecurityToken() || auth::getSecurityToken(opCtx) == boost::none);
- _tokenAuthorizationSessionGuard.emplace(opCtx);
-
rpc::TrackingMetadata::get(opCtx).initWithOperName(command->getName());
auto const replCoord = repl::ReplicationCoordinator::get(opCtx);
@@ -1449,7 +1450,6 @@ void ExecCommandDatabase::_initiateCommand() {
// Start authz contract tracking before we evaluate failpoints
auto authzSession = AuthorizationSession::get(client);
-
authzSession->startContractTracking();
CommandHelpers::evaluateFailCommandFailPoint(opCtx, _invocation.get());
@@ -1683,7 +1683,7 @@ void ExecCommandDatabase::_initiateCommand() {
boost::optional<ChunkVersion> shardVersion;
if (auto shardVersionElem = request.body[ChunkVersion::kShardVersionField]) {
- shardVersion = ChunkVersion::fromBSONPositionalOrNewerFormat(shardVersionElem);
+ shardVersion = ChunkVersion::parse(shardVersionElem);
}
boost::optional<DatabaseVersion> databaseVersion;
@@ -1950,10 +1950,11 @@ void curOpCommandSetup(OperationContext* opCtx, const OpMsgRequest& request) {
Future<void> parseCommand(std::shared_ptr<HandleRequest::ExecutionContext> execContext) try {
const auto& msg = execContext->getMessage();
- auto opMsgReq = rpc::opMsgRequestFromAnyProtocol(msg);
+ auto client = execContext->getOpCtx()->getClient();
+ auto opMsgReq = rpc::opMsgRequestFromAnyProtocol(msg, client);
+
if (msg.operation() == dbQuery) {
- checkAllowedOpQueryCommand(*(execContext->getOpCtx()->getClient()),
- opMsgReq.getCommandName());
+ checkAllowedOpQueryCommand(*client, opMsgReq.getCommandName());
}
execContext->setRequest(opMsgReq);
return Status::OK();
diff --git a/src/mongo/db/session_catalog_mongod.cpp b/src/mongo/db/session_catalog_mongod.cpp
index fe77c218e91..ba5d503ebd8 100644
--- a/src/mongo/db/session_catalog_mongod.cpp
+++ b/src/mongo/db/session_catalog_mongod.cpp
@@ -38,6 +38,7 @@
#include "mongo/db/create_indexes_gen.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/index_builds_coordinator.h"
+#include "mongo/db/internal_transactions_feature_flag_gen.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/ops/write_ops.h"
@@ -374,25 +375,59 @@ int removeExpiredTransactionSessionsFromDisk(
}
void createTransactionTable(OperationContext* opCtx) {
- auto serviceCtx = opCtx->getServiceContext();
CollectionOptions options;
- auto createCollectionStatus =
- repl::StorageInterface::get(serviceCtx)
- ->createCollection(opCtx, NamespaceString::kSessionTransactionsTableNamespace, options);
+ auto storageInterface = repl::StorageInterface::get(opCtx);
+ auto createCollectionStatus = storageInterface->createCollection(
+ opCtx, NamespaceString::kSessionTransactionsTableNamespace, options);
+
if (createCollectionStatus == ErrorCodes::NamespaceExists) {
- return;
- }
+ bool collectionIsEmpty = false;
+ {
+ AutoGetCollection autoColl(
+ opCtx, NamespaceString::kSessionTransactionsTableNamespace, LockMode::MODE_IS);
+ invariant(autoColl);
+
+ if (autoColl->getIndexCatalog()->findIndexByName(
+ opCtx, MongoDSessionCatalog::kConfigTxnsPartialIndexName)) {
+ // Index already exists, so there's nothing to do.
+ return;
+ }
- uassertStatusOKWithContext(
- createCollectionStatus,
- str::stream() << "Failed to create the "
- << NamespaceString::kSessionTransactionsTableNamespace.ns() << " collection");
+ collectionIsEmpty = autoColl->isEmpty(opCtx);
+ }
+
+ if (!collectionIsEmpty) {
+ // Unless explicitly enabled, don't create the index to avoid delaying step up.
+ if (feature_flags::gFeatureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp
+ .isEnabledAndIgnoreFCV()) {
+ AutoGetCollection autoColl(
+ opCtx, NamespaceString::kSessionTransactionsTableNamespace, LockMode::MODE_X);
+ IndexBuildsCoordinator::get(opCtx)->createIndex(
+ opCtx,
+ autoColl->uuid(),
+ MongoDSessionCatalog::getConfigTxnPartialIndexSpec(),
+ IndexBuildsManager::IndexConstraints::kEnforce,
+ false /* fromMigration */);
+ }
+
+ return;
+ }
+
+ // The index does not exist and the collection is empty, so fall through to create it on the
+ // empty collection. This can happen after a failover because the collection and index
+ // creation are recorded as separate oplog entries.
+ } else {
+ uassertStatusOKWithContext(createCollectionStatus,
+ str::stream()
+ << "Failed to create the "
+ << NamespaceString::kSessionTransactionsTableNamespace.ns()
+ << " collection");
+ }
auto indexSpec = MongoDSessionCatalog::getConfigTxnPartialIndexSpec();
- const auto createIndexStatus =
- repl::StorageInterface::get(opCtx)->createIndexesOnEmptyCollection(
- opCtx, NamespaceString::kSessionTransactionsTableNamespace, {indexSpec});
+ const auto createIndexStatus = storageInterface->createIndexesOnEmptyCollection(
+ opCtx, NamespaceString::kSessionTransactionsTableNamespace, {indexSpec});
uassertStatusOKWithContext(
createIndexStatus,
str::stream() << "Failed to create partial index for the "
diff --git a/src/mongo/db/sessions_collection.cpp b/src/mongo/db/sessions_collection.cpp
index 45c59c3631d..b72c85cbadc 100644
--- a/src/mongo/db/sessions_collection.cpp
+++ b/src/mongo/db/sessions_collection.cpp
@@ -236,7 +236,7 @@ LogicalSessionIdSet SessionsCollection::_doFindRemoved(
auto wrappedSend = [&](BSONObj batch) {
BSONObjBuilder batchWithReadConcernLocal(batch);
batchWithReadConcernLocal.append(repl::ReadConcernArgs::kReadConcernFieldName,
- repl::ReadConcernArgs::kImplicitDefault);
+ repl::ReadConcernArgs::kLocal);
auto swBatchResult = send(batchWithReadConcernLocal.obj());
auto result =
diff --git a/src/mongo/db/sessions_collection_rs.cpp b/src/mongo/db/sessions_collection_rs.cpp
index 8d53352c455..1bce83b547f 100644
--- a/src/mongo/db/sessions_collection_rs.cpp
+++ b/src/mongo/db/sessions_collection_rs.cpp
@@ -38,7 +38,6 @@
#include "mongo/bson/bsonobj.h"
#include "mongo/client/authenticate.h"
#include "mongo/client/connection_string.h"
-#include "mongo/client/query.h"
#include "mongo/client/read_preference.h"
#include "mongo/client/remote_command_targeter_factory_impl.h"
#include "mongo/db/concurrency/d_concurrency.h"
diff --git a/src/mongo/db/sessions_collection_standalone.cpp b/src/mongo/db/sessions_collection_standalone.cpp
index 477f20b28a9..93e7aab9821 100644
--- a/src/mongo/db/sessions_collection_standalone.cpp
+++ b/src/mongo/db/sessions_collection_standalone.cpp
@@ -31,7 +31,6 @@
#include "mongo/db/sessions_collection_standalone.h"
-#include "mongo/client/query.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/operation_context.h"
#include "mongo/rpc/get_status_from_command_result.h"
diff --git a/src/mongo/db/stats/counters.h b/src/mongo/db/stats/counters.h
index 3d054806f46..cbf429127f6 100644
--- a/src/mongo/db/stats/counters.h
+++ b/src/mongo/db/stats/counters.h
@@ -113,7 +113,7 @@ public:
_checkWrap(&OpCounters::_acceptableErrorInCommand, 1);
}
- // thse are used by snmp, and other things, do not remove
+ // these are used by metrics and other things, do not remove
const AtomicWord<long long>* getInsert() const {
return &*_insert;
}
diff --git a/src/mongo/db/storage/kv/durable_catalog_test.cpp b/src/mongo/db/storage/kv/durable_catalog_test.cpp
index ed3162f03fd..9df5105231f 100644
--- a/src/mongo/db/storage/kv/durable_catalog_test.cpp
+++ b/src/mongo/db/storage/kv/durable_catalog_test.cpp
@@ -62,6 +62,8 @@ static const long kExpectedVersion = 1;
class DurableCatalogTest : public CatalogTestFixture {
public:
+ explicit DurableCatalogTest(Options options = {}) : CatalogTestFixture(std::move(options)) {}
+
void setUp() override {
CatalogTestFixture::setUp();
@@ -143,13 +145,17 @@ public:
WriteUnitOfWork wuow(operationContext());
const bool isSecondaryBackgroundIndexBuild = false;
boost::optional<UUID> buildUUID(twoPhase, UUID::gen());
- ASSERT_OK(collWriter.getWritableCollection()->prepareForIndexBuild(
- operationContext(), desc.get(), buildUUID, isSecondaryBackgroundIndexBuild));
- entry = collWriter.getWritableCollection()->getIndexCatalog()->createIndexEntry(
- operationContext(),
- collWriter.getWritableCollection(),
- std::move(desc),
- CreateIndexEntryFlags::kNone);
+ ASSERT_OK(collWriter.getWritableCollection(operationContext())
+ ->prepareForIndexBuild(operationContext(),
+ desc.get(),
+ buildUUID,
+ isSecondaryBackgroundIndexBuild));
+ entry = collWriter.getWritableCollection(operationContext())
+ ->getIndexCatalog()
+ ->createIndexEntry(operationContext(),
+ collWriter.getWritableCollection(operationContext()),
+ std::move(desc),
+ CreateIndexEntryFlags::kNone);
wuow.commit();
}
@@ -191,6 +197,9 @@ private:
};
class ImportCollectionTest : public DurableCatalogTest {
+public:
+ explicit ImportCollectionTest() : DurableCatalogTest(Options{}.ephemeral(false)) {}
+
protected:
void setUp() override {
DurableCatalogTest::setUp();
@@ -541,8 +550,9 @@ TEST_F(DurableCatalogTest, SinglePhaseIndexBuild) {
Lock::CollectionLock collLk(operationContext(), collection->ns(), MODE_X);
WriteUnitOfWork wuow(operationContext());
- getCollectionWriter().getWritableCollection()->indexBuildSuccess(operationContext(),
- indexEntry);
+ getCollectionWriter()
+ .getWritableCollection(operationContext())
+ ->indexBuildSuccess(operationContext(), indexEntry);
wuow.commit();
}
@@ -564,8 +574,9 @@ TEST_F(DurableCatalogTest, TwoPhaseIndexBuild) {
Lock::CollectionLock collLk(operationContext(), collection->ns(), MODE_X);
WriteUnitOfWork wuow(operationContext());
- getCollectionWriter().getWritableCollection()->indexBuildSuccess(operationContext(),
- indexEntry);
+ getCollectionWriter()
+ .getWritableCollection(operationContext())
+ ->indexBuildSuccess(operationContext(), indexEntry);
wuow.commit();
}
diff --git a/src/mongo/db/storage/kv/storage_engine_test.cpp b/src/mongo/db/storage/kv/storage_engine_test.cpp
index 2b600a7479d..ae0569a10ec 100644
--- a/src/mongo/db/storage/kv/storage_engine_test.cpp
+++ b/src/mongo/db/storage/kv/storage_engine_test.cpp
@@ -685,7 +685,7 @@ TEST_F(TimestampKVEngineTest, TimestampAdvancesOnNotification) {
_storageEngine->getTimestampMonitor()->clearListeners();
}
-TEST_F(StorageEngineTest, UseAlternateStorageLocation) {
+TEST_F(StorageEngineTestNotEphemeral, UseAlternateStorageLocation) {
auto opCtx = cc().makeOperationContext();
const NamespaceString coll1Ns("db.coll1");
diff --git a/src/mongo/db/storage/record_store_test_oplog.cpp b/src/mongo/db/storage/record_store_test_oplog.cpp
index 6c61f93ee76..cc014de1681 100644
--- a/src/mongo/db/storage/record_store_test_oplog.cpp
+++ b/src/mongo/db/storage/record_store_test_oplog.cpp
@@ -530,7 +530,7 @@ TEST(RecordStoreTestHarness, OplogVisibilityStandalone) {
rs->insertRecord(opCtx.get(), obj.objdata(), obj.objsize(), Timestamp());
ASSERT_OK(res.getStatus());
id1 = res.getValue();
- StatusWith<RecordId> expectedId = record_id_helpers::keyForOptime(ts);
+ StatusWith<RecordId> expectedId = record_id_helpers::keyForOptime(ts, KeyFormat::Long);
ASSERT_OK(expectedId.getStatus());
// RecordId should be extracted from 'ts' field when inserting into oplog namespace
ASSERT(expectedId.getValue().compare(id1) == 0);
diff --git a/src/mongo/db/storage/storage_engine_test_fixture.h b/src/mongo/db/storage/storage_engine_test_fixture.h
index eaedd287615..4e8d0e27f54 100644
--- a/src/mongo/db/storage/storage_engine_test_fixture.h
+++ b/src/mongo/db/storage/storage_engine_test_fixture.h
@@ -202,7 +202,8 @@ public:
class StorageEngineRepairTest : public StorageEngineTest {
public:
- StorageEngineRepairTest() : StorageEngineTest(Options{}.repair(RepairAction::kRepair)) {}
+ StorageEngineRepairTest()
+ : StorageEngineTest(Options{}.repair(RepairAction::kRepair).ephemeral(false)) {}
void tearDown() {
auto repairObserver = StorageRepairObserver::get(getGlobalServiceContext());
@@ -220,6 +221,11 @@ public:
}
};
+class StorageEngineTestNotEphemeral : public StorageEngineTest {
+public:
+ StorageEngineTestNotEphemeral() : StorageEngineTest(Options{}.ephemeral(false)){};
+};
+
} // namespace mongo
#undef MONGO_LOGV2_DEFAULT_COMPONENT
diff --git a/src/mongo/db/storage/storage_options.h b/src/mongo/db/storage/storage_options.h
index 0455f09d245..108ae66629c 100644
--- a/src/mongo/db/storage/storage_options.h
+++ b/src/mongo/db/storage/storage_options.h
@@ -82,6 +82,9 @@ struct StorageGlobalParams {
bool dur; // --dur durability (now --journal)
+ // Whether the selected storage engine should be ephemeral (in-memory) or not.
+ bool ephemeral = false;
+
// --journalCommitInterval
static constexpr int kMaxJournalCommitIntervalMs = 500;
AtomicWord<int> journalCommitIntervalMs;
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
index e5ceaea5c82..6277a27f62e 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
@@ -118,7 +118,6 @@ public:
"RAM. See http://dochub.mongodb.org/core/faq-memory-diagnostics-wt");
}
}
- const bool ephemeral = false;
auto kv =
std::make_unique<WiredTigerKVEngine>(getCanonicalName().toString(),
params.dbpath,
@@ -127,7 +126,7 @@ public:
cacheMB,
wiredTigerGlobalOptions.getMaxHistoryFileSizeMB(),
params.dur,
- ephemeral,
+ params.ephemeral,
params.repair);
kv->setRecordStoreExtraOptions(wiredTigerGlobalOptions.collectionConfig);
kv->setSortedDataInterfaceExtraOptions(wiredTigerGlobalOptions.indexConfig);
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index 6fa19a03260..7cfbe29f304 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -468,6 +468,12 @@ WiredTigerKVEngine::WiredTigerKVEngine(const std::string& canonicalName,
ss << WiredTigerUtil::generateRestoreConfig() << ",";
}
+ // If we've requested an ephemeral instance, we store everything in memory instead of backing
+ // it onto disk. Logging is not supported for in-memory instances, so we also disable it.
+ if (_ephemeral) {
+ ss << "in_memory=true,log=(enabled=false),";
+ }
+
string config = ss.str();
LOGV2(22315, "Opening WiredTiger", "config"_attr = config);
auto startTime = Date_t::now();
@@ -1994,6 +2000,11 @@ bool WiredTigerKVEngine::supportsDirectoryPerDB() const {
}
void WiredTigerKVEngine::_checkpoint(WT_SESSION* session) {
+ // Ephemeral WiredTiger instances cannot do a checkpoint to disk as there is no disk backing
+ // the data.
+ if (_ephemeral) {
+ return;
+ }
// TODO: SERVER-64507: Investigate whether we can smartly rely on one checkpointer if two or
// more threads checkpoint at the same time.
stdx::lock_guard lk(_checkpointMutex);
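
Editorial note: taken together with the storage_options.h and service_context_d_test_fixture changes earlier in this patch, the new ephemeral flag flows from the test fixture options into storageGlobalParams and finally into the WiredTiger open configuration. The sketch below is a rough illustration of the effect on the configuration string and on checkpointing; the real engine builds a far longer string, and the cache size here is just a placeholder.

// Hypothetical sketch of how the ephemeral flag would shape the WiredTiger
// open-configuration string and the checkpoint decision.
#include <iostream>
#include <sstream>
#include <string>

std::string buildOpenConfig(bool ephemeral) {
    std::stringstream ss;
    ss << "create,cache_size=256M,";  // placeholder for the real options
    if (ephemeral) {
        // In-memory instances have no disk backing, and WiredTiger logging is
        // not supported for them, so it is disabled explicitly.
        ss << "in_memory=true,log=(enabled=false),";
    }
    return ss.str();
}

bool shouldCheckpoint(bool ephemeral) {
    // Ephemeral instances skip checkpoints: there is no disk to checkpoint to.
    return !ephemeral;
}

int main() {
    std::cout << buildOpenConfig(true) << "\n";
    std::cout << std::boolalpha << shouldCheckpoint(true) << "\n";
    return 0;
}
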
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
index fdf49f19ba2..a84b6ca6061 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
@@ -239,8 +239,6 @@ void WiredTigerOplogManager::_updateOplogVisibilityLoop(WiredTigerSessionCache*
invariant(_triggerOplogVisibilityUpdate);
_triggerOplogVisibilityUpdate = false;
- lk.unlock();
-
// Fetch the all_durable timestamp from the storage engine, which is guaranteed not to have
// any holes behind it in-memory.
const uint64_t newTimestamp = sessionCache->getKVEngine()->getAllDurableTimestamp().asULL();
@@ -256,7 +254,6 @@ void WiredTigerOplogManager::_updateOplogVisibilityLoop(WiredTigerSessionCache*
continue;
}
- lk.lock();
// Publish the new timestamp value. Avoid going backward.
auto currentVisibleTimestamp = getOplogReadTimestamp();
if (newTimestamp > currentVisibleTimestamp) {
diff --git a/src/mongo/db/tenant_id.h b/src/mongo/db/tenant_id.h
index 7af08a45d95..dc9b46705c3 100644
--- a/src/mongo/db/tenant_id.h
+++ b/src/mongo/db/tenant_id.h
@@ -55,12 +55,12 @@ public:
*/
static const TenantId kSystemTenantId;
- explicit TenantId(const OID& oid) : _oid(oid), _idStr(oid.toString()) {}
+ explicit TenantId(const OID& oid) : _oid(oid) {}
TenantId() = delete;
- const std::string& toString() const {
- return _idStr;
+ std::string toString() const {
+ return _oid.toString();
}
/**
@@ -105,7 +105,6 @@ public:
private:
OID _oid;
- std::string _idStr;
};
inline bool operator==(const TenantId& lhs, const TenantId& rhs) {
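The TenantId change above drops the cached `_idStr` member and formats the string on demand, so `toString()` now returns by value. Below is a standalone sketch of the same trade-off, using a hypothetical 12-byte `SmallId` type rather than `mongo::OID`.

    // Sketch: keep only the raw bytes and format the hex string on demand
    // instead of caching a string copy alongside the id.
    #include <array>
    #include <cstdint>
    #include <cstdio>
    #include <iostream>
    #include <string>

    class SmallId {
    public:
        explicit SmallId(const std::array<uint8_t, 12>& bytes) : _bytes(bytes) {}

        // Returned by value: each call pays a small formatting cost, but the
        // object stays at 12 bytes instead of 12 bytes plus a std::string.
        std::string toString() const {
            std::string out;
            out.reserve(2 * _bytes.size());
            char buf[3];
            for (uint8_t b : _bytes) {
                std::snprintf(buf, sizeof(buf), "%02x", static_cast<unsigned>(b));
                out += buf;
            }
            return out;
        }

    private:
        std::array<uint8_t, 12> _bytes;
    };

    int main() {
        SmallId id({0x62, 0x9e, 0x1e, 0x68, 0x09, 0x58, 0xe2, 0x79, 0xdc, 0x29, 0xa5, 0x17});
        std::cout << id.toString() << "\n";  // prints 629e1e680958e279dc29a517
        return 0;
    }
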
diff --git a/src/mongo/db/timeseries/SConscript b/src/mongo/db/timeseries/SConscript
index 0be6ef8c7a3..42b4803dc5e 100644
--- a/src/mongo/db/timeseries/SConscript
+++ b/src/mongo/db/timeseries/SConscript
@@ -27,12 +27,15 @@ env.Library(
'flat_bson.cpp',
],
LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/bson/util/bson_column',
'$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/commands/server_status',
'$BUILD_DIR/mongo/db/namespace_string',
'$BUILD_DIR/mongo/db/server_options_core',
+ '$BUILD_DIR/mongo/db/storage/storage_options',
'$BUILD_DIR/mongo/db/views/views',
'$BUILD_DIR/mongo/util/fail_point',
+ 'bucket_compression',
'timeseries_options',
],
)
diff --git a/src/mongo/db/timeseries/bucket_catalog.cpp b/src/mongo/db/timeseries/bucket_catalog.cpp
index 71ffb71dbec..41148c11dd0 100644
--- a/src/mongo/db/timeseries/bucket_catalog.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog.cpp
@@ -34,11 +34,17 @@
#include <algorithm>
#include <boost/iterator/transform_iterator.hpp>
+#include "mongo/bson/util/bsoncolumn.h"
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/commands/server_status.h"
#include "mongo/db/concurrency/exception_util.h"
#include "mongo/db/operation_context.h"
+#include "mongo/db/storage/storage_parameters_gen.h"
+#include "mongo/db/timeseries/bucket_catalog_helpers.h"
+#include "mongo/db/timeseries/bucket_compression.h"
+#include "mongo/db/timeseries/timeseries_constants.h"
#include "mongo/db/timeseries/timeseries_options.h"
+#include "mongo/logv2/redaction.h"
#include "mongo/platform/compiler.h"
#include "mongo/stdx/thread.h"
#include "mongo/util/fail_point.h"
@@ -255,6 +261,24 @@ void BucketCatalog::ExecutionStatsController::incNumBucketsClosedDueToMemoryThre
_globalStats->numBucketsClosedDueToMemoryThreshold.fetchAndAddRelaxed(increment);
}
+void BucketCatalog::ExecutionStatsController::incNumBucketsArchivedDueToTimeForward(
+ long long increment) {
+ _collectionStats->numBucketsArchivedDueToTimeForward.fetchAndAddRelaxed(increment);
+ _globalStats->numBucketsArchivedDueToTimeForward.fetchAndAddRelaxed(increment);
+}
+
+void BucketCatalog::ExecutionStatsController::incNumBucketsArchivedDueToTimeBackward(
+ long long increment) {
+ _collectionStats->numBucketsArchivedDueToTimeBackward.fetchAndAddRelaxed(increment);
+ _globalStats->numBucketsArchivedDueToTimeBackward.fetchAndAddRelaxed(increment);
+}
+
+void BucketCatalog::ExecutionStatsController::incNumBucketsArchivedDueToMemoryThreshold(
+ long long increment) {
+ _collectionStats->numBucketsArchivedDueToMemoryThreshold.fetchAndAddRelaxed(increment);
+ _globalStats->numBucketsArchivedDueToMemoryThreshold.fetchAndAddRelaxed(increment);
+}
+
void BucketCatalog::ExecutionStatsController::incNumCommits(long long increment) {
_collectionStats->numCommits.fetchAndAddRelaxed(increment);
_globalStats->numCommits.fetchAndAddRelaxed(increment);
@@ -270,11 +294,23 @@ void BucketCatalog::ExecutionStatsController::incNumMeasurementsCommitted(long l
_globalStats->numMeasurementsCommitted.fetchAndAddRelaxed(increment);
}
+void BucketCatalog::ExecutionStatsController::incNumBucketsReopened(long long increment) {
+ _collectionStats->numBucketsReopened.fetchAndAddRelaxed(increment);
+ _globalStats->numBucketsReopened.fetchAndAddRelaxed(increment);
+}
+
+void BucketCatalog::ExecutionStatsController::incNumBucketsKeptOpenDueToLargeMeasurements(
+ long long increment) {
+ _collectionStats->numBucketsKeptOpenDueToLargeMeasurements.fetchAndAddRelaxed(increment);
+ _globalStats->numBucketsKeptOpenDueToLargeMeasurements.fetchAndAddRelaxed(increment);
+}
+
class BucketCatalog::Bucket {
public:
friend class BucketCatalog;
- Bucket(const OID& id, StripeNumber stripe) : _id(id), _stripe(stripe) {}
+ Bucket(const OID& id, StripeNumber stripe, BucketKey::Hash hash)
+ : _id(id), _stripe(stripe), _keyHash(hash) {}
/**
* Returns the ID for the underlying bucket.
@@ -290,6 +326,13 @@ public:
return _stripe;
}
+ /**
+ * Returns the pre-computed hash of the corresponding BucketKey
+ */
+ BucketKey::Hash keyHash() const {
+ return _keyHash;
+ }
+
// Returns the time associated with the bucket (id)
Date_t getTime() const {
return _minTime;
@@ -338,7 +381,6 @@ private:
void _calculateBucketFieldsAndSizeChange(const BSONObj& doc,
boost::optional<StringData> metaField,
NewFieldNames* newFieldNamesToBeInserted,
- uint32_t* newFieldNamesSize,
uint32_t* sizeToBeAdded) const {
// BSON size for an object with an empty object field where field name is empty string.
// We can use this as an offset to know the size when we have real field names.
@@ -347,7 +389,6 @@ private:
dassert(emptyObjSize == BSON("" << BSONObj()).objsize());
newFieldNamesToBeInserted->clear();
- *newFieldNamesSize = 0;
*sizeToBeAdded = 0;
auto numMeasurementsFieldLength = numDigits(_numMeasurements);
for (const auto& elem : doc) {
@@ -357,12 +398,24 @@ private:
continue;
}
- // If the field name is new, add the size of an empty object with that field name.
auto hashedKey = StringSet::hasher().hashed_key(fieldName);
if (!_fieldNames.contains(hashedKey)) {
+ // Record the new field name only if it hasn't been committed yet. There could be
+ // concurrent batches writing to this bucket with the same new field name, but
+ // they're not guaranteed to commit successfully.
newFieldNamesToBeInserted->push_back(hashedKey);
- *newFieldNamesSize += elem.fieldNameSize();
- *sizeToBeAdded += emptyObjSize + fieldName.size();
+
+ // Only update the bucket size once to account for the new field name if it isn't
+ // already pending a commit from another batch.
+ if (!_uncommittedFieldNames.contains(hashedKey)) {
+ // Add the size of an empty object with that field name.
+ *sizeToBeAdded += emptyObjSize + fieldName.size();
+
+ // The control.min and control.max summaries don't have any information for this
+ // new field name yet. Add two measurements worth of data to account for this.
+ // As this is the first measurement for this field, min == max.
+ *sizeToBeAdded += elem.size() * 2;
+ }
}
// Add the element size, taking into account that the name will be changed to its
@@ -400,20 +453,21 @@ private:
// The stripe which owns this bucket.
const StripeNumber _stripe;
+ // The pre-computed hash of the associated BucketKey
+ const BucketKey::Hash _keyHash;
+
// The namespace that this bucket is used for.
NamespaceString _ns;
// The metadata of the data that this bucket contains.
BucketMetadata _metadata;
- // Extra metadata combinations that are supported without normalizing the metadata object.
- static constexpr std::size_t kNumFieldOrderCombinationsWithoutNormalizing = 1;
- boost::container::static_vector<BSONObj, kNumFieldOrderCombinationsWithoutNormalizing>
- _nonNormalizedKeyMetadatas;
-
- // Top-level field names of the measurements that have been inserted into the bucket.
+ // Top-level hashed field names of the measurements that have been inserted into the bucket.
StringSet _fieldNames;
+ // Top-level hashed new field names that have not yet been committed into the bucket.
+ StringSet _uncommittedFieldNames;
+
// Time field for the measurements that have been inserted into the bucket.
std::string _timeField;
@@ -427,9 +481,6 @@ private:
// measurements.
timeseries::Schema _schema;
- // The latest time that has been inserted into the bucket.
- Date_t _latestTime;
-
// The total size in bytes of the bucket's BSON serialization, including measurements to be
// inserted.
uint64_t _size = 0;
@@ -441,9 +492,14 @@ private:
// The number of committed measurements in the bucket.
uint32_t _numCommittedMeasurements = 0;
- // Whether the bucket is full. This can be due to number of measurements, size, or time
+    // Whether the bucket has been marked for a rollover action. It can be marked for closure due to
+    // the number of measurements, size, or schema changes, or it can be marked for archival due to time
// range.
- bool _full = false;
+ RolloverAction _rolloverAction = RolloverAction::kNone;
+
+ // Whether this bucket was kept open after exceeding the bucket max size to improve bucketing
+ // performance for large measurements.
+ bool _keptOpenDueToLargeMeasurements = false;
// The batch that has been prepared and is currently in the process of being committed, if
// any.
@@ -533,9 +589,10 @@ void BucketCatalog::WriteBatch::_addMeasurement(const BSONObj& doc) {
_measurements.push_back(doc);
}
-void BucketCatalog::WriteBatch::_recordNewFields(NewFieldNames&& fields) {
+void BucketCatalog::WriteBatch::_recordNewFields(Bucket* bucket, NewFieldNames&& fields) {
for (auto&& field : fields) {
_newFieldNamesToBeInserted[field] = field.hash();
+ bucket->_uncommittedFieldNames.emplace(field);
}
}
@@ -547,6 +604,7 @@ void BucketCatalog::WriteBatch::_prepareCommit(Bucket* bucket) {
// by someone else.
for (auto it = _newFieldNamesToBeInserted.begin(); it != _newFieldNamesToBeInserted.end();) {
StringMapHashedKey fieldName(it->first, it->second);
+ bucket->_uncommittedFieldNames.erase(fieldName);
if (bucket->_fieldNames.contains(fieldName)) {
_newFieldNamesToBeInserted.erase(it++);
continue;
@@ -597,6 +655,104 @@ BucketCatalog& BucketCatalog::get(OperationContext* opCtx) {
return get(opCtx->getServiceContext());
}
+Status BucketCatalog::reopenBucket(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const BSONObj& bucketDoc) {
+ const NamespaceString ns = coll->ns().getTimeseriesViewNamespace();
+ const boost::optional<TimeseriesOptions> options = coll->getTimeseriesOptions();
+ invariant(options,
+ str::stream() << "Attempting to reopen a bucket for a non-timeseries collection: "
+ << ns);
+
+ BSONElement bucketIdElem = bucketDoc.getField(timeseries::kBucketIdFieldName);
+ if (bucketIdElem.eoo() || bucketIdElem.type() != BSONType::jstOID) {
+ return {ErrorCodes::BadValue,
+ str::stream() << timeseries::kBucketIdFieldName
+ << " is missing or not an ObjectId"};
+ }
+
+ // Validate the bucket document against the schema.
+ auto result = coll->checkValidation(opCtx, bucketDoc);
+ if (result.first != Collection::SchemaValidationResult::kPass) {
+ return result.second;
+ }
+
+ BSONElement metadata;
+ auto metaFieldName = options->getMetaField();
+ if (metaFieldName) {
+ metadata = bucketDoc.getField(*metaFieldName);
+ }
+
+ // Buckets are spread across independently-lockable stripes to improve parallelism. We map a
+ // bucket to a stripe by hashing the BucketKey.
+ auto key = BucketKey{ns, BucketMetadata{metadata, coll->getDefaultCollator()}};
+ auto stripeNumber = _getStripeNumber(key);
+
+ auto bucketId = bucketIdElem.OID();
+ std::unique_ptr<Bucket> bucket = std::make_unique<Bucket>(bucketId, stripeNumber, key.hash);
+
+ // Initialize the remaining member variables from the bucket document.
+ bucket->_ns = ns;
+ bucket->_metadata = key.metadata;
+ bucket->_timeField = options->getTimeField().toString();
+ bucket->_size = bucketDoc.objsize();
+ bucket->_minTime = bucketDoc.getObjectField(timeseries::kBucketControlFieldName)
+ .getObjectField(timeseries::kBucketControlMinFieldName)
+ .getField(options->getTimeField())
+ .Date();
+
+ // Populate the top-level data field names.
+ const BSONObj& dataObj = bucketDoc.getObjectField(timeseries::kBucketDataFieldName);
+ for (const BSONElement& dataElem : dataObj) {
+ auto hashedKey = StringSet::hasher().hashed_key(dataElem.fieldName());
+ bucket->_fieldNames.emplace(hashedKey);
+ }
+
+ auto swMinMax = timeseries::generateMinMaxFromBucketDoc(bucketDoc, coll->getDefaultCollator());
+ if (!swMinMax.isOK()) {
+ return swMinMax.getStatus();
+ }
+ bucket->_minmax = std::move(swMinMax.getValue());
+
+ auto swSchema = timeseries::generateSchemaFromBucketDoc(bucketDoc, coll->getDefaultCollator());
+ if (!swSchema.isOK()) {
+ return swSchema.getStatus();
+ }
+ bucket->_schema = std::move(swSchema.getValue());
+
+ uint32_t numMeasurements = 0;
+ const bool isCompressed = timeseries::isCompressedBucket(bucketDoc);
+ const BSONElement timeColumnElem = dataObj.getField(options->getTimeField());
+
+ if (isCompressed && timeColumnElem.type() == BSONType::BinData) {
+ BSONColumn storage{timeColumnElem};
+ numMeasurements = storage.size();
+ } else {
+ numMeasurements = timeColumnElem.Obj().nFields();
+ }
+
+ bucket->_numMeasurements = numMeasurements;
+ bucket->_numCommittedMeasurements = numMeasurements;
+
+ ExecutionStatsController stats = _getExecutionStats(ns);
+ stats.incNumBucketsReopened();
+
+ // Register the reopened bucket with the catalog.
+ auto& stripe = _stripes[stripeNumber];
+ stdx::lock_guard stripeLock{stripe.mutex};
+
+ ClosedBuckets closedBuckets;
+ _expireIdleBuckets(&stripe, stripeLock, stats, &closedBuckets);
+
+ auto [it, inserted] = stripe.allBuckets.try_emplace(bucketId, std::move(bucket));
+ tassert(6668200, "Expected bucket to be inserted", inserted);
+ Bucket* unownedBucket = it->second.get();
+ stripe.openBuckets[key] = unownedBucket;
+ _initializeBucketState(bucketId);
+
+ return Status::OK();
+}
+
BSONObj BucketCatalog::getMetadata(const BucketHandle& handle) const {
auto const& stripe = _stripes[handle.stripe];
stdx::lock_guard stripeLock{stripe.mutex};
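The comment in `reopenBucket` above notes that buckets are spread across independently-lockable stripes, selected by hashing the BucketKey. Here is a stripped-down sketch of that striping pattern; the stripe count, key type, and bucket type are illustrative stand-ins, not the catalog's.

    // Sketch of striped locking: each key maps to one stripe, and only that
    // stripe's mutex is taken, so unrelated keys can be updated in parallel.
    #include <array>
    #include <cstddef>
    #include <functional>
    #include <mutex>
    #include <string>
    #include <unordered_map>

    constexpr std::size_t kNumberOfStripes = 32;

    struct Stripe {
        std::mutex mutex;                                  // protects the map below
        std::unordered_map<std::string, int> openBuckets;  // key -> stand-in bucket
    };

    class StripedCatalog {
    public:
        std::size_t stripeFor(const std::string& key) const {
            return std::hash<std::string>{}(key) % kNumberOfStripes;
        }

        void upsert(const std::string& key, int bucket) {
            Stripe& stripe = _stripes[stripeFor(key)];
            std::lock_guard<std::mutex> lk(stripe.mutex);
            stripe.openBuckets[key] = bucket;
        }

    private:
        std::array<Stripe, kNumberOfStripes> _stripes;
    };
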
@@ -648,59 +804,91 @@ StatusWith<BucketCatalog::InsertResult> BucketCatalog::insert(
invariant(bucket);
NewFieldNames newFieldNamesToBeInserted;
- uint32_t newFieldNamesSize = 0;
uint32_t sizeToBeAdded = 0;
- bucket->_calculateBucketFieldsAndSizeChange(doc,
- options.getMetaField(),
- &newFieldNamesToBeInserted,
- &newFieldNamesSize,
- &sizeToBeAdded);
+ bucket->_calculateBucketFieldsAndSizeChange(
+ doc, options.getMetaField(), &newFieldNamesToBeInserted, &sizeToBeAdded);
+
+ auto determineRolloverAction = [&](Bucket* bucket) -> RolloverAction {
+ const bool canArchive = feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility);
- auto shouldCloseBucket = [&](Bucket* bucket) -> bool {
if (bucket->schemaIncompatible(doc, metaFieldName, comparator)) {
stats.incNumBucketsClosedDueToSchemaChange();
- return true;
+ return RolloverAction::kClose;
}
if (bucket->_numMeasurements == static_cast<std::uint64_t>(gTimeseriesBucketMaxCount)) {
stats.incNumBucketsClosedDueToCount();
- return true;
- }
- if (bucket->_size + sizeToBeAdded > static_cast<std::uint64_t>(gTimeseriesBucketMaxSize)) {
- stats.incNumBucketsClosedDueToSize();
- return true;
+ return RolloverAction::kClose;
}
auto bucketTime = bucket->getTime();
if (time - bucketTime >= Seconds(*options.getBucketMaxSpanSeconds())) {
- stats.incNumBucketsClosedDueToTimeForward();
- return true;
+ if (canArchive) {
+ stats.incNumBucketsArchivedDueToTimeForward();
+ return RolloverAction::kArchive;
+ } else {
+ stats.incNumBucketsClosedDueToTimeForward();
+ return RolloverAction::kClose;
+ }
}
if (time < bucketTime) {
- stats.incNumBucketsClosedDueToTimeBackward();
- return true;
+ if (canArchive) {
+ stats.incNumBucketsArchivedDueToTimeBackward();
+ return RolloverAction::kArchive;
+ } else {
+ stats.incNumBucketsClosedDueToTimeBackward();
+ return RolloverAction::kClose;
+ }
}
- return false;
+ if (bucket->_size + sizeToBeAdded > static_cast<std::uint64_t>(gTimeseriesBucketMaxSize)) {
+ bool keepBucketOpenForLargeMeasurements =
+ bucket->_numMeasurements < static_cast<std::uint64_t>(gTimeseriesBucketMinCount) &&
+ feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility);
+ if (keepBucketOpenForLargeMeasurements) {
+ // Instead of packing the bucket to the BSON size limit, 16MB, we'll limit the max
+ // bucket size to 12MB. This is to leave some space in the bucket if we need to add
+ // new internal fields to existing, full buckets.
+ static constexpr size_t largeMeasurementsMaxBucketSize =
+ BSONObjMaxUserSize - (4 * 1024 * 1024);
+
+ if (bucket->_size + sizeToBeAdded > largeMeasurementsMaxBucketSize) {
+ stats.incNumBucketsClosedDueToSize();
+ return RolloverAction::kClose;
+ }
+
+ // There's enough space to add this measurement and we're still below the large
+ // measurement threshold.
+ if (!bucket->_keptOpenDueToLargeMeasurements) {
+ // Only increment this metric once per bucket.
+ bucket->_keptOpenDueToLargeMeasurements = true;
+ stats.incNumBucketsKeptOpenDueToLargeMeasurements();
+ }
+ return RolloverAction::kNone;
+ } else {
+ stats.incNumBucketsClosedDueToSize();
+ return RolloverAction::kClose;
+ }
+ }
+ return RolloverAction::kNone;
};
- if (!bucket->_ns.isEmpty() && shouldCloseBucket(bucket)) {
- info.openedDuetoMetadata = false;
- bucket = _rollover(&stripe, stripeLock, bucket, info);
+ if (!bucket->_ns.isEmpty()) {
+ auto action = determineRolloverAction(bucket);
+ if (action != RolloverAction::kNone) {
+ info.openedDuetoMetadata = false;
+ bucket = _rollover(&stripe, stripeLock, bucket, info, action);
- bucket->_calculateBucketFieldsAndSizeChange(doc,
- options.getMetaField(),
- &newFieldNamesToBeInserted,
- &newFieldNamesSize,
- &sizeToBeAdded);
+ bucket->_calculateBucketFieldsAndSizeChange(
+ doc, options.getMetaField(), &newFieldNamesToBeInserted, &sizeToBeAdded);
+ }
}
auto batch = bucket->_activeBatch(getOpId(opCtx, combine), stats);
batch->_addMeasurement(doc);
- batch->_recordNewFields(std::move(newFieldNamesToBeInserted));
+ batch->_recordNewFields(bucket, std::move(newFieldNamesToBeInserted));
bucket->_numMeasurements++;
bucket->_size += sizeToBeAdded;
- if (time > bucket->_latestTime) {
- bucket->_latestTime = time;
- }
if (bucket->_ns.isEmpty()) {
// The namespace and metadata only need to be set if this bucket was newly created.
bucket->_ns = ns;
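The `determineRolloverAction` lambda above replaces the old boolean `shouldCloseBucket` with a three-way decision: keep the bucket, archive it (time out of range, feature enabled), or close it, plus a carve-out that keeps an oversized bucket open for large measurements while it stays below a minimum count and a reduced hard cap. A self-contained sketch of that decision follows; the thresholds and names are illustrative, not the server parameters.

    // Sketch of the rollover decision: close on schema/count problems, archive
    // (when allowed) on time range, and tolerate oversized buckets with few,
    // large measurements up to a reduced hard cap.
    #include <cstdint>

    enum class RolloverAction { kNone, kArchive, kClose };

    struct BucketState {
        uint64_t numMeasurements = 0;
        uint64_t sizeBytes = 0;
        int64_t minTimeMs = 0;  // bucket's minimum timestamp
        bool schemaIncompatible = false;
    };

    constexpr uint64_t kMaxCount = 1000;
    constexpr uint64_t kMinCountForLargeMeasurements = 10;
    constexpr uint64_t kMaxSizeBytes = 125 * 1024;                   // normal soft limit
    constexpr uint64_t kLargeMeasurementHardCap = 12 * 1024 * 1024;  // leaves headroom below 16MB
    constexpr int64_t kMaxSpanMs = 3600 * 1000;

    RolloverAction determineRolloverAction(const BucketState& b,
                                           uint64_t sizeToBeAdded,
                                           int64_t newTimeMs,
                                           bool canArchive) {
        if (b.schemaIncompatible || b.numMeasurements >= kMaxCount) {
            return RolloverAction::kClose;
        }
        if (newTimeMs - b.minTimeMs >= kMaxSpanMs || newTimeMs < b.minTimeMs) {
            // Time moved out of range: archive if the feature allows it, else close.
            return canArchive ? RolloverAction::kArchive : RolloverAction::kClose;
        }
        if (b.sizeBytes + sizeToBeAdded > kMaxSizeBytes) {
            const bool keepOpenForLargeMeasurements =
                canArchive && b.numMeasurements < kMinCountForLargeMeasurements;
            if (keepOpenForLargeMeasurements &&
                b.sizeBytes + sizeToBeAdded <= kLargeMeasurementHardCap) {
                return RolloverAction::kNone;  // few, large measurements: keep packing
            }
            return RolloverAction::kClose;
        }
        return RolloverAction::kNone;
    }
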
@@ -799,29 +987,21 @@ boost::optional<BucketCatalog::ClosedBucket> BucketCatalog::finish(
getTimeseriesBucketClearedError(bucket->id(), bucket->_ns));
}
} else if (bucket->allCommitted()) {
- if (bucket->_full) {
- // Everything in the bucket has been committed, and nothing more will be added since the
- // bucket is full. Thus, we can remove it.
- _memoryUsage.fetchAndSubtract(bucket->_memoryUsage);
-
- auto it = stripe.allBuckets.find(batch->bucket().id);
- if (it != stripe.allBuckets.end()) {
- bucket = it->second.get();
-
- closedBucket = ClosedBucket{batch->bucket().id,
- bucket->getTimeField().toString(),
- bucket->numMeasurements()};
-
- // Only remove from allBuckets and idleBuckets. If it was marked full, we know
- // that happened in Stripe::rollover, and that there is already a new open
- // bucket for this metadata.
- _markBucketNotIdle(&stripe, stripeLock, bucket);
- _eraseBucketState(batch->bucket().id);
-
- stripe.allBuckets.erase(batch->bucket().id);
+ switch (bucket->_rolloverAction) {
+ case RolloverAction::kClose: {
+ closedBucket = ClosedBucket{
+ bucket->id(), bucket->getTimeField().toString(), bucket->numMeasurements()};
+ _removeBucket(&stripe, stripeLock, bucket, false);
+ break;
+ }
+ case RolloverAction::kArchive: {
+ _archiveBucket(&stripe, stripeLock, bucket);
+ break;
+ }
+ case RolloverAction::kNone: {
+ _markBucketIdle(&stripe, stripeLock, bucket);
+ break;
}
- } else {
- _markBucketIdle(&stripe, stripeLock, bucket);
}
}
return closedBucket;
@@ -897,6 +1077,7 @@ void BucketCatalog::_appendExecutionStatsToBuilder(const ExecutionStats* stats,
stats->numBucketsClosedDueToTimeBackward.load());
builder->appendNumber("numBucketsClosedDueToMemoryThreshold",
stats->numBucketsClosedDueToMemoryThreshold.load());
+
auto commits = stats->numCommits.load();
builder->appendNumber("numCommits", commits);
builder->appendNumber("numWaits", stats->numWaits.load());
@@ -905,8 +1086,20 @@ void BucketCatalog::_appendExecutionStatsToBuilder(const ExecutionStats* stats,
if (commits) {
builder->appendNumber("avgNumMeasurementsPerCommit", measurementsCommitted / commits);
}
-}
+ if (feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
+ builder->appendNumber("numBucketsArchivedDueToTimeForward",
+ stats->numBucketsArchivedDueToTimeForward.load());
+ builder->appendNumber("numBucketsArchivedDueToTimeBackward",
+ stats->numBucketsArchivedDueToTimeBackward.load());
+ builder->appendNumber("numBucketsArchivedDueToMemoryThreshold",
+ stats->numBucketsArchivedDueToMemoryThreshold.load());
+ builder->appendNumber("numBucketsReopened", stats->numBucketsReopened.load());
+ builder->appendNumber("numBucketsKeptOpenDueToLargeMeasurements",
+ stats->numBucketsKeptOpenDueToLargeMeasurements.load());
+ }
+}
void BucketCatalog::appendExecutionStats(const NamespaceString& ns, BSONObjBuilder* builder) const {
const std::shared_ptr<ExecutionStats> stats = _getExecutionStats(ns);
@@ -955,6 +1148,10 @@ std::size_t BucketCatalog::BucketHasher::operator()(const BucketKey& key) const
return key.hash;
}
+std::size_t BucketCatalog::PreHashed::operator()(const BucketKey::Hash& key) const {
+ return key;
+}
+
BucketCatalog::StripeNumber BucketCatalog::_getStripeNumber(const BucketKey& key) {
return key.hash % kNumberOfStripes;
}
@@ -1050,23 +1247,51 @@ void BucketCatalog::_waitToCommitBatch(Stripe* stripe, const std::shared_ptr<Wri
}
}
-bool BucketCatalog::_removeBucket(Stripe* stripe, WithLock stripeLock, Bucket* bucket) {
- auto it = stripe->allBuckets.find(bucket->id());
- if (it == stripe->allBuckets.end()) {
- return false;
- }
-
+void BucketCatalog::_removeBucket(Stripe* stripe,
+ WithLock stripeLock,
+ Bucket* bucket,
+ bool archiving) {
invariant(bucket->_batches.empty());
invariant(!bucket->_preparedBatch);
+ auto allIt = stripe->allBuckets.find(bucket->id());
+ invariant(allIt != stripe->allBuckets.end());
+
_memoryUsage.fetchAndSubtract(bucket->_memoryUsage);
_markBucketNotIdle(stripe, stripeLock, bucket);
- stripe->openBuckets.erase({bucket->_ns, bucket->_metadata});
- _eraseBucketState(bucket->id());
- stripe->allBuckets.erase(it);
+ // If the bucket was rolled over, then there may be a different open bucket for this metadata.
+ auto openIt = stripe->openBuckets.find({bucket->_ns, bucket->_metadata});
+ if (openIt != stripe->openBuckets.end() && openIt->second == bucket) {
+ stripe->openBuckets.erase(openIt);
+ }
+
+ // If we are cleaning up while archiving a bucket, then we want to preserve its state. Otherwise
+ // we can remove the state from the catalog altogether.
+ if (!archiving) {
+ _eraseBucketState(bucket->id());
+ }
+
+ stripe->allBuckets.erase(allIt);
+}
+
+void BucketCatalog::_archiveBucket(Stripe* stripe, WithLock stripeLock, Bucket* bucket) {
+ bool archived = false;
+ auto& archivedSet = stripe->archivedBuckets[bucket->keyHash()];
+ auto it = archivedSet.find(bucket->getTime());
+ if (it == archivedSet.end()) {
+ archivedSet.emplace(bucket->getTime(),
+ ArchivedBucket{bucket->id(),
+ bucket->getTimeField().toString(),
+ bucket->numMeasurements()});
+
+ long long memory = _marginalMemoryUsageForArchivedBucket(archivedSet[bucket->getTime()],
+ archivedSet.size() == 1);
+ _memoryUsage.fetchAndAdd(memory);
- return true;
+ archived = true;
+ }
+ _removeBucket(stripe, stripeLock, bucket, archived);
}
void BucketCatalog::_abort(Stripe* stripe,
@@ -1112,7 +1337,7 @@ void BucketCatalog::_abort(Stripe* stripe,
}
if (doRemove) {
- [[maybe_unused]] bool removed = _removeBucket(stripe, stripeLock, bucket);
+ _removeBucket(stripe, stripeLock, bucket, false);
}
}
@@ -1135,19 +1360,54 @@ void BucketCatalog::_expireIdleBuckets(Stripe* stripe,
ExecutionStatsController& stats,
std::vector<BucketCatalog::ClosedBucket>* closedBuckets) {
// As long as we still need space and have entries and remaining attempts, close idle buckets.
- int32_t numClosed = 0;
+ int32_t numExpired = 0;
+
+ const bool canArchive = feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility);
+
while (!stripe->idleBuckets.empty() &&
_memoryUsage.load() > getTimeseriesIdleBucketExpiryMemoryUsageThresholdBytes() &&
- numClosed <= gTimeseriesIdleBucketExpiryMaxCountPerAttempt) {
+ numExpired <= gTimeseriesIdleBucketExpiryMaxCountPerAttempt) {
Bucket* bucket = stripe->idleBuckets.back();
- ClosedBucket closed{
- bucket->id(), bucket->getTimeField().toString(), bucket->numMeasurements()};
- if (_removeBucket(stripe, stripeLock, bucket)) {
+ if (canArchive) {
+ _archiveBucket(stripe, stripeLock, bucket);
+ stats.incNumBucketsArchivedDueToMemoryThreshold();
+ } else {
+ ClosedBucket closed{
+ bucket->id(), bucket->getTimeField().toString(), bucket->numMeasurements()};
+ _removeBucket(stripe, stripeLock, bucket, false);
stats.incNumBucketsClosedDueToMemoryThreshold();
closedBuckets->push_back(closed);
- ++numClosed;
}
+
+ ++numExpired;
+ }
+
+ while (canArchive && !stripe->archivedBuckets.empty() &&
+ _memoryUsage.load() > getTimeseriesIdleBucketExpiryMemoryUsageThresholdBytes() &&
+ numExpired <= gTimeseriesIdleBucketExpiryMaxCountPerAttempt) {
+
+ auto& [hash, archivedSet] = *stripe->archivedBuckets.begin();
+ invariant(!archivedSet.empty());
+
+ auto& [timestamp, bucket] = *archivedSet.begin();
+ ClosedBucket closed{bucket.bucketId, bucket.timeField, bucket.numMeasurements, true};
+
+ long long memory = _marginalMemoryUsageForArchivedBucket(bucket, archivedSet.size() == 1);
+ _eraseBucketState(bucket.bucketId);
+ if (archivedSet.size() == 1) {
+ // If this is the only entry, erase the whole map so we don't leave it empty.
+ stripe->archivedBuckets.erase(stripe->archivedBuckets.begin());
+ } else {
+ // Otherwise just erase this bucket from the map.
+ archivedSet.erase(archivedSet.begin());
+ }
+ _memoryUsage.fetchAndSubtract(memory);
+
+ stats.incNumBucketsClosedDueToMemoryThreshold();
+ closedBuckets->push_back(closed);
+ ++numExpired;
}
}
@@ -1158,8 +1418,8 @@ BucketCatalog::Bucket* BucketCatalog::_allocateBucket(Stripe* stripe,
auto [bucketId, roundedTime] = generateBucketId(info.time, info.options);
- auto [it, inserted] =
- stripe->allBuckets.try_emplace(bucketId, std::make_unique<Bucket>(bucketId, info.stripe));
+ auto [it, inserted] = stripe->allBuckets.try_emplace(
+ bucketId, std::make_unique<Bucket>(bucketId, info.stripe, info.key.hash));
tassert(6130900, "Expected bucket to be inserted", inserted);
Bucket* bucket = it->second.get();
stripe->openBuckets[info.key] = bucket;
@@ -1183,20 +1443,25 @@ BucketCatalog::Bucket* BucketCatalog::_allocateBucket(Stripe* stripe,
BucketCatalog::Bucket* BucketCatalog::_rollover(Stripe* stripe,
WithLock stripeLock,
Bucket* bucket,
- const CreationInfo& info) {
-
+ const CreationInfo& info,
+ RolloverAction action) {
+ invariant(action != RolloverAction::kNone);
if (bucket->allCommitted()) {
- // The bucket does not contain any measurements that are yet to be committed, so we can
- // remove it now.
- info.closedBuckets->push_back(ClosedBucket{
- bucket->id(), bucket->getTimeField().toString(), bucket->numMeasurements()});
+ // The bucket does not contain any measurements that are yet to be committed, so we can take
+ // action now.
+ if (action == RolloverAction::kClose) {
+ info.closedBuckets->push_back(ClosedBucket{
+ bucket->id(), bucket->getTimeField().toString(), bucket->numMeasurements()});
- bool removed = _removeBucket(stripe, stripeLock, bucket);
- invariant(removed);
+ _removeBucket(stripe, stripeLock, bucket, false);
+ } else {
+ invariant(action == RolloverAction::kArchive);
+ _archiveBucket(stripe, stripeLock, bucket);
+ }
} else {
- // We must keep the bucket around until it is committed, just mark it full so it we know to
- // clean it up when the last batch finishes.
- bucket->_full = true;
+        // We must keep the bucket around until all measurements are committed; just mark the
+        // action we chose now so we know what to do when the last batch finishes.
+ bucket->_rolloverAction = action;
}
return _allocateBucket(stripe, stripeLock, info);
@@ -1283,6 +1548,12 @@ boost::optional<BucketCatalog::BucketState> BucketCatalog::_setBucketState(const
return state;
}
+long long BucketCatalog::_marginalMemoryUsageForArchivedBucket(const ArchivedBucket& bucket,
+ bool onlyEntryForMatchingMetaHash) {
+ return sizeof(std::size_t) + sizeof(Date_t) + sizeof(ArchivedBucket) + bucket.timeField.size() +
+ (onlyEntryForMatchingMetaHash ? sizeof(decltype(Stripe::archivedBuckets)::value_type) : 0);
+}
+
class BucketCatalog::ServerStatus : public ServerStatusSection {
struct BucketCounts {
BucketCounts& operator+=(const BucketCounts& other) {
diff --git a/src/mongo/db/timeseries/bucket_catalog.h b/src/mongo/db/timeseries/bucket_catalog.h
index 2df33182d31..c2a82039ee8 100644
--- a/src/mongo/db/timeseries/bucket_catalog.h
+++ b/src/mongo/db/timeseries/bucket_catalog.h
@@ -67,9 +67,14 @@ class BucketCatalog {
AtomicWord<long long> numBucketsClosedDueToTimeForward;
AtomicWord<long long> numBucketsClosedDueToTimeBackward;
AtomicWord<long long> numBucketsClosedDueToMemoryThreshold;
+ AtomicWord<long long> numBucketsArchivedDueToTimeForward;
+ AtomicWord<long long> numBucketsArchivedDueToTimeBackward;
+ AtomicWord<long long> numBucketsArchivedDueToMemoryThreshold;
AtomicWord<long long> numCommits;
AtomicWord<long long> numWaits;
AtomicWord<long long> numMeasurementsCommitted;
+ AtomicWord<long long> numBucketsReopened;
+ AtomicWord<long long> numBucketsKeptOpenDueToLargeMeasurements;
};
class ExecutionStatsController {
@@ -87,9 +92,14 @@ class BucketCatalog {
void incNumBucketsClosedDueToTimeForward(long long increment = 1);
void incNumBucketsClosedDueToTimeBackward(long long increment = 1);
void incNumBucketsClosedDueToMemoryThreshold(long long increment = 1);
+ void incNumBucketsArchivedDueToTimeForward(long long increment = 1);
+ void incNumBucketsArchivedDueToTimeBackward(long long increment = 1);
+ void incNumBucketsArchivedDueToMemoryThreshold(long long increment = 1);
void incNumCommits(long long increment = 1);
void incNumWaits(long long increment = 1);
void incNumMeasurementsCommitted(long long increment = 1);
+ void incNumBucketsReopened(long long increment = 1);
+ void incNumBucketsKeptOpenDueToLargeMeasurements(long long increment = 1);
private:
std::shared_ptr<ExecutionStats> _collectionStats;
@@ -117,6 +127,7 @@ public:
OID bucketId;
std::string timeField;
uint32_t numMeasurements;
+ bool eligibleForReopening = false;
};
using ClosedBuckets = std::vector<ClosedBucket>;
@@ -179,7 +190,7 @@ public:
/**
* Records a set of new-to-the-bucket fields. Active batches only.
*/
- void _recordNewFields(NewFieldNames&& fields);
+ void _recordNewFields(Bucket* bucket, NewFieldNames&& fields);
/**
* Prepares the batch for commit. Sets min/max appropriately, records the number of
@@ -230,6 +241,13 @@ public:
BucketCatalog operator=(const BucketCatalog&) = delete;
/**
+ * Reopens a closed bucket into the catalog given the bucket document.
+ */
+ Status reopenBucket(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const BSONObj& bucketDoc);
+
+ /**
* Returns the metadata for the given bucket in the following format:
* {<metadata field name>: <value>}
* All measurements in the given bucket share same metadata value.
@@ -354,12 +372,14 @@ private:
* Key to lookup open Bucket for namespace and metadata, with pre-computed hash.
*/
struct BucketKey {
+ using Hash = std::size_t;
+
BucketKey() = delete;
BucketKey(const NamespaceString& nss, const BucketMetadata& meta);
NamespaceString ns;
BucketMetadata metadata;
- std::size_t hash;
+ Hash hash;
bool operator==(const BucketKey& other) const {
return ns == other.ns && metadata == other.metadata;
@@ -379,6 +399,23 @@ private:
};
/**
+ * Hasher to support using a pre-computed hash as a key without having to compute another hash.
+ */
+ struct PreHashed {
+ std::size_t operator()(const BucketKey::Hash& key) const;
+ };
+
+ /**
+     * Information about a Bucket that was archived while performing an operation on this
+     * BucketCatalog.
+ */
+ struct ArchivedBucket {
+ OID bucketId;
+ std::string timeField;
+ uint32_t numMeasurements;
+ };
+
+ /**
* Struct to hold a portion of the buckets managed by the catalog.
*
* Each of the bucket lists, as well as the buckets themselves, are protected by 'mutex'.
@@ -397,6 +434,12 @@ private:
// Buckets that do not have any outstanding writes.
using IdleList = std::list<Bucket*>;
IdleList idleBuckets;
+
+ // Buckets that are not currently in the catalog, but which are eligible to receive more
+ // measurements. The top-level map is keyed by the hash of the BucketKey, while the stored
+ // map is keyed by the bucket's minimum timestamp.
+ stdx::unordered_map<BucketKey::Hash, std::map<Date_t, ArchivedBucket>, PreHashed>
+ archivedBuckets;
};
StripeNumber _getStripeNumber(const BucketKey& key);
@@ -444,7 +487,13 @@ private:
/**
* Removes the given bucket from the bucket catalog's internal data structures.
*/
- bool _removeBucket(Stripe* stripe, WithLock stripeLock, Bucket* bucket);
+ void _removeBucket(Stripe* stripe, WithLock stripeLock, Bucket* bucket, bool archiving);
+
+ /**
+     * Archives the given bucket, minimizing the memory footprint but retaining the information
+     * required to efficiently identify it as a candidate for future insertions.
+ */
+ void _archiveBucket(Stripe* stripe, WithLock stripeLock, Bucket* bucket);
/**
* Aborts 'batch', and if the corresponding bucket still exists, proceeds to abort any other
@@ -492,6 +541,11 @@ private:
Bucket* _allocateBucket(Stripe* stripe, WithLock stripeLock, const CreationInfo& info);
/**
+ * Mode enum to determine the rollover type decision for a given bucket.
+ */
+ enum class RolloverAction { kNone, kArchive, kClose };
+
+ /**
* Close the existing, full bucket and open a new one for the same metadata.
*
* Writes information about the closed bucket to the 'info' parameter.
@@ -499,7 +553,8 @@ private:
Bucket* _rollover(Stripe* stripe,
WithLock stripeLock,
Bucket* bucket,
- const CreationInfo& info);
+ const CreationInfo& info,
+ RolloverAction action);
ExecutionStatsController _getExecutionStats(const NamespaceString& ns);
std::shared_ptr<ExecutionStats> _getExecutionStats(const NamespaceString& ns) const;
@@ -531,6 +586,16 @@ private:
*/
boost::optional<BucketState> _setBucketState(const OID& id, BucketState target);
+ /**
+ * Calculates the marginal memory usage for an archived bucket. The
+ * 'onlyEntryForMatchingMetaHash' parameter indicates that the bucket will be (if inserting)
+     * or was (if removing) the only bucket associated with its meta hash value. If true, then
+ * the returned value will attempt to account for the overhead of the map data structure for
+ * the meta hash value.
+ */
+ static long long _marginalMemoryUsageForArchivedBucket(const ArchivedBucket& bucket,
+ bool onlyEntryForMatchingMetaHash);
+
static constexpr std::size_t kNumberOfStripes = 32;
std::array<Stripe, kNumberOfStripes> _stripes;
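The `archivedBuckets` member above is a two-level structure: an outer map keyed by the pre-computed BucketKey hash (via the pass-through `PreHashed` hasher) and an inner map ordered by the bucket's minimum timestamp, so the oldest entry can be expired first. Below is a standalone sketch of that shape and of the expire-the-oldest step used under memory pressure; the types are stand-ins, not the catalog's.

    // Sketch of the archived-bucket bookkeeping and the "erase the whole outer
    // entry rather than leaving an empty inner map" rule.
    #include <cstddef>
    #include <cstdint>
    #include <map>
    #include <string>
    #include <unordered_map>

    using BucketKeyHash = std::size_t;

    struct ArchivedBucket {
        std::string bucketId;  // stand-in for the OID
        std::string timeField;
        uint32_t numMeasurements;
    };

    // The key is already a hash, so hashing it again would be wasted work.
    struct PreHashed {
        std::size_t operator()(const BucketKeyHash& key) const {
            return key;
        }
    };

    using ArchivedBuckets =
        std::unordered_map<BucketKeyHash, std::map<int64_t, ArchivedBucket>, PreHashed>;

    inline void expireOldest(ArchivedBuckets& archived) {
        if (archived.empty()) {
            return;
        }
        auto outer = archived.begin();
        auto& inner = outer->second;
        if (inner.size() <= 1) {
            archived.erase(outer);       // last entry: drop the whole outer map entry
        } else {
            inner.erase(inner.begin());  // otherwise drop only the oldest bucket
        }
    }
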
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers.cpp b/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
index cc5bc65f3c1..6a5d03e77bb 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
@@ -57,7 +57,7 @@ StatusWith<std::pair<const BSONObj, const BSONObj>> extractMinAndMax(const BSONO
} // namespace
StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
- const CollatorInterface* collator) {
+ const StringData::ComparatorInterface* comparator) {
auto swDocs = extractMinAndMax(bucketDoc);
if (!swDocs.isOK()) {
return swDocs.getStatus();
@@ -66,14 +66,14 @@ StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
const auto& [minObj, maxObj] = swDocs.getValue();
try {
- return MinMax::parseFromBSON(minObj, maxObj, collator);
+ return MinMax::parseFromBSON(minObj, maxObj, comparator);
} catch (...) {
return exceptionToStatus();
}
}
StatusWith<Schema> generateSchemaFromBucketDoc(const BSONObj& bucketDoc,
- const CollatorInterface* collator) {
+ const StringData::ComparatorInterface* comparator) {
auto swDocs = extractMinAndMax(bucketDoc);
if (!swDocs.isOK()) {
return swDocs.getStatus();
@@ -82,7 +82,7 @@ StatusWith<Schema> generateSchemaFromBucketDoc(const BSONObj& bucketDoc,
const auto& [minObj, maxObj] = swDocs.getValue();
try {
- return Schema::parseFromBSON(minObj, maxObj, collator);
+ return Schema::parseFromBSON(minObj, maxObj, comparator);
} catch (...) {
return exceptionToStatus();
}
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers.h b/src/mongo/db/timeseries/bucket_catalog_helpers.h
index 015cae8ef66..3c84124e5b2 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers.h
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers.h
@@ -30,8 +30,8 @@
#pragma once
#include "mongo/base/status_with.h"
+#include "mongo/base/string_data_comparator_interface.h"
#include "mongo/bson/bsonobj.h"
-#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/db/timeseries/flat_bson.h"
namespace mongo::timeseries {
@@ -43,7 +43,7 @@ namespace mongo::timeseries {
* Returns a bad status if the bucket document is malformed.
*/
StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
- const CollatorInterface* collator);
+ const StringData::ComparatorInterface* comparator);
/**
* Generates and returns a Schema object from an existing bucket document. Avoids unpacking the
@@ -52,6 +52,6 @@ StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
* Returns a bad status if the bucket document is malformed or contains mixed schema measurements.
*/
StatusWith<Schema> generateSchemaFromBucketDoc(const BSONObj& bucketDoc,
- const CollatorInterface* collator);
+ const StringData::ComparatorInterface* comparator);
} // namespace mongo::timeseries
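The helper signatures above are widened from `CollatorInterface*` to the base `StringData::ComparatorInterface*`; the call sites in bucket_catalog.cpp still pass `coll->getDefaultCollator()`, which suggests the collator satisfies that interface. A generic sketch of the same parameter-widening idea, with stand-in types rather than the mongo classes:

    // Sketch: accept the base comparison interface so both a plain comparator
    // and a collation-aware one can be passed without changing the helper.
    #include <algorithm>
    #include <cctype>
    #include <string>

    struct StringComparator {  // stand-in for the base interface
        virtual ~StringComparator() = default;
        virtual int compare(const std::string& a, const std::string& b) const = 0;
    };

    struct SimpleComparator : StringComparator {  // byte-wise comparison
        int compare(const std::string& a, const std::string& b) const override {
            return a.compare(b);
        }
    };

    struct CaseInsensitiveCollator : StringComparator {  // stand-in for a collator
        int compare(const std::string& a, const std::string& b) const override {
            std::string la = a, lb = b;
            auto lower = [](unsigned char c) { return static_cast<char>(std::tolower(c)); };
            std::transform(la.begin(), la.end(), la.begin(), lower);
            std::transform(lb.begin(), lb.end(), lb.begin(), lower);
            return la.compare(lb);
        }
    };

    // Takes the base interface (nullptr means plain ordering), so both of the
    // comparators above are accepted unchanged.
    bool lessThan(const std::string& a, const std::string& b, const StringComparator* cmp) {
        return cmp ? cmp->compare(a, b) < 0 : a < b;
    }
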
diff --git a/src/mongo/db/timeseries/bucket_catalog_test.cpp b/src/mongo/db/timeseries/bucket_catalog_test.cpp
index ccc3b06a2fb..c6e91d25b53 100644
--- a/src/mongo/db/timeseries/bucket_catalog_test.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog_test.cpp
@@ -32,6 +32,8 @@
#include "mongo/db/catalog/create_collection.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/timeseries/bucket_catalog.h"
+#include "mongo/db/timeseries/bucket_compression.h"
+#include "mongo/idl/server_parameter_test_util.h"
#include "mongo/stdx/future.h"
#include "mongo/unittest/bson_test_util.h"
#include "mongo/unittest/death_test.h"
@@ -41,6 +43,15 @@
namespace mongo {
namespace {
+constexpr StringData kNumSchemaChanges = "numBucketsClosedDueToSchemaChange"_sd;
+constexpr StringData kNumBucketsReopened = "numBucketsReopened"_sd;
+constexpr StringData kNumArchivedDueToTimeForward = "numBucketsArchivedDueToTimeForward"_sd;
+constexpr StringData kNumArchivedDueToTimeBackward = "numBucketsArchivedDueToTimeBackward"_sd;
+constexpr StringData kNumArchivedDueToMemoryThreshold = "numBucketsArchivedDueToMemoryThreshold"_sd;
+constexpr StringData kNumClosedDueToTimeForward = "numBucketsClosedDueToTimeForward"_sd;
+constexpr StringData kNumClosedDueToTimeBackward = "numBucketsClosedDueToTimeBackward"_sd;
+constexpr StringData kNumClosedDueToMemoryThreshold = "numBucketsClosedDueToMemoryThreshold"_sd;
+
class BucketCatalogTest : public CatalogTestFixture {
protected:
class Task {
@@ -72,8 +83,7 @@ protected:
void _insertOneAndCommit(const NamespaceString& ns,
uint16_t numPreviouslyCommittedMeasurements);
- long long _getNumWaits(const NamespaceString& ns);
- long long _getNumSchemaChanges(const NamespaceString& ns);
+ long long _getExecutionStat(const NamespaceString& ns, StringData stat);
// Check that each group of objects has compatible schema with itself, but that inserting the
// first object in new group closes the existing bucket and opens a new one
@@ -177,16 +187,10 @@ void BucketCatalogTest::_insertOneAndCommit(const NamespaceString& ns,
_commit(batch, numPreviouslyCommittedMeasurements);
}
-long long BucketCatalogTest::_getNumWaits(const NamespaceString& ns) {
- BSONObjBuilder builder;
- _bucketCatalog->appendExecutionStats(ns, &builder);
- return builder.obj().getIntField("numWaits");
-}
-
-long long BucketCatalogTest::_getNumSchemaChanges(const NamespaceString& ns) {
+long long BucketCatalogTest::_getExecutionStat(const NamespaceString& ns, StringData stat) {
BSONObjBuilder builder;
_bucketCatalog->appendExecutionStats(ns, &builder);
- return builder.obj().getIntField("numBucketsClosedDueToSchemaChange");
+ return builder.obj().getIntField(stat);
}
void BucketCatalogTest::_testMeasurementSchema(
@@ -203,7 +207,7 @@ void BucketCatalogTest::_testMeasurementSchema(
timestampedDoc.append(_timeField, Date_t::now());
timestampedDoc.appendElements(doc);
- auto pre = _getNumSchemaChanges(_ns1);
+ auto pre = _getExecutionStat(_ns1, kNumSchemaChanges);
auto result = _bucketCatalog
->insert(_opCtx,
_ns1,
@@ -212,7 +216,7 @@ void BucketCatalogTest::_testMeasurementSchema(
timestampedDoc.obj(),
BucketCatalog::CombineWithInsertsFromOtherClients::kAllow)
.getValue();
- auto post = _getNumSchemaChanges(_ns1);
+ auto post = _getExecutionStat(_ns1, kNumSchemaChanges);
if (firstMember) {
if (firstGroup) {
@@ -978,5 +982,453 @@ TEST_F(BucketCatalogTest, SchemaChanges) {
_testMeasurementSchema({{docs[18], docs[19]}, {docs[20], docs[21]}});
}
+TEST_F(BucketCatalogTest, ReopenMalformedBucket) {
+ BSONObj bucketDoc = ::mongo::fromjson(
+ R"({"_id":{"$oid":"629e1e680958e279dc29a517"},
+ "control":{"version":1,"min":{"time":{"$date":"2022-06-06T15:34:00.000Z"},"a":1,"b":1},
+ "max":{"time":{"$date":"2022-06-06T15:34:30.000Z"},"a":3,"b":3}},
+ "data":{"time":{"0":{"$date":"2022-06-06T15:34:30.000Z"},
+ "1":{"$date":"2022-06-06T15:34:30.000Z"},
+ "2":{"$date":"2022-06-06T15:34:30.000Z"}},
+ "a":{"0":1,"1":2,"2":3},
+ "b":{"0":1,"1":2,"2":3}}})");
+
+ AutoGetCollection autoColl(_opCtx, _ns1.makeTimeseriesBucketsNamespace(), MODE_IX);
+
+ {
+ // Missing _id field.
+ BSONObj missingIdObj = bucketDoc.removeField("_id");
+ ASSERT_NOT_OK(_bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), missingIdObj));
+
+ // Bad _id type.
+ BSONObj badIdObj = bucketDoc.addFields(BSON("_id" << 123));
+ ASSERT_NOT_OK(_bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badIdObj));
+ }
+
+ {
+ // Missing control field.
+ BSONObj missingControlObj = bucketDoc.removeField("control");
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), missingControlObj));
+
+ // Bad control type.
+ BSONObj badControlObj = bucketDoc.addFields(BSON("control" << BSONArray()));
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badControlObj));
+
+ // Bad control.version type.
+ BSONObj badVersionObj = bucketDoc.addFields(BSON(
+ "control" << BSON("version" << BSONArray() << "min"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:00.000Z"))
+ << "max"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:30.000Z")))));
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badVersionObj));
+
+ // Bad control.min type.
+ BSONObj badMinObj = bucketDoc.addFields(BSON(
+ "control" << BSON("version" << 1 << "min" << 123 << "max"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:30.000Z")))));
+ ASSERT_NOT_OK(_bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badMinObj));
+
+ // Bad control.max type.
+ BSONObj badMaxObj = bucketDoc.addFields(
+ BSON("control" << BSON("version" << 1 << "min"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:00.000Z"))
+ << "max" << 123)));
+ ASSERT_NOT_OK(_bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badMaxObj));
+
+ // Missing control.min.time.
+ BSONObj missingMinTimeObj = bucketDoc.addFields(BSON(
+ "control" << BSON("version" << 1 << "min" << BSON("abc" << 1) << "max"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:30.000Z")))));
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), missingMinTimeObj));
+
+ // Missing control.max.time.
+ BSONObj missingMaxTimeObj = bucketDoc.addFields(
+ BSON("control" << BSON("version" << 1 << "min"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:00.000Z"))
+ << "max" << BSON("abc" << 1))));
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), missingMaxTimeObj));
+ }
+
+
+ {
+ // Missing data field.
+ BSONObj missingDataObj = bucketDoc.removeField("data");
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), missingDataObj));
+
+ // Bad data type.
+ BSONObj badDataObj = bucketDoc.addFields(BSON("data" << 123));
+ ASSERT_NOT_OK(_bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badDataObj));
+ }
+}
+
+TEST_F(BucketCatalogTest, ReopenUncompressedBucketAndInsertCompatibleMeasurement) {
+ // Bucket document to reopen.
+ BSONObj bucketDoc = ::mongo::fromjson(
+ R"({"_id":{"$oid":"629e1e680958e279dc29a517"},
+ "control":{"version":1,"min":{"time":{"$date":"2022-06-06T15:34:00.000Z"},"a":1,"b":1},
+ "max":{"time":{"$date":"2022-06-06T15:34:30.000Z"},"a":3,"b":3}},
+ "data":{"time":{"0":{"$date":"2022-06-06T15:34:30.000Z"},
+ "1":{"$date":"2022-06-06T15:34:30.000Z"},
+ "2":{"$date":"2022-06-06T15:34:30.000Z"}},
+ "a":{"0":1,"1":2,"2":3},
+ "b":{"0":1,"1":2,"2":3}}})");
+
+ RAIIServerParameterControllerForTest controller{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ AutoGetCollection autoColl(_opCtx, _ns1.makeTimeseriesBucketsNamespace(), MODE_IX);
+ Status status = _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), bucketDoc);
+ ASSERT_OK(status);
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumBucketsReopened));
+
+ // Insert a measurement that is compatible with the reopened bucket.
+ auto result =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ ::mongo::fromjson(R"({"time":{"$date":"2022-06-06T15:34:40.000Z"},
+ "a":-100,"b":100})"),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+
+ // No buckets are closed.
+ ASSERT(result.getValue().closedBuckets.empty());
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumSchemaChanges));
+
+ auto batch = result.getValue().batch;
+ ASSERT(batch->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch));
+ ASSERT_EQ(batch->measurements().size(), 1);
+
+ // The reopened bucket already contains three committed measurements.
+ ASSERT_EQ(batch->numPreviouslyCommittedMeasurements(), 3);
+
+ // Verify that the min and max is updated correctly when inserting new measurements.
+ ASSERT_BSONOBJ_BINARY_EQ(batch->min(), BSON("u" << BSON("a" << -100)));
+ ASSERT_BSONOBJ_BINARY_EQ(
+ batch->max(),
+ BSON("u" << BSON("time" << Date_t::fromMillisSinceEpoch(1654529680000) << "b" << 100)));
+
+ _bucketCatalog->finish(batch, {});
+}
+
+TEST_F(BucketCatalogTest, ReopenUncompressedBucketAndInsertIncompatibleMeasurement) {
+ // Bucket document to reopen.
+ BSONObj bucketDoc = ::mongo::fromjson(
+ R"({"_id":{"$oid":"629e1e680958e279dc29a517"},
+ "control":{"version":1,"min":{"time":{"$date":"2022-06-06T15:34:00.000Z"},"a":1,"b":1},
+ "max":{"time":{"$date":"2022-06-06T15:34:30.000Z"},"a":3,"b":3}},
+ "data":{"time":{"0":{"$date":"2022-06-06T15:34:30.000Z"},
+ "1":{"$date":"2022-06-06T15:34:30.000Z"},
+ "2":{"$date":"2022-06-06T15:34:30.000Z"}},
+ "a":{"0":1,"1":2,"2":3},
+ "b":{"0":1,"1":2,"2":3}}})");
+
+ RAIIServerParameterControllerForTest controller{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ AutoGetCollection autoColl(_opCtx, _ns1.makeTimeseriesBucketsNamespace(), MODE_IX);
+ Status status = _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), bucketDoc);
+ ASSERT_OK(status);
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumBucketsReopened));
+
+ // Insert a measurement that is incompatible with the reopened bucket.
+ auto result =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ ::mongo::fromjson(R"({"time":{"$date":"2022-06-06T15:34:40.000Z"},
+ "a":{},"b":{}})"),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+
+ // The reopened bucket gets closed as the schema is incompatible.
+ ASSERT_EQ(1, result.getValue().closedBuckets.size());
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumSchemaChanges));
+
+ auto batch = result.getValue().batch;
+ ASSERT(batch->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch));
+ ASSERT_EQ(batch->measurements().size(), 1);
+
+ // Since the reopened bucket was incompatible, we opened a new one.
+ ASSERT_EQ(batch->numPreviouslyCommittedMeasurements(), 0);
+
+ _bucketCatalog->finish(batch, {});
+}
+
+TEST_F(BucketCatalogTest, ReopenCompressedBucketAndInsertCompatibleMeasurement) {
+ // Bucket document to reopen.
+ BSONObj bucketDoc = ::mongo::fromjson(
+ R"({"_id":{"$oid":"629e1e680958e279dc29a517"},
+ "control":{"version":1,"min":{"time":{"$date":"2022-06-06T15:34:00.000Z"},"a":1,"b":1},
+ "max":{"time":{"$date":"2022-06-06T15:34:30.000Z"},"a":3,"b":3}},
+ "data":{"time":{"0":{"$date":"2022-06-06T15:34:30.000Z"},
+ "1":{"$date":"2022-06-06T15:34:30.000Z"},
+ "2":{"$date":"2022-06-06T15:34:30.000Z"}},
+ "a":{"0":1,"1":2,"2":3},
+ "b":{"0":1,"1":2,"2":3}}})");
+
+ timeseries::CompressionResult compressionResult =
+ timeseries::compressBucket(bucketDoc,
+ _timeField,
+ _ns1,
+ /*eligibleForReopening=*/false,
+ /*validateDecompression=*/true);
+ const BSONObj& compressedBucketDoc = compressionResult.compressedBucket.get();
+
+ RAIIServerParameterControllerForTest controller{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ AutoGetCollection autoColl(_opCtx, _ns1.makeTimeseriesBucketsNamespace(), MODE_IX);
+ Status status =
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), compressedBucketDoc);
+ ASSERT_OK(status);
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumBucketsReopened));
+
+ // Insert a measurement that is compatible with the reopened bucket.
+ auto result =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ ::mongo::fromjson(R"({"time":{"$date":"2022-06-06T15:34:40.000Z"},
+ "a":-100,"b":100})"),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+
+ // No buckets are closed.
+ ASSERT(result.getValue().closedBuckets.empty());
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumSchemaChanges));
+
+ auto batch = result.getValue().batch;
+ ASSERT(batch->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch));
+ ASSERT_EQ(batch->measurements().size(), 1);
+
+ // The reopened bucket already contains three committed measurements.
+ ASSERT_EQ(batch->numPreviouslyCommittedMeasurements(), 3);
+
+ // Verify that the min and max is updated correctly when inserting new measurements.
+ ASSERT_BSONOBJ_BINARY_EQ(batch->min(), BSON("u" << BSON("a" << -100)));
+ ASSERT_BSONOBJ_BINARY_EQ(
+ batch->max(),
+ BSON("u" << BSON("time" << Date_t::fromMillisSinceEpoch(1654529680000) << "b" << 100)));
+
+ _bucketCatalog->finish(batch, {});
+}
+
+TEST_F(BucketCatalogTest, ReopenCompressedBucketAndInsertIncompatibleMeasurement) {
+ // Bucket document to reopen.
+ BSONObj bucketDoc = ::mongo::fromjson(
+ R"({"_id":{"$oid":"629e1e680958e279dc29a517"},
+ "control":{"version":1,"min":{"time":{"$date":"2022-06-06T15:34:00.000Z"},"a":1,"b":1},
+ "max":{"time":{"$date":"2022-06-06T15:34:30.000Z"},"a":3,"b":3}},
+ "data":{"time":{"0":{"$date":"2022-06-06T15:34:30.000Z"},
+ "1":{"$date":"2022-06-06T15:34:30.000Z"},
+ "2":{"$date":"2022-06-06T15:34:30.000Z"}},
+ "a":{"0":1,"1":2,"2":3},
+ "b":{"0":1,"1":2,"2":3}}})");
+
+ timeseries::CompressionResult compressionResult =
+ timeseries::compressBucket(bucketDoc,
+ _timeField,
+ _ns1,
+ /*eligibleForReopening=*/false,
+ /*validateDecompression=*/true);
+ const BSONObj& compressedBucketDoc = compressionResult.compressedBucket.get();
+
+ RAIIServerParameterControllerForTest controller{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ AutoGetCollection autoColl(_opCtx, _ns1.makeTimeseriesBucketsNamespace(), MODE_IX);
+ Status status =
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), compressedBucketDoc);
+ ASSERT_OK(status);
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumBucketsReopened));
+
+ // Insert a measurement that is incompatible with the reopened bucket.
+ auto result =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ ::mongo::fromjson(R"({"time":{"$date":"2022-06-06T15:34:40.000Z"},
+ "a":{},"b":{}})"),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+
+ // The reopened bucket gets closed as the schema is incompatible.
+ ASSERT_EQ(1, result.getValue().closedBuckets.size());
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumSchemaChanges));
+
+ auto batch = result.getValue().batch;
+ ASSERT(batch->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch));
+ ASSERT_EQ(batch->measurements().size(), 1);
+
+ // Since the reopened bucket was incompatible, we opened a new one.
+ ASSERT_EQ(batch->numPreviouslyCommittedMeasurements(), 0);
+
+ _bucketCatalog->finish(batch, {});
+}
+
+TEST_F(BucketCatalogTest, ArchiveIfTimeForward) {
+ RAIIServerParameterControllerForTest featureFlag{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ auto baseTimestamp = Date_t::now();
+
+ // Insert an initial document to make sure we have an open bucket.
+ auto result1 =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << baseTimestamp),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ ASSERT_OK(result1.getStatus());
+ auto batch1 = result1.getValue().batch;
+ ASSERT(batch1->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch1));
+ _bucketCatalog->finish(batch1, {});
+
+ // Make sure we start out with nothing closed or archived.
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumArchivedDueToTimeForward));
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumClosedDueToTimeForward));
+
+ // Now insert another that's too far forward to fit in the same bucket
+ auto result2 =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << (baseTimestamp + Seconds{7200})),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ ASSERT_OK(result2.getStatus());
+ auto batch2 = result2.getValue().batch;
+ ASSERT(batch2->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch2));
+ _bucketCatalog->finish(batch2, {});
+
+ // Make sure it was archived, not closed.
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumArchivedDueToTimeForward));
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumClosedDueToTimeForward));
+}
+
+TEST_F(BucketCatalogTest, ArchiveIfTimeBackward) {
+ RAIIServerParameterControllerForTest featureFlag{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ auto baseTimestamp = Date_t::now();
+
+ // Insert an initial document to make sure we have an open bucket.
+ auto result1 =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << baseTimestamp),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ ASSERT_OK(result1.getStatus());
+ auto batch1 = result1.getValue().batch;
+ ASSERT(batch1->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch1));
+ _bucketCatalog->finish(batch1, {});
+
+ // Make sure we start out with nothing closed or archived.
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumArchivedDueToTimeBackward));
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumClosedDueToTimeBackward));
+
+    // Now insert another that's too far backward to fit in the same bucket
+ auto result2 =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << (baseTimestamp - Seconds{7200})),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ ASSERT_OK(result2.getStatus());
+ auto batch2 = result2.getValue().batch;
+ ASSERT(batch2->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch2));
+ _bucketCatalog->finish(batch2, {});
+
+ // Make sure it was archived, not closed.
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumArchivedDueToTimeBackward));
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumClosedDueToTimeBackward));
+}
+
+TEST_F(BucketCatalogTest, ArchivingUnderMemoryPressure) {
+ RAIIServerParameterControllerForTest featureFlag{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ RAIIServerParameterControllerForTest memoryLimit{
+ "timeseriesIdleBucketExpiryMemoryUsageThreshold", 10000};
+
+ // Insert a measurement with a unique meta value, guaranteeing we will open a new bucket but not
+ // close an old one except under memory pressure.
+ long long meta = 0;
+ auto insertDocument = [&meta, this]() -> BucketCatalog::ClosedBuckets {
+ auto result =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << Date_t::now() << _metaField << meta++),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ ASSERT_OK(result.getStatus());
+ auto batch = result.getValue().batch;
+ ASSERT(batch->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch));
+ _bucketCatalog->finish(batch, {});
+
+ return result.getValue().closedBuckets;
+ };
+
+ // Ensure we start out with no buckets archived or closed due to memory pressure.
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumArchivedDueToMemoryThreshold));
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumClosedDueToMemoryThreshold));
+
+ // With a memory limit of 10000 bytes, we should be guaranteed to hit the memory limit with no
+ // more than 1000 buckets since an open bucket takes up at least 10 bytes (in reality,
+ // significantly more, but this is definitely a safe assumption).
+ for (int i = 0; i < 1000; ++i) {
+ [[maybe_unused]] auto closedBuckets = insertDocument();
+
+ if (0 < _getExecutionStat(_ns1, kNumArchivedDueToMemoryThreshold)) {
+ break;
+ }
+ }
+
+ // When we first hit the limit, we should try to archive some buckets before closing anything.
+ // However, depending on how the buckets are distributed over the stripes, the current stripe
+ // may not have enough open buckets to archive to drop below the limit, and it may immediately
+ // close a bucket it has just archived. We can still guarantee that a bucket is archived before
+ // it is closed, though.
+ ASSERT_LT(0, _getExecutionStat(_ns1, kNumArchivedDueToMemoryThreshold));
+ auto numClosedInFirstRound = _getExecutionStat(_ns1, kNumClosedDueToMemoryThreshold);
+ ASSERT_LTE(numClosedInFirstRound, _getExecutionStat(_ns1, kNumArchivedDueToMemoryThreshold));
+
+ // If we continue to open more new buckets with distinct meta values, eventually we'll run out
+ // of open buckets to archive and have to start closing archived buckets to relieve memory
+ // pressure. Again, an archived bucket should take up more than 10 bytes in the catalog, so we
+ // should be fine with a maximum of 1000 iterations.
+ for (int i = 0; i < 1000; ++i) {
+ auto closedBuckets = insertDocument();
+
+ if (numClosedInFirstRound < _getExecutionStat(_ns1, kNumClosedDueToMemoryThreshold)) {
+ ASSERT_FALSE(closedBuckets.empty());
+ break;
+ }
+ }
+
+ // We should have closed some (additional) buckets by now.
+ ASSERT_LT(numClosedInFirstRound, _getExecutionStat(_ns1, kNumClosedDueToMemoryThreshold));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/timeseries/bucket_compression.cpp b/src/mongo/db/timeseries/bucket_compression.cpp
index 94fa5264647..1ccd26da0bd 100644
--- a/src/mongo/db/timeseries/bucket_compression.cpp
+++ b/src/mongo/db/timeseries/bucket_compression.cpp
@@ -53,6 +53,7 @@ MONGO_FAIL_POINT_DEFINE(simulateBsonColumnCompressionDataLoss);
CompressionResult compressBucket(const BSONObj& bucketDoc,
StringData timeFieldName,
const NamespaceString& nss,
+ bool eligibleForReopening,
bool validateDecompression) try {
CompressionResult result;
@@ -179,22 +180,36 @@ CompressionResult compressBucket(const BSONObj& bucketDoc,
{
BSONObjBuilder control(builder.subobjStart(kBucketControlFieldName));
- // Set right version, leave other control fields unchanged
+ const bool shouldSetBucketClosed = !eligibleForReopening &&
+ feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility);
+
+ // Set the version to indicate that the bucket was compressed and the closed flag if the
+ // bucket shouldn't be reopened. Leave other control fields unchanged.
+ bool closedSet = false;
bool versionSet = false;
for (const auto& controlField : controlElement.Obj()) {
if (controlField.fieldNameStringData() == kBucketControlVersionFieldName) {
control.append(kBucketControlVersionFieldName, kTimeseriesControlCompressedVersion);
versionSet = true;
+ } else if (controlField.fieldNameStringData() == kBucketControlClosedFieldName &&
+ shouldSetBucketClosed) {
+ control.append(kBucketControlClosedFieldName, true);
+ closedSet = true;
} else {
control.append(controlField);
}
}
- // Set version if it was missing from uncompressed bucket
+ // Set the version and closed fields if they were missing from the uncompressed bucket
if (!versionSet) {
control.append(kBucketControlVersionFieldName, kTimeseriesControlCompressedVersion);
}
+ if (!closedSet && shouldSetBucketClosed) {
+ control.append(kBucketControlClosedFieldName, true);
+ }
+
// Set count
control.append(kBucketControlCountFieldName, static_cast<int32_t>(measurements.size()));
}
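For orientation, a sketch of the control block this change produces for a compressed bucket that is not eligible for reopening, assuming the scalability feature flag is enabled (the min/max summaries are simply carried over and are shown only schematically):

    // control: {
    //     version: kTimeseriesControlCompressedVersion,
    //     closed: true,                      // only when shouldSetBucketClosed is true
    //     count: <number of measurements>,
    //     min: { ... }, max: { ... }         // pre-existing summary fields, unchanged
    // }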
diff --git a/src/mongo/db/timeseries/bucket_compression.h b/src/mongo/db/timeseries/bucket_compression.h
index e70bec965d7..30c788a9417 100644
--- a/src/mongo/db/timeseries/bucket_compression.h
+++ b/src/mongo/db/timeseries/bucket_compression.h
@@ -58,6 +58,7 @@ struct CompressionResult {
CompressionResult compressBucket(const BSONObj& bucketDoc,
StringData timeFieldName,
const NamespaceString& nss,
+ bool eligibleForReopening,
bool validateDecompression);
/**
diff --git a/src/mongo/db/timeseries/timeseries.idl b/src/mongo/db/timeseries/timeseries.idl
index d3ede8fd3c3..3f858f17fe6 100644
--- a/src/mongo/db/timeseries/timeseries.idl
+++ b/src/mongo/db/timeseries/timeseries.idl
@@ -62,6 +62,18 @@ server_parameters:
cpp_varname: "gTimeseriesIdleBucketExpiryMaxCountPerAttempt"
default: 3
validator: { gte: 2 }
+ "timeseriesBucketMinCount":
+ description: "Time-series buckets that need to be closed due to size
+ (timeseriesBucketMaxSize) but haven't crossed this threshold are considered to
+ contain large measurements, and will be kept open to improve bucketing
+ performance. These buckets will be closed when they reach the threshold or if
+ the bucket is close to the max BSON size limit. Setting this to 1 disables
+ this behaviour."
+ set_at: [ startup ]
+ cpp_vartype: "std::int32_t"
+ cpp_varname: "gTimeseriesBucketMinCount"
+ default: 10
+ validator: { gte: 1 }
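Because this parameter is settable only at startup, changing it requires restarting the server; for example, starting mongod with --setParameter timeseriesBucketMinCount=1 (or the equivalent setParameter entry in the configuration file) disables the keep-open behaviour described above.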
enums:
BucketGranularity:
diff --git a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
index be234d0d95f..6481aa7bbf9 100644
--- a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
+++ b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
@@ -50,6 +50,28 @@ namespace {
NamespaceString makeTimeseriesBucketsNamespace(const NamespaceString& nss) {
return nss.isTimeseriesBucketsCollection() ? nss : nss.makeTimeseriesBucketsNamespace();
}
+
+/**
+ * Converts an index key on the time field to a key on the 'control.min.$timeField' field. Depends
+ * on error checking from 'createBucketsSpecFromTimeseriesSpec()', which should be called before
+ * this function.
+ */
+BSONObj convertToTTLTimeField(const BSONObj& origKeyField, StringData timeField) {
+ BSONObjBuilder keyBuilder;
+ uassert(ErrorCodes::CannotCreateIndex,
+ str::stream() << "TTL indexes are single-field indexes, compound indexes do "
+ "not support TTL. Index spec: "
+ << origKeyField,
+ origKeyField.nFields() == 1);
+
+ const auto& firstElem = origKeyField.firstElement();
+ uassert(ErrorCodes::InvalidOptions,
+ "TTL indexes on non-time fields are not supported on time-series collections",
+ firstElem.fieldName() == timeField);
+
+ keyBuilder.appendAs(firstElem,
+ str::stream() << timeseries::kControlMinFieldNamePrefix << timeField);
+ return keyBuilder.obj();
+}
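As a hedged illustration of the helper above (the field names are hypothetical; the prefix is the 'control.min.' prefix named in the comment), a single-field key on the time field is rewritten against the buckets collection, while other shapes trip the uasserts:

    // convertToTTLTimeField(BSON("tm" << 1), "tm")             -> { "control.min.tm": 1 }
    // convertToTTLTimeField(BSON("tm" << 1 << "a" << 1), "tm") -> uasserts CannotCreateIndex
    // convertToTTLTimeField(BSON("a" << 1), "tm")              -> uasserts InvalidOptions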
} // namespace
@@ -83,12 +105,17 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
std::vector<mongo::BSONObj> indexes;
for (const auto& origIndex : origIndexes) {
BSONObjBuilder builder;
- bool isBucketsIndexSpecCompatibleForDowngrade = true;
+ BSONObj keyField;
+ BSONObj originalKeyField;
+ bool isTTLIndex = false;
+ bool hasPartialFilterOnMetaField = false;
+ bool includeOriginalSpec = false;
+
for (const auto& elem : origIndex) {
if (elem.fieldNameStringData() == IndexDescriptor::kPartialFilterExprFieldName) {
- if (feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() &&
- serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest()) {
- isBucketsIndexSpecCompatibleForDowngrade = false;
+ if (feature_flags::gTimeseriesMetricIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
+ includeOriginalSpec = true;
} else {
uasserted(ErrorCodes::InvalidOptions,
"Partial indexes are not supported on time-series collections");
@@ -135,7 +162,7 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
// planner, this will be true.
bool assumeNoMixedSchemaData = true;
- BSONObj bucketPred =
+ auto [hasMetricPred, bucketPred] =
BucketSpec::pushdownPredicate(expCtx,
options,
collationMatchesDefault,
@@ -144,6 +171,9 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
includeMetaField,
assumeNoMixedSchemaData,
BucketSpec::IneligiblePredicatePolicy::kError);
+
+ hasPartialFilterOnMetaField = !hasMetricPred;
+
builder.append(IndexDescriptor::kPartialFilterExprFieldName, bucketPred);
continue;
}
@@ -171,11 +201,11 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
}
if (elem.fieldNameStringData() == IndexDescriptor::kExpireAfterSecondsFieldName) {
- uasserted(ErrorCodes::InvalidOptions,
- "TTL indexes are not supported on time-series collections");
+ isTTLIndex = true;
+ builder.append(elem);
+ continue;
}
-
if (elem.fieldNameStringData() == IndexDescriptor::kUniqueFieldName) {
uassert(ErrorCodes::InvalidOptions,
"Unique indexes are not supported on time-series collections",
@@ -183,27 +213,28 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
}
if (elem.fieldNameStringData() == NewIndexSpec::kKeyFieldName) {
- auto pluginName = IndexNames::findPluginName(elem.Obj());
+ originalKeyField = elem.Obj();
+
+ auto pluginName = IndexNames::findPluginName(originalKeyField);
uassert(ErrorCodes::InvalidOptions,
"Text indexes are not supported on time-series collections",
pluginName != IndexNames::TEXT);
auto bucketsIndexSpecWithStatus =
- timeseries::createBucketsIndexSpecFromTimeseriesIndexSpec(options, elem.Obj());
+ timeseries::createBucketsIndexSpecFromTimeseriesIndexSpec(options,
+ originalKeyField);
uassert(ErrorCodes::CannotCreateIndex,
str::stream() << bucketsIndexSpecWithStatus.getStatus().toString()
<< " Command request: " << redact(origCmd.toBSON({})),
bucketsIndexSpecWithStatus.isOK());
- if (!timeseries::isBucketsIndexSpecCompatibleForDowngrade(
+ if (timeseries::shouldIncludeOriginalSpec(
options,
BSON(NewIndexSpec::kKeyFieldName
<< bucketsIndexSpecWithStatus.getValue()))) {
- isBucketsIndexSpecCompatibleForDowngrade = false;
+ includeOriginalSpec = true;
}
-
- builder.append(NewIndexSpec::kKeyFieldName,
- std::move(bucketsIndexSpecWithStatus.getValue()));
+ keyField = std::move(bucketsIndexSpecWithStatus.getValue());
continue;
}
@@ -212,12 +243,24 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
builder.append(elem);
}
- if (feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() &&
- !isBucketsIndexSpecCompatibleForDowngrade) {
+ if (isTTLIndex) {
+ uassert(ErrorCodes::InvalidOptions,
+ "TTL indexes are not supported on time-series collections",
+ feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility));
+ uassert(ErrorCodes::InvalidOptions,
+ "TTL indexes on time-series collections require a partialFilterExpression on "
+ "the metaField",
+ hasPartialFilterOnMetaField);
+ keyField = convertToTTLTimeField(originalKeyField, options.getTimeField());
+ }
+ builder.append(NewIndexSpec::kKeyFieldName, std::move(keyField));
+
+ if (feature_flags::gTimeseriesMetricIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility) &&
+ includeOriginalSpec) {
// Store the original user index definition on the transformed index definition for the
- // time-series buckets collection if this is a newly supported index type on time-series
- // collections. This is to avoid any additional downgrade steps for index types already
- // supported in 5.0.
+ // time-series buckets collection.
builder.appendObject(IndexDescriptor::kOriginalSpecFieldName, origIndex.objdata());
}
diff --git a/src/mongo/db/timeseries/timeseries_constants.h b/src/mongo/db/timeseries/timeseries_constants.h
index 28dfd25ce78..be7d9a368f2 100644
--- a/src/mongo/db/timeseries/timeseries_constants.h
+++ b/src/mongo/db/timeseries/timeseries_constants.h
@@ -40,6 +40,7 @@ namespace timeseries {
static constexpr StringData kBucketIdFieldName = "_id"_sd;
static constexpr StringData kBucketDataFieldName = "data"_sd;
static constexpr StringData kBucketMetaFieldName = "meta"_sd;
+static constexpr StringData kBucketControlClosedFieldName = "closed"_sd;
static constexpr StringData kBucketControlFieldName = "control"_sd;
static constexpr StringData kBucketControlVersionFieldName = "version"_sd;
static constexpr StringData kBucketControlCountFieldName = "count"_sd;
diff --git a/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp b/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
index 638a2c8d6c4..afabfc4e0a6 100644
--- a/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
+++ b/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
@@ -47,7 +47,8 @@ protected:
test(obj);
NamespaceString nss{"test"};
- auto compressionResult = timeseries::compressBucket(obj, "time", nss, true);
+ auto compressionResult =
+ timeseries::compressBucket(obj, "time", nss, /*eligibleForReopening=*/false, true);
ASSERT_TRUE(compressionResult.compressedBucket.has_value());
ASSERT_FALSE(compressionResult.decompressionFailed);
diff --git a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp
index 83b6e3f6e9d..4dcf9a73eda 100644
--- a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp
+++ b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp
@@ -149,7 +149,8 @@ StatusWith<BSONObj> createBucketsSpecFromTimeseriesSpec(const TimeseriesOptions&
// Indexes on measurement fields are only supported when the 'gTimeseriesMetricIndexes'
// feature flag is enabled.
- if (!feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV()) {
+ if (!feature_flags::gTimeseriesMetricIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
auto reason = str::stream();
reason << "Invalid index spec for time-series collection: "
<< redact(timeseriesIndexSpecBSON) << ". ";
@@ -366,7 +367,7 @@ StatusWith<BSONObj> createBucketsShardKeySpecFromTimeseriesShardKeySpec(
boost::optional<BSONObj> createTimeseriesIndexFromBucketsIndex(
const TimeseriesOptions& timeseriesOptions, const BSONObj& bucketsIndex) {
bool timeseriesMetricIndexesFeatureFlagEnabled =
- feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV();
+ feature_flags::gTimeseriesMetricIndexes.isEnabled(serverGlobalParams.featureCompatibility);
if (bucketsIndex.hasField(kOriginalSpecFieldName) &&
timeseriesMetricIndexesFeatureFlagEnabled) {
@@ -406,21 +407,16 @@ std::list<BSONObj> createTimeseriesIndexesFromBucketsIndexes(
return indexSpecs;
}
-bool isBucketsIndexSpecCompatibleForDowngrade(const TimeseriesOptions& timeseriesOptions,
- const BSONObj& bucketsIndex) {
+bool shouldIncludeOriginalSpec(const TimeseriesOptions& timeseriesOptions,
+ const BSONObj& bucketsIndex) {
if (!bucketsIndex.hasField(kKeyFieldName)) {
return false;
}
- if (bucketsIndex.hasField(kPartialFilterExpressionFieldName)) {
- // Partial indexes are not supported in FCV < 5.2.
- return false;
- }
-
return createTimeseriesIndexSpecFromBucketsIndexSpec(
timeseriesOptions,
bucketsIndex.getField(kKeyFieldName).Obj(),
- /*timeseriesMetricIndexesFeatureFlagEnabled=*/false) != boost::none;
+ /*timeseriesMetricIndexesFeatureFlagEnabled=*/false) == boost::none;
}
bool doesBucketsIndexIncludeMeasurement(OperationContext* opCtx,
diff --git a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h
index ad1bb795fd2..144893c0d77 100644
--- a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h
+++ b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h
@@ -71,10 +71,11 @@ std::list<BSONObj> createTimeseriesIndexesFromBucketsIndexes(
const TimeseriesOptions& timeseriesOptions, const std::list<BSONObj>& bucketsIndexes);
/**
- * Returns true if the 'bucketsIndex' is compatible for FCV downgrade.
+ * Returns true if the original index specification should be included when creating an index on the
+ * time-series buckets collection.
*/
-bool isBucketsIndexSpecCompatibleForDowngrade(const TimeseriesOptions& timeseriesOptions,
- const BSONObj& bucketsIndex);
+bool shouldIncludeOriginalSpec(const TimeseriesOptions& timeseriesOptions,
+ const BSONObj& bucketsIndex);
/**
* Returns true if 'bucketsIndex' uses a measurement field, excluding the time field. Checks both
diff --git a/src/mongo/db/transaction_api.cpp b/src/mongo/db/transaction_api.cpp
index 8f1f91e3080..0a950dbbe54 100644
--- a/src/mongo/db/transaction_api.cpp
+++ b/src/mongo/db/transaction_api.cpp
@@ -82,6 +82,7 @@ SyncTransactionWithRetries::SyncTransactionWithRetries(
_txn(std::make_shared<details::TransactionWithRetries>(
opCtx,
executor,
+ _source.token(),
txnClient ? std::move(txnClient)
: std::make_unique<details::SEPTransactionClient>(
opCtx,
@@ -101,6 +102,8 @@ StatusWith<CommitResult> SyncTransactionWithRetries::runNoThrow(OperationContext
}
auto txnResult = _txn->run(std::move(callback)).getNoThrow(opCtx);
+ // Cancel the source to guarantee the transaction will terminate if our opCtx was interrupted.
+ _source.cancel();
// Post transaction processing, which must also happen inline.
OperationTimeTracker::get(opCtx)->updateOperationTime(_txn->getOperationTime());
@@ -188,8 +191,7 @@ SemiFuture<CommitResult> TransactionWithRetries::run(Callback callback) noexcept
return txnStatus.isOK() || txnStatus != ErrorCodes::TransactionAPIMustRetryTransaction;
})
.withBackoffBetweenIterations(kExponentialBackoff)
- // Cancellation happens by interrupting the caller's opCtx.
- .on(_executor, CancellationToken::uncancelable())
+ .on(_executor, _token)
// Safe to inline because the continuation only holds state.
.unsafeToInlineFuture()
.tapAll([anchor = shared_from_this()](auto&&) {})
@@ -257,8 +259,7 @@ ExecutorFuture<CommitResult> TransactionWithRetries::_runCommitWithRetries() {
return swResult.isOK() || swResult != ErrorCodes::TransactionAPIMustRetryCommit;
})
.withBackoffBetweenIterations(kExponentialBackoff)
- // Cancellation happens by interrupting the caller's opCtx.
- .on(_executor, CancellationToken::uncancelable());
+ .on(_executor, _token);
}
ExecutorFuture<void> TransactionWithRetries::_bestEffortAbort() {
@@ -297,12 +298,16 @@ SemiFuture<BSONObj> SEPTransactionClient::runCommand(StringData dbName, BSONObj
invariant(!haveClient());
auto client = _serviceContext->makeClient("SEP-internal-txn-client");
AlternativeClientRegion clientRegion(client);
- auto opCtxHolder = cc().makeOperationContext();
+ // Note that _token is only cancelled once the caller of the transaction no longer cares about
+ // its result, so CancelableOperationContexts only being interrupted by ErrorCodes::Interrupted
+ // shouldn't impact any upstream retry logic.
+ CancelableOperationContextFactory opCtxFactory(_token, _executor);
+ auto cancellableOpCtx = opCtxFactory.makeOperationContext(&cc());
primeInternalClient(&cc());
auto opMsgRequest = OpMsgRequest::fromDBAndBody(dbName, cmdBuilder.obj());
auto requestMessage = opMsgRequest.serialize();
- return _behaviors->handleRequest(opCtxHolder.get(), requestMessage)
+ return _behaviors->handleRequest(cancellableOpCtx.get(), requestMessage)
.then([this](DbResponse dbResponse) {
auto reply = rpc::makeReply(&dbResponse.response)->getCommandReply().getOwned();
_hooks->runReplyHook(reply);
@@ -383,7 +388,7 @@ SemiFuture<std::vector<BSONObj>> SEPTransactionClient::exhaustiveFind(
// an error upon fetching more documents.
return result != ErrorCodes::InternalTransactionsExhaustiveFindHasMore;
})
- .on(_executor, CancellationToken::uncancelable())
+ .on(_executor, _token)
.then([response = std::move(response)] { return std::move(*response); });
})
.semi();
@@ -494,6 +499,29 @@ int getMaxRetries() {
: kTxnRetryLimit;
}
+bool isLocalTransactionFatalResult(const StatusWith<CommitResult>& swResult) {
+ // If the local node is shutting down, all retries would fail. If the node has failed over,
+ // retries could eventually succeed on the new primary, but we want to prevent that, since
+ // whatever command ran the internal transaction will fail with this error and may be
+ // retried itself.
+ auto isLocalFatalStatus = [](Status status) -> bool {
+ return status.isA<ErrorCategory::NotPrimaryError>() ||
+ status.isA<ErrorCategory::ShutdownError>();
+ };
+
+ if (!swResult.isOK()) {
+ return isLocalFatalStatus(swResult.getStatus());
+ }
+ return isLocalFatalStatus(swResult.getValue().getEffectiveStatus());
+}
+
+// True if the transaction is running entirely against the local node, e.g. a single replica set
+// transaction on a mongod. False for remote transactions from a mongod or all transactions from a
+// mongos.
+bool isRunningLocalTransaction(const TransactionClient& txnClient) {
+ return !isMongos() && !txnClient.runsClusterOperations();
+}
+
Transaction::ErrorHandlingStep Transaction::handleError(const StatusWith<CommitResult>& swResult,
int attemptCounter) const noexcept {
stdx::lock_guard<Latch> lg(_mutex);
@@ -513,6 +541,11 @@ Transaction::ErrorHandlingStep Transaction::handleError(const StatusWith<CommitR
return ErrorHandlingStep::kDoNotRetry;
}
+ // If we're running locally, some errors mean we should not retry, like a failover or shutdown.
+ if (isRunningLocalTransaction(*_txnClient) && isLocalTransactionFatalResult(swResult)) {
+ return ErrorHandlingStep::kDoNotRetry;
+ }
+
// If the op has a deadline, retry until it is reached regardless of the number of attempts.
if (attemptCounter > getMaxRetries() && !_opDeadline) {
return _isInCommit() ? ErrorHandlingStep::kDoNotRetry
diff --git a/src/mongo/db/transaction_api.h b/src/mongo/db/transaction_api.h
index d8d7c34e604..830ec9a7b2f 100644
--- a/src/mongo/db/transaction_api.h
+++ b/src/mongo/db/transaction_api.h
@@ -91,7 +91,8 @@ public:
* transaction metadata to requests and parsing it from responses. Must be called before any
* commands have been sent and cannot be called more than once.
*/
- virtual void injectHooks(std::unique_ptr<details::TxnMetadataHooks> hooks) = 0;
+ virtual void initialize(std::unique_ptr<details::TxnMetadataHooks> hooks,
+ const CancellationToken& token) = 0;
/**
* Runs the given command as part of the transaction that owns this transaction client.
@@ -195,6 +196,7 @@ public:
}
private:
+ CancellationSource _source;
std::unique_ptr<ResourceYielder> _resourceYielder;
std::shared_ptr<details::TransactionWithRetries> _txn;
};
@@ -260,14 +262,17 @@ public:
std::unique_ptr<SEPTransactionClientBehaviors> behaviors)
: _serviceContext(opCtx->getServiceContext()),
_executor(executor),
+ _token(CancellationToken::uncancelable()),
_behaviors(std::move(behaviors)) {}
SEPTransactionClient(const SEPTransactionClient&) = delete;
SEPTransactionClient operator=(const SEPTransactionClient&) = delete;
- virtual void injectHooks(std::unique_ptr<details::TxnMetadataHooks> hooks) override {
+ virtual void initialize(std::unique_ptr<details::TxnMetadataHooks> hooks,
+ const CancellationToken& token) override {
invariant(!_hooks);
_hooks = std::move(hooks);
+ _token = token;
}
virtual SemiFuture<BSONObj> runCommand(StringData dbName, BSONObj cmd) const override;
@@ -289,6 +294,7 @@ public:
private:
ServiceContext* const _serviceContext;
std::shared_ptr<executor::TaskExecutor> _executor;
+ CancellationToken _token;
std::unique_ptr<SEPTransactionClientBehaviors> _behaviors;
std::unique_ptr<details::TxnMetadataHooks> _hooks;
};
@@ -323,12 +329,13 @@ public:
*/
Transaction(OperationContext* opCtx,
std::shared_ptr<executor::TaskExecutor> executor,
+ const CancellationToken& token,
std::unique_ptr<TransactionClient> txnClient)
: _executor(executor),
_txnClient(std::move(txnClient)),
_service(opCtx->getServiceContext()) {
_primeTransaction(opCtx);
- _txnClient->injectHooks(_makeTxnMetadataHooks());
+ _txnClient->initialize(_makeTxnMetadataHooks(), token);
}
/**
@@ -483,9 +490,11 @@ public:
TransactionWithRetries(OperationContext* opCtx,
std::shared_ptr<executor::TaskExecutor> executor,
+ const CancellationToken& token,
std::unique_ptr<TransactionClient> txnClient)
- : _internalTxn(std::make_shared<Transaction>(opCtx, executor, std::move(txnClient))),
- _executor(executor) {}
+ : _internalTxn(std::make_shared<Transaction>(opCtx, executor, token, std::move(txnClient))),
+ _executor(executor),
+ _token(token) {}
/**
* Returns a bundle with the commit command status and write concern error, if any. Any error
@@ -518,6 +527,7 @@ private:
std::shared_ptr<Transaction> _internalTxn;
std::shared_ptr<executor::TaskExecutor> _executor;
+ CancellationToken _token;
};
} // namespace details
diff --git a/src/mongo/db/transaction_api_test.cpp b/src/mongo/db/transaction_api_test.cpp
index 0418bd22ca6..bcfb8ba5815 100644
--- a/src/mongo/db/transaction_api_test.cpp
+++ b/src/mongo/db/transaction_api_test.cpp
@@ -142,7 +142,8 @@ class MockTransactionClient : public SEPTransactionClient {
public:
using SEPTransactionClient::SEPTransactionClient;
- virtual void injectHooks(std::unique_ptr<TxnMetadataHooks> hooks) override {
+ virtual void initialize(std::unique_ptr<TxnMetadataHooks> hooks,
+ const CancellationToken& token) override {
_hooks = std::move(hooks);
}
@@ -205,6 +206,7 @@ private:
mutable StatusWith<BSONObj> _lastResponse{BSONObj()};
mutable std::queue<StatusWith<BSONObj>> _responses;
mutable std::vector<BSONObj> _sentRequests;
+ bool _runningLocalTransaction{false};
};
} // namespace txn_api::details
@@ -329,9 +331,15 @@ protected:
_mockClient = mockClient.get();
_txnWithRetries = std::make_unique<txn_api::SyncTransactionWithRetries>(
opCtx(), _executor, nullptr /* resourceYielder */, std::move(mockClient));
+
+ // The bulk of the API tests are for the non-local transaction cases, so set isMongos=true
+ // by default.
+ setMongos(true);
}
void tearDown() override {
+ setMongos(false);
+
_executor->shutdown();
_executor->join();
_executor.reset();
@@ -406,7 +414,8 @@ private:
class MockClusterOperationTransactionClient : public txn_api::TransactionClient {
public:
- virtual void injectHooks(std::unique_ptr<txn_api::details::TxnMetadataHooks> hooks) {}
+ virtual void initialize(std::unique_ptr<txn_api::details::TxnMetadataHooks> hooks,
+ const CancellationToken& token) {}
virtual SemiFuture<BSONObj> runCommand(StringData dbName, BSONObj cmd) const {
MONGO_UNREACHABLE;
@@ -1944,6 +1953,9 @@ TEST_F(TxnAPITest, CanBeUsedWithinShardedOperationsIfClientSupportsIt) {
}
TEST_F(TxnAPITest, DoNotAllowCrossShardTransactionsOnShardWhenInClientTransaction) {
+ setMongos(false);
+ ON_BLOCK_EXIT([&] { setMongos(true); });
+
opCtx()->setLogicalSessionId(makeLogicalSessionIdForTest());
opCtx()->setTxnNumber(5);
opCtx()->setInMultiDocumentTransaction();
@@ -1954,6 +1966,9 @@ TEST_F(TxnAPITest, DoNotAllowCrossShardTransactionsOnShardWhenInClientTransactio
}
TEST_F(TxnAPITest, DoNotAllowCrossShardTransactionsOnShardWhenInRetryableWrite) {
+ setMongos(false);
+ ON_BLOCK_EXIT([&] { setMongos(true); });
+
opCtx()->setLogicalSessionId(makeLogicalSessionIdForTest());
opCtx()->setTxnNumber(5);
ASSERT_THROWS_CODE(
@@ -1963,21 +1978,170 @@ TEST_F(TxnAPITest, DoNotAllowCrossShardTransactionsOnShardWhenInRetryableWrite)
}
TEST_F(TxnAPITest, AllowCrossShardTransactionsOnMongosWhenInRetryableWrite) {
+ setMongos(true);
+ ON_BLOCK_EXIT([&] { setMongos(false); });
+
opCtx()->setLogicalSessionId(makeLogicalSessionIdForTest());
opCtx()->setTxnNumber(5);
- setMongos(true);
resetTxnWithRetriesWithClient(std::make_unique<MockClusterOperationTransactionClient>());
- setMongos(false);
}
TEST_F(TxnAPITest, AllowCrossShardTransactionsOnMongosWhenInClientTransaction) {
+ setMongos(true);
+ ON_BLOCK_EXIT([&] { setMongos(false); });
+
opCtx()->setLogicalSessionId(makeLogicalSessionIdForTest());
opCtx()->setTxnNumber(5);
opCtx()->setInMultiDocumentTransaction();
- setMongos(true);
resetTxnWithRetriesWithClient(std::make_unique<MockClusterOperationTransactionClient>());
+}
+
+TEST_F(TxnAPITest, FailoverAndShutdownErrorsAreFatalForLocalTransactionBodyError) {
setMongos(false);
+ ON_BLOCK_EXIT([&] { setMongos(true); });
+ auto runTest = [&](bool expectSuccess, Status status) {
+ resetTxnWithRetries();
+
+ int attempt = -1;
+ auto swResult = txnWithRetries().runNoThrow(
+ opCtx(), [&](const txn_api::TransactionClient& txnClient, ExecutorPtr txnExec) {
+ attempt += 1;
+
+ mockClient()->setNextCommandResponse(kOKInsertResponse);
+ auto insertRes = txnClient
+ .runCommand("user"_sd,
+ BSON("insert"
+ << "foo"
+ << "documents" << BSON_ARRAY(BSON("x" << 1))))
+ .get();
+ ASSERT_OK(getStatusFromWriteCommandReply(insertRes));
+
+ // Only throw once to verify the API gives up right away.
+ if (attempt == 0) {
+ uassertStatusOK(status);
+ }
+ // The commit response.
+ mockClient()->setNextCommandResponse(kOKCommandResponse);
+ return SemiFuture<void>::makeReady();
+ });
+ if (!expectSuccess) {
+ ASSERT_EQ(swResult.getStatus(), status);
+
+ // The API should have returned without trying to abort.
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "insert"_sd);
+ } else {
+ ASSERT(swResult.getStatus().isOK());
+ ASSERT(swResult.getValue().getEffectiveStatus().isOK());
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "commitTransaction"_sd);
+ }
+ };
+
+ runTest(false, Status(ErrorCodes::InterruptedDueToReplStateChange, "mock repl change error"));
+ runTest(false, Status(ErrorCodes::InterruptedAtShutdown, "mock shutdown error"));
+
+ // Verify that the fatal-for-local logic doesn't apply to all transient or retriable errors.
+ runTest(true, Status(ErrorCodes::HostUnreachable, "mock transient error"));
}
+TEST_F(TxnAPITest, FailoverAndShutdownErrorsAreFatalForLocalTransactionCommandError) {
+ setMongos(false);
+ ON_BLOCK_EXIT([&] { setMongos(true); });
+ auto runTest = [&](bool expectSuccess, Status status) {
+ resetTxnWithRetries();
+
+ int attempt = -1;
+ auto swResult = txnWithRetries().runNoThrow(
+ opCtx(), [&](const txn_api::TransactionClient& txnClient, ExecutorPtr txnExec) {
+ attempt += 1;
+
+ mockClient()->setNextCommandResponse(kOKInsertResponse);
+ auto insertRes = txnClient
+ .runCommand("user"_sd,
+ BSON("insert"
+ << "foo"
+ << "documents" << BSON_ARRAY(BSON("x" << 1))))
+ .get();
+ ASSERT_OK(getStatusFromWriteCommandReply(insertRes));
+
+ // The commit response.
+ mockClient()->setNextCommandResponse(BSON("ok" << 0 << "code" << status.code()));
+ mockClient()->setNextCommandResponse(kOKCommandResponse);
+ return SemiFuture<void>::makeReady();
+ });
+ if (!expectSuccess) {
+ ASSERT(swResult.getStatus().isOK());
+ ASSERT_EQ(swResult.getValue().cmdStatus, status);
+ ASSERT(swResult.getValue().wcError.toStatus().isOK());
+
+ // The API should have returned without trying to abort.
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "commitTransaction"_sd);
+ } else {
+ ASSERT(swResult.getStatus().isOK());
+ ASSERT(swResult.getValue().getEffectiveStatus().isOK());
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "commitTransaction"_sd);
+ }
+ };
+
+ runTest(false, Status(ErrorCodes::InterruptedDueToReplStateChange, "mock repl change error"));
+ runTest(false, Status(ErrorCodes::InterruptedAtShutdown, "mock shutdown error"));
+
+ // Verify that the fatal-for-local logic doesn't apply to all transient or retriable errors.
+ runTest(true, Status(ErrorCodes::HostUnreachable, "mock retriable error"));
+}
+
+TEST_F(TxnAPITest, FailoverAndShutdownErrorsAreFatalForLocalTransactionWCError) {
+ setMongos(false);
+ ON_BLOCK_EXIT([&] { setMongos(true); });
+ auto runTest = [&](bool expectSuccess, Status status) {
+ resetTxnWithRetries();
+
+ int attempt = -1;
+ auto swResult = txnWithRetries().runNoThrow(
+ opCtx(), [&](const txn_api::TransactionClient& txnClient, ExecutorPtr txnExec) {
+ attempt += 1;
+
+ mockClient()->setNextCommandResponse(kOKInsertResponse);
+ auto insertRes = txnClient
+ .runCommand("user"_sd,
+ BSON("insert"
+ << "foo"
+ << "documents" << BSON_ARRAY(BSON("x" << 1))))
+ .get();
+ ASSERT_OK(getStatusFromWriteCommandReply(insertRes));
+
+ // The commit response.
+ auto wcError = BSON("code" << status.code() << "errmsg"
+ << "mock");
+ auto resWithWCError = BSON("ok" << 1 << "writeConcernError" << wcError);
+ mockClient()->setNextCommandResponse(resWithWCError);
+ mockClient()->setNextCommandResponse(kOKCommandResponse);
+ return SemiFuture<void>::makeReady();
+ });
+ if (!expectSuccess) {
+ ASSERT(swResult.getStatus().isOK());
+ ASSERT(swResult.getValue().cmdStatus.isOK());
+ ASSERT_EQ(swResult.getValue().wcError.toStatus(), status);
+
+ // The API should have returned without trying to abort.
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "commitTransaction"_sd);
+ } else {
+ ASSERT(swResult.getStatus().isOK());
+ ASSERT(swResult.getValue().getEffectiveStatus().isOK());
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "commitTransaction"_sd);
+ }
+ };
+
+ runTest(false, Status(ErrorCodes::InterruptedDueToReplStateChange, "mock repl change error"));
+ runTest(false, Status(ErrorCodes::InterruptedAtShutdown, "mock shutdown error"));
+
+ // Verify that the fatal-for-local logic doesn't apply to all transient or retriable errors.
+ runTest(true, Status(ErrorCodes::HostUnreachable, "mock retriable error"));
+}
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp
index 04412b5df11..a4d31c366d4 100644
--- a/src/mongo/db/transaction_participant.cpp
+++ b/src/mongo/db/transaction_participant.cpp
@@ -207,8 +207,15 @@ struct ActiveTransactionHistory {
ActiveTransactionHistory fetchActiveTransactionHistory(OperationContext* opCtx,
const LogicalSessionId& lsid,
bool fetchOplogEntries) {
- // Storage engine operations require at least Global IS.
- Lock::GlobalLock lk(opCtx, MODE_IS);
+ // FlowControl is only impacted when a MODE_IX global lock is acquired. If we are in a
+ // multi-document transaction, we must acquire a MODE_IX global lock. Prevent obtaining a flow
+ // control ticket while in a multi-document transaction.
+ FlowControl::Bypass flowControlBypass(opCtx);
+
+ // Storage engine operations require at least a global MODE_IS lock. In multi-document
+ // transactions, storage operations require at least a global MODE_IX lock. Prevent lock
+ // upgrading in the case of a multi-document transaction.
+ Lock::GlobalLock lk(opCtx, opCtx->inMultiDocumentTransaction() ? MODE_IX : MODE_IS);
ActiveTransactionHistory result;
@@ -612,6 +619,17 @@ TransactionParticipant::getOldestActiveTimestamp(Timestamp stableTimestamp) {
}
}
+boost::optional<TxnNumber> TransactionParticipant::Observer::getClientTxnNumber(
+ const TxnNumberAndRetryCounter& txnNumberAndRetryCounter) const {
+ if (_isInternalSessionForNonRetryableWrite()) {
+ return boost::none;
+ } else if (_isInternalSessionForRetryableWrite()) {
+ invariant(_sessionId().getTxnNumber());
+ return _sessionId().getTxnNumber();
+ }
+ return {txnNumberAndRetryCounter.getTxnNumber()};
+}
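A brief sketch of the cases handled above (the session kinds and the number are hypothetical):

    // - internal session for a non-retryable write:                  returns boost::none
    // - internal session for a retryable write whose lsid carries
    //   txnNumber 5:                                                  returns 5
    // - ordinary client session:                                      returns the passed-in
    //                                                                 txnNumber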
+
Session* TransactionParticipant::Observer::_session() const {
return getTransactionParticipant.owner(_tp);
}
@@ -652,29 +670,73 @@ boost::optional<TxnNumber> TransactionParticipant::Observer::_activeRetryableWri
void TransactionParticipant::Participant::_uassertNoConflictingInternalTransactionForRetryableWrite(
OperationContext* opCtx, const TxnNumberAndRetryCounter& txnNumberAndRetryCounter) {
+ auto clientTxnNumber = getClientTxnNumber(txnNumberAndRetryCounter);
+ if (!clientTxnNumber) {
+ // This must be a non-retryable child session transaction so there can't be a conflict.
+ return;
+ }
+
auto& retryableWriteTxnParticipantCatalog =
getRetryableWriteTransactionParticipantCatalog(opCtx);
- invariant(retryableWriteTxnParticipantCatalog.isValid());
+ retryableWriteTxnParticipantCatalog.checkForConflictingInternalTransactions(
+ opCtx, *clientTxnNumber, txnNumberAndRetryCounter);
+}
- for (const auto& it : retryableWriteTxnParticipantCatalog.getParticipants()) {
- const auto& txnParticipant = it.second;
+bool TransactionParticipant::Participant::_verifyCanBeginMultiDocumentTransaction(
+ OperationContext* opCtx, const TxnNumberAndRetryCounter& txnNumberAndRetryCounter) {
+ if (txnNumberAndRetryCounter.getTxnNumber() ==
+ o().activeTxnNumberAndRetryCounter.getTxnNumber()) {
+ if (txnNumberAndRetryCounter.getTxnRetryCounter() <
+ o().activeTxnNumberAndRetryCounter.getTxnRetryCounter()) {
+ uasserted(
+ TxnRetryCounterTooOldInfo(*o().activeTxnNumberAndRetryCounter.getTxnRetryCounter()),
+ str::stream() << "Cannot start a transaction at given transaction number "
+ << txnNumberAndRetryCounter.getTxnNumber() << " on session "
+ << _sessionId() << " using txnRetryCounter "
+ << txnNumberAndRetryCounter.getTxnRetryCounter()
+ << " because it has already been restarted using a "
+ << "higher txnRetryCounter "
+ << o().activeTxnNumberAndRetryCounter.getTxnRetryCounter());
+ } else if (txnNumberAndRetryCounter.getTxnRetryCounter() ==
+ o().activeTxnNumberAndRetryCounter.getTxnRetryCounter() ||
+ o().activeTxnNumberAndRetryCounter.getTxnRetryCounter() ==
+ kUninitializedTxnRetryCounter) {
+ // Servers in a sharded cluster can start a new transaction at the active transaction
+ // number to allow internal retries by routers on re-targeting errors, like
+ // StaleShard/DatabaseVersion or SnapshotTooOld.
+ uassert(ErrorCodes::ConflictingOperationInProgress,
+ "Only servers in a sharded cluster can start a new transaction at the active "
+ "transaction number",
+ serverGlobalParams.clusterRole != ClusterRole::None);
- if (txnParticipant._sessionId() == opCtx->getLogicalSessionId() ||
- !txnParticipant._isInternalSessionForRetryableWrite()) {
- continue;
- }
+ if (_isInternalSessionForRetryableWrite() &&
+ o().txnState.isInSet(TransactionState::kCommitted)) {
+ // This is a retry of a committed internal transaction for retryable writes so
+ // skip resetting the state and updating the metrics.
+ return true;
+ }
- uassert(ErrorCodes::RetryableTransactionInProgress,
- str::stream() << "Cannot run retryable write with session id " << _sessionId()
- << " and transaction number "
- << txnNumberAndRetryCounter.getTxnNumber()
- << " because it is being executed in a retryable internal transaction"
- << " with session id " << txnParticipant._sessionId()
- << " and transaction number "
- << txnParticipant.getActiveTxnNumberAndRetryCounter().getTxnNumber()
- << " in state " << txnParticipant.o().txnState,
- !txnParticipant.transactionIsOpen());
+ _uassertCanReuseActiveTxnNumberForTransaction(opCtx);
+ } else {
+ const auto restartableStates = TransactionState::kNone | TransactionState::kInProgress |
+ TransactionState::kAbortedWithoutPrepare | TransactionState::kAbortedWithPrepare;
+ uassert(ErrorCodes::IllegalOperation,
+ str::stream() << "Cannot restart transaction "
+ << txnNumberAndRetryCounter.getTxnNumber()
+ << " using txnRetryCounter "
+ << txnNumberAndRetryCounter.getTxnRetryCounter()
+ << " because it is already in state " << o().txnState
+ << " with txnRetryCounter "
+ << o().activeTxnNumberAndRetryCounter.getTxnRetryCounter(),
+ o().txnState.isInSet(restartableStates));
+ }
+ } else {
+ invariant(txnNumberAndRetryCounter.getTxnNumber() >
+ o().activeTxnNumberAndRetryCounter.getTxnNumber());
}
+
+ _uassertNoConflictingInternalTransactionForRetryableWrite(opCtx, txnNumberAndRetryCounter);
+ return false;
}
void TransactionParticipant::Participant::_uassertCanReuseActiveTxnNumberForTransaction(
@@ -808,57 +870,6 @@ void TransactionParticipant::Participant::_continueMultiDocumentTransaction(
void TransactionParticipant::Participant::_beginMultiDocumentTransaction(
OperationContext* opCtx, const TxnNumberAndRetryCounter& txnNumberAndRetryCounter) {
- if (txnNumberAndRetryCounter.getTxnNumber() ==
- o().activeTxnNumberAndRetryCounter.getTxnNumber()) {
- if (txnNumberAndRetryCounter.getTxnRetryCounter() <
- o().activeTxnNumberAndRetryCounter.getTxnRetryCounter()) {
- uasserted(
- TxnRetryCounterTooOldInfo(*o().activeTxnNumberAndRetryCounter.getTxnRetryCounter()),
- str::stream() << "Cannot start a transaction at given transaction number "
- << txnNumberAndRetryCounter.getTxnNumber() << " on session "
- << _sessionId() << " using txnRetryCounter "
- << txnNumberAndRetryCounter.getTxnRetryCounter()
- << " because it has already been restarted using a "
- << "higher txnRetryCounter "
- << o().activeTxnNumberAndRetryCounter.getTxnRetryCounter());
- } else if (txnNumberAndRetryCounter.getTxnRetryCounter() ==
- o().activeTxnNumberAndRetryCounter.getTxnRetryCounter() ||
- o().activeTxnNumberAndRetryCounter.getTxnRetryCounter() ==
- kUninitializedTxnRetryCounter) {
- // Servers in a sharded cluster can start a new transaction at the active transaction
- // number to allow internal retries by routers on re-targeting errors, like
- // StaleShard/DatabaseVersion or SnapshotTooOld.
- uassert(ErrorCodes::ConflictingOperationInProgress,
- "Only servers in a sharded cluster can start a new transaction at the active "
- "transaction number",
- serverGlobalParams.clusterRole != ClusterRole::None);
-
- if (_isInternalSessionForRetryableWrite() &&
- o().txnState.isInSet(TransactionState::kCommitted)) {
- // This is a retry of a committed internal transaction for retryable writes so
- // skip resetting the state and updating the metrics.
- return;
- }
-
- _uassertCanReuseActiveTxnNumberForTransaction(opCtx);
- } else {
- const auto restartableStates = TransactionState::kNone | TransactionState::kInProgress |
- TransactionState::kAbortedWithoutPrepare | TransactionState::kAbortedWithPrepare;
- uassert(ErrorCodes::IllegalOperation,
- str::stream() << "Cannot restart transaction "
- << txnNumberAndRetryCounter.getTxnNumber()
- << " using txnRetryCounter "
- << txnNumberAndRetryCounter.getTxnRetryCounter()
- << " because it is already in state " << o().txnState
- << " with txnRetryCounter "
- << o().activeTxnNumberAndRetryCounter.getTxnRetryCounter(),
- o().txnState.isInSet(restartableStates));
- }
- } else {
- invariant(txnNumberAndRetryCounter.getTxnNumber() >
- o().activeTxnNumberAndRetryCounter.getTxnNumber());
- }
-
// Aborts any in-progress txns.
_setNewTxnNumberAndRetryCounter(opCtx, txnNumberAndRetryCounter);
p().autoCommit = false;
@@ -1008,6 +1019,13 @@ void TransactionParticipant::Participant::beginOrContinue(
// an argument on the request. The 'startTransaction' argument currently can only be specified
// as true, which is verified earlier, when parsing the request.
invariant(*startTransaction);
+
+ auto isRetry = _verifyCanBeginMultiDocumentTransaction(opCtx, txnNumberAndRetryCounter);
+ if (isRetry) {
+ // This is a retry for the active transaction, so we don't throw, and we also don't need to
+ // start the transaction since that already happened.
+ return;
+ }
_beginMultiDocumentTransaction(opCtx, txnNumberAndRetryCounter);
}
@@ -2722,6 +2740,12 @@ void TransactionParticipant::Participant::_setNewTxnNumberAndRetryCounter(
if (o().txnState.isInProgress()) {
_abortTransactionOnSession(opCtx);
}
+ // If txnNumber ordering applies, abort any child transactions with a lesser txnNumber.
+ auto clientTxnNumber = getClientTxnNumber(txnNumberAndRetryCounter);
+ if (clientTxnNumber.has_value()) {
+ getRetryableWriteTransactionParticipantCatalog(opCtx).abortSupersededTransactions(
+ opCtx, *clientTxnNumber);
+ }
stdx::unique_lock<Client> lk(*opCtx->getClient());
o(lk).activeTxnNumberAndRetryCounter = txnNumberAndRetryCounter;
@@ -2753,8 +2777,8 @@ void RetryableWriteTransactionParticipantCatalog::addParticipant(
invariant(*txnNumber >= _activeTxnNumber);
if (txnNumber > _activeTxnNumber) {
+ reset();
_activeTxnNumber = *txnNumber;
- _participants.clear();
}
if (auto it = _participants.find(participant._sessionId()); it != _participants.end()) {
invariant(it->second._tp == participant._tp);
@@ -2766,6 +2790,7 @@ void RetryableWriteTransactionParticipantCatalog::addParticipant(
void RetryableWriteTransactionParticipantCatalog::reset() {
_activeTxnNumber = kUninitializedTxnNumber;
_participants.clear();
+ _hasSeenIncomingConflictingRetryableTransaction = false;
}
void RetryableWriteTransactionParticipantCatalog::markAsValid() {
@@ -2786,6 +2811,94 @@ bool RetryableWriteTransactionParticipantCatalog::isValid() const {
});
}
+void RetryableWriteTransactionParticipantCatalog::checkForConflictingInternalTransactions(
+ OperationContext* opCtx,
+ TxnNumber incomingClientTxnNumber,
+ const TxnNumberAndRetryCounter& incomingTxnNumberAndRetryCounter) {
+ invariant(isValid());
+
+ for (auto&& it : _participants) {
+ auto& sessionId = it.first;
+ auto& txnParticipant = it.second;
+
+ if (sessionId == opCtx->getLogicalSessionId() ||
+ !txnParticipant._isInternalSessionForRetryableWrite()) {
+ continue;
+ }
+
+ if (!txnParticipant.transactionIsOpen()) {
+ // The transaction isn't open, so it can't conflict with an incoming transaction.
+ continue;
+ }
+
+ auto clientTxnNumber =
+ txnParticipant.getClientTxnNumber(txnParticipant.getActiveTxnNumberAndRetryCounter());
+ invariant(clientTxnNumber.has_value());
+ if (*clientTxnNumber < incomingClientTxnNumber) {
+ // To match the behavior of client transactions when a logically earlier prepared
+ // transaction is in progress, throw an error to block the new transaction until the
+ // earlier one exits prepare.
+ uassert(ErrorCodes::RetryableTransactionInProgress,
+ "Operation conflicts with an earlier retryable transaction in prepare",
+ !txnParticipant.transactionIsPrepared());
+
+ // Otherwise skip this transaction because it will be aborted when this one begins.
+ continue;
+ }
+
+ if (!_hasSeenIncomingConflictingRetryableTransaction &&
+ txnParticipant.transactionIsInProgress()) {
+ // Only abort when the transaction is in progress since other states may not be safe,
+ // e.g. prepare.
+ _hasSeenIncomingConflictingRetryableTransaction = true;
+ txnParticipant._abortTransactionOnSession(opCtx);
+ } else {
+ uassert(
+ ErrorCodes::RetryableTransactionInProgress,
+ str::stream() << "Cannot run operation with session id "
+ << opCtx->getLogicalSessionId() << " and transaction number "
+ << incomingTxnNumberAndRetryCounter.getTxnNumber()
+ << " because it conflicts with an active operation with session id "
+ << sessionId << " and transaction number "
+ << txnParticipant.getActiveTxnNumberAndRetryCounter().getTxnNumber()
+ << " in state " << txnParticipant.o().txnState,
+ !txnParticipant.transactionIsOpen());
+ }
+ }
+}
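As a concrete reading of the policy implemented above (the numbers are hypothetical): if a retryable internal transaction for client txnNumber 5 is in progress on one child session and another retryable internal transaction for the same client txnNumber 5 arrives on a different child session, the first such conflict aborts the in-progress transaction and records _hasSeenIncomingConflictingRetryableTransaction; a later conflicting attempt made while a conflicting transaction is still open throws RetryableTransactionInProgress instead, so the newer attempt waits. Participants with a lower client txnNumber are skipped here and left to abortSupersededTransactions below, unless they are prepared, in which case the incoming operation must wait for prepare to finish.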
+
+void RetryableWriteTransactionParticipantCatalog::abortSupersededTransactions(
+ OperationContext* opCtx, TxnNumber incomingClientTxnNumber) {
+ if (!isValid()) {
+ // This was called while refreshing from storage or applying ops on a secondary, so skip it.
+ return;
+ }
+
+ for (auto&& it : _participants) {
+ auto& sessionId = it.first;
+ auto& txnParticipant = it.second;
+
+ if (sessionId == opCtx->getLogicalSessionId() ||
+ !txnParticipant._isInternalSessionForRetryableWrite()) {
+ continue;
+ }
+
+ // We should never try to abort a prepared transaction. We should have earlier thrown either
+ // RetryableTransactionInProgress or PreparedTransactionInProgress.
+ invariant(!txnParticipant.transactionIsPrepared(),
+ str::stream() << "Transaction on session " << sessionId
+ << " unexpectedly in prepare");
+
+ auto clientTxnNumber =
+ txnParticipant.getClientTxnNumber(txnParticipant.getActiveTxnNumberAndRetryCounter());
+ invariant(clientTxnNumber.has_value());
+ if (*clientTxnNumber < incomingClientTxnNumber &&
+ txnParticipant.transactionIsInProgress()) {
+ txnParticipant._abortTransactionOnSession(opCtx);
+ }
+ }
+}
+
void TransactionParticipant::Participant::refreshFromStorageIfNeeded(OperationContext* opCtx) {
return _refreshFromStorageIfNeeded(opCtx, true);
}
diff --git a/src/mongo/db/transaction_participant.h b/src/mongo/db/transaction_participant.h
index fa5b98757af..e584960b1c8 100644
--- a/src/mongo/db/transaction_participant.h
+++ b/src/mongo/db/transaction_participant.h
@@ -359,6 +359,17 @@ public:
*/
void reportUnstashedState(OperationContext* opCtx, BSONObjBuilder* builder) const;
+ /**
+ * Returns the transaction number associated with the client operation that spawned this
+ * transaction. ie the top-level txnNumber for a retryable write or client transaction or
+ * the txnNumber in the session id for a retryable transaction. The passed in
+ * txnNumberAndRetryCounter should be the active txnNumberAndRetryCounter of this
+ * participant. This must be provided so this method can be used before the participant's
+ * active txnNumberAndRetryCounter has been changed.
+ */
+ boost::optional<TxnNumber> getClientTxnNumber(
+ const TxnNumberAndRetryCounter& txnNumberAndRetryCounter) const;
+
protected:
explicit Observer(TransactionParticipant* tp) : _tp(tp) {}
@@ -919,6 +930,12 @@ public:
// byzantine messages, this check should never fail.
void _uassertCanReuseActiveTxnNumberForTransaction(OperationContext* opCtx);
+ // Verifies we can begin a multi-document transaction with the given txnNumber and
+ // txnRetryCounter. Throws if we cannot. Returns true if this is a retry of the active
+ // transaction and false otherwise.
+ bool _verifyCanBeginMultiDocumentTransaction(
+ OperationContext* opCtx, const TxnNumberAndRetryCounter& txnNumberAndRetryCounter);
+
// Attempt to begin or retry a retryable write at the given transaction number.
void _beginOrContinueRetryableWrite(
OperationContext* opCtx, const TxnNumberAndRetryCounter& txnNumberAndRetryCounter);
@@ -1250,10 +1267,35 @@ public:
*/
bool isValid() const;
+ /**
+ * If a transaction in the catalog conflicts with the incoming transaction and this is the first
+ * time that has happened, the conflicting transaction is aborted, on the assumption that the
+ * new transaction is likely from a fresher client and the client of the conflicting transaction
+ * has given up (e.g. crashed). To prevent livelocks if both clients are alive and retrying,
+ * RetryableTransactionInProgress is thrown on subsequent calls, forcing the incoming
+ * transaction to wait for the conflicting transaction to complete.
+ */
+ void checkForConflictingInternalTransactions(
+ OperationContext* opCtx,
+ TxnNumber incomingClientTxnNumber,
+ const TxnNumberAndRetryCounter& incomingTxnNumberAndRetryCounter);
+
+ /**
+ * Aborts any child transactions that are logically superseded by the incoming transaction, i.e.
+ * retryable transactions whose session id txnNumber is less than the incoming client txnNumber
+ * (the top-level txnNumber for a retryable write or client transaction, or the session id
+ * txnNumber for a retryable transaction).
+ */
+ void abortSupersededTransactions(OperationContext* opCtx, TxnNumber incomingClientTxnNumber);
+
private:
TxnNumber _activeTxnNumber{kUninitializedTxnNumber};
LogicalSessionIdMap<TransactionParticipant::Participant> _participants;
bool _isValid{false};
+
+ // Set to true after an incoming retryable transaction has conflicted with an open transaction in
+ // this catalog.
+ bool _hasSeenIncomingConflictingRetryableTransaction{false};
};
} // namespace mongo
diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp
index 736020b0bf1..ee477f77463 100644
--- a/src/mongo/db/transaction_participant_test.cpp
+++ b/src/mongo/db/transaction_participant_test.cpp
@@ -4874,18 +4874,44 @@ TEST_F(ShardTxnParticipantTest,
ASSERT_TRUE(txnParticipant.transactionIsInProgress());
}
-TEST_F(ShardTxnParticipantTest,
- CannotRetryInProgressTransactionForRetryableWrite_ConflictingTransactionForRetryableWrite) {
+TEST_F(ShardTxnParticipantTest, CannotRetryInProgressRetryableTxn_ConflictingRetryableTxn) {
const auto parentLsid = makeLogicalSessionIdForTest();
const auto parentTxnNumber = *opCtx()->getTxnNumber();
opCtx()->setLogicalSessionId(
makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
- auto sessionCheckout = checkOutSession();
- auto txnParticipant = TransactionParticipant::get(opCtx());
- ASSERT_TRUE(txnParticipant.transactionIsInProgress());
- OperationContextSession::checkIn(opCtx(), OperationContextSession::CheckInReason::kDone);
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+ // The first conflicting transaction should abort the active one.
+ const auto firstConflictingLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([firstConflictingLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(firstConflictingLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ txnParticipant.unstashTransactionResources(newOpCtx, "insert");
+ txnParticipant.stashTransactionResources(newOpCtx);
+ });
+ // Continuing the interrupted transaction should throw without aborting the new active
+ // transaction.
+ {
+ ASSERT_THROWS_CODE(checkOutSession(boost::none /* startNewTxn */),
+ AssertionException,
+ ErrorCodes::RetryableTransactionInProgress);
+ }
+
+ // A second conflicting transaction should throw and not abort the active one.
runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
newOpCtx->setLogicalSessionId(
makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
@@ -4900,20 +4926,71 @@ TEST_F(ShardTxnParticipantTest,
ErrorCodes::RetryableTransactionInProgress);
});
- ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ // Verify the first conflicting txn is still open.
+ runFunctionFromDifferentOpCtx([firstConflictingLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(firstConflictingLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, boost::none /* startTransaction */);
+ txnParticipant.unstashTransactionResources(newOpCtx, "insert");
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
}
-TEST_F(ShardTxnParticipantTest,
- CannotRetryInProgressTransactionForRetryableWrite_ConflictingRetryableWrite) {
+TEST_F(ShardTxnParticipantTest, CannotRetryInProgressRetryableTxn_ConflictingRetryableWrite) {
const auto parentLsid = makeLogicalSessionIdForTest();
const auto parentTxnNumber = *opCtx()->getTxnNumber();
opCtx()->setLogicalSessionId(
makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
- auto sessionCheckout = checkOutSession();
- auto txnParticipant = TransactionParticipant::get(opCtx());
- ASSERT_TRUE(txnParticipant.transactionIsInProgress());
- OperationContextSession::checkIn(opCtx(), OperationContextSession::CheckInReason::kDone);
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+ //
+ // The first conflicting retryable write should abort a conflicting retryable transaction.
+ //
+ runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(parentLsid);
+ newOpCtx->setTxnNumber(parentTxnNumber);
+
+ // Shouldn't throw.
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(newOpCtx,
+ {parentTxnNumber},
+ boost::none /* autocommit */,
+ boost::none /* startTransaction */);
+ });
+
+    // Continuing the interrupted transaction should throw because it was aborted. Note that this
+    // does not throw RetryableTransactionInProgress because the retryable write that aborted the
+    // transaction has already completed.
+ {
+ auto sessionCheckout = checkOutSession(boost::none /* startNewTxn */);
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_THROWS_CODE(txnParticipant.unstashTransactionResources(opCtx(), "insert"),
+ AssertionException,
+ ErrorCodes::NoSuchTransaction);
+ }
+
+ //
+ // The second conflicting retryable write should throw and not abort a conflicting retryable
+ // transaction.
+ //
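+    // Restart the retryable transaction so there is an active transaction for the second
+    // conflicting retryable write to conflict with.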
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ txnParticipant.unstashTransactionResources(opCtx(), "insert");
+ txnParticipant.stashTransactionResources(opCtx());
+ }
runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
newOpCtx->setLogicalSessionId(parentLsid);
@@ -4929,7 +5006,290 @@ TEST_F(ShardTxnParticipantTest,
ErrorCodes::RetryableTransactionInProgress);
});
- ASSERT_TRUE(txnParticipant.transactionIsInProgress());
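+    // Verify the original retryable transaction is still in progress and was not aborted by the
+    // second conflicting retryable write.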
+ {
+ auto sessionCheckout = checkOutSession(boost::none /* startNewTxn */);
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ txnParticipant.beginOrContinue(
+ opCtx(), {parentTxnNumber}, false /* autocommit */, boost::none /* startTransaction */);
+ txnParticipant.unstashTransactionResources(opCtx(), "insert");
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+}
+
+TEST_F(ShardTxnParticipantTest, RetryableTransactionInProgressCounterResetsUponNewTxnNumber) {
+ const auto parentLsid = makeLogicalSessionIdForTest();
+ auto parentTxnNumber = *opCtx()->getTxnNumber();
+
+ opCtx()->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+ // The first conflicting transaction should abort the active one.
+ const auto firstConflictingLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([firstConflictingLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(firstConflictingLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ txnParticipant.unstashTransactionResources(newOpCtx, "insert");
+ txnParticipant.stashTransactionResources(newOpCtx);
+ });
+
+ // A second conflicting transaction should throw and not abort the active one.
+ runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ ASSERT_THROWS_CODE(txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */),
+ AssertionException,
+ ErrorCodes::RetryableTransactionInProgress);
+ });
+
+ // Advance the txnNumber and verify the first new conflicting transaction does not throw
+ // RetryableTransactionInProgress.
+
+ parentTxnNumber += 1;
+ const auto higherChildLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherChildLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherChildLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
+
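+    // The first conflicting transaction at the new txnNumber should abort the active one rather
+    // than throw, since advancing the txnNumber reset the conflict counter.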
+ const auto higherFirstConflictingLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherFirstConflictingLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherFirstConflictingLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
+
+ // A second conflicting transaction should still throw and not abort the active one.
+ const auto higherSecondConflictingLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherSecondConflictingLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherSecondConflictingLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ ASSERT_THROWS_CODE(txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */),
+ AssertionException,
+ ErrorCodes::RetryableTransactionInProgress);
+ });
+}
+
+TEST_F(ShardTxnParticipantTest, HigherTxnNumberAbortsLowerChildTransactions_RetryableTxn) {
+ const auto parentLsid = makeLogicalSessionIdForTest();
+ auto parentTxnNumber = *opCtx()->getTxnNumber();
+
+ opCtx()->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+ // Advance the txnNumber and verify the first new conflicting transaction does not throw
+ // RetryableTransactionInProgress.
+
+ parentTxnNumber += 1;
+
+ const auto higherChildLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherChildLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherChildLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
+}
+
+TEST_F(ShardTxnParticipantTest, HigherTxnNumberAbortsLowerChildTransactions_RetryableWrite) {
+ const auto parentLsid = makeLogicalSessionIdForTest();
+ auto parentTxnNumber = *opCtx()->getTxnNumber();
+
+ opCtx()->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+    // Advance the txnNumber and verify the first new conflicting retryable write does not throw
+    // RetryableTransactionInProgress.
+
+ parentTxnNumber += 1;
+
+ runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(parentLsid);
+ newOpCtx->setTxnNumber(parentTxnNumber);
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(newOpCtx,
+ {parentTxnNumber},
+ boost::none /* autocommit */,
+ boost::none /* startTransaction */);
+ });
+}
+
+TEST_F(ShardTxnParticipantTest, HigherTxnNumberAbortsLowerChildTransactions_Transaction) {
+ const auto parentLsid = makeLogicalSessionIdForTest();
+ auto parentTxnNumber = *opCtx()->getTxnNumber();
+
+ opCtx()->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+ // Advance the txnNumber and verify the first new conflicting transaction does not throw
+ // RetryableTransactionInProgress.
+
+ parentTxnNumber += 1;
+
+ runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(parentLsid);
+ newOpCtx->setTxnNumber(parentTxnNumber);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(newOpCtx,
+ *newOpCtx->getTxnNumber(),
+ false /* autocommit */,
+ true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
+}
+
+TEST_F(ShardTxnParticipantTest, HigherTxnNumberDoesNotAbortPreparedLowerChildTransaction) {
+ const auto parentLsid = makeLogicalSessionIdForTest();
+ const auto parentTxnNumber = *opCtx()->getTxnNumber();
+
+ // Start a prepared child transaction.
+ opCtx()->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ txnParticipant.unstashTransactionResources(opCtx(), "prepareTransaction");
+ txnParticipant.prepareTransaction(opCtx(), {});
+ ASSERT(txnParticipant.transactionIsPrepared());
+ txnParticipant.stashTransactionResources(opCtx());
+ }
+
+    // Advance the txnNumber and verify that the first new conflicting transaction and retryable
+    // write both throw RetryableTransactionInProgress.
+
+ const auto higherParentTxnNumber = parentTxnNumber + 1;
+
+ const auto higherChildLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, higherParentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherChildLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherChildLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ ASSERT_THROWS_CODE(txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */),
+ AssertionException,
+ ErrorCodes::RetryableTransactionInProgress);
+ });
+
+ runFunctionFromDifferentOpCtx([parentLsid, higherParentTxnNumber](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(parentLsid);
+ newOpCtx->setTxnNumber(higherParentTxnNumber);
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ ASSERT_THROWS_CODE(txnParticipant.beginOrContinue(newOpCtx,
+ {higherParentTxnNumber},
+ boost::none /* autocommit */,
+ boost::none /* startTransaction */),
+ AssertionException,
+ ErrorCodes::RetryableTransactionInProgress);
+ });
+
+    // After the transaction leaves prepare, a conflicting internal transaction can still abort an
+    // active transaction.
+
+ {
+ auto sessionCheckout = checkOutSession(boost::none /* startNewTxn */);
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ txnParticipant.beginOrContinue(
+ opCtx(), {parentTxnNumber}, false /* autocommit */, boost::none /* startTransaction */);
+ txnParticipant.unstashTransactionResources(opCtx(), "abortTransaction");
+ txnParticipant.abortTransaction(opCtx());
+ }
+
+ runFunctionFromDifferentOpCtx([higherChildLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherChildLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
+
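+    // This conflicting internal transaction should abort the transaction started above now that
+    // no lower-txnNumber transaction is in prepare.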
+ const auto higherConflictingChildLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, higherParentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherConflictingChildLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherConflictingChildLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
}
TEST_F(ShardTxnParticipantTest,
diff --git a/src/mongo/db/transaction_validation.cpp b/src/mongo/db/transaction_validation.cpp
index 6571711cc76..2f4eedd12b5 100644
--- a/src/mongo/db/transaction_validation.cpp
+++ b/src/mongo/db/transaction_validation.cpp
@@ -43,53 +43,21 @@ namespace mongo {
using namespace fmt::literals;
-namespace {
-
-// TODO SERVER-65101: Replace this with a property on each command.
-const StringMap<int> retryableWriteCommands = {{"clusterDelete", 1},
- {"clusterInsert", 1},
- {"clusterUpdate", 1},
- {"delete", 1},
- {"findandmodify", 1},
- {"findAndModify", 1},
- {"insert", 1},
- {"testInternalTransactions", 1},
- {"update", 1},
- {"_recvChunkStart", 1},
- {"_configsvrRemoveChunks", 1},
- {"_configsvrRemoveTags", 1},
- {"_shardsvrCreateCollectionParticipant", 1},
- {"_shardsvrDropCollectionParticipant", 1},
- {"_shardsvrRenameCollectionParticipant", 1},
- {"_shardsvrRenameCollectionParticipantUnblock", 1},
- {"_configsvrRenameCollectionMetadata", 1},
- {"_shardsvrParticipantBlock", 1},
- {"_configsvrCollMod", 1},
- {"_shardsvrCollModParticipant", 1},
- {"_shardsvrSetClusterParameter", 1},
- {"_shardsvrSetUserWriteBlockMode", 1}};
-
-// TODO SERVER-65101: Replace this with a property on each command.
-// Commands that can be sent with session info but should not check out a session.
-const StringMap<int> skipSessionCheckoutList = {
- {"coordinateCommitTransaction", 1}, {"_recvChunkStart", 1}, {"replSetStepDown", 1}};
-
-// TODO SERVER-65101: Replace this with a property on each command.
-const StringMap<int> transactionCommands = {{"abortTransaction", 1},
- {"clusterAbortTransaction", 1},
- {"clusterCommitTransaction", 1},
- {"commitTransaction", 1},
- {"coordinateCommitTransaction", 1},
- {"prepareTransaction", 1}};
-
-} // namespace
-
bool isRetryableWriteCommand(StringData cmdName) {
- return retryableWriteCommands.find(cmdName) != retryableWriteCommands.cend();
+ auto command = CommandHelpers::findCommand(cmdName);
+ uassert(ErrorCodes::CommandNotFound,
+ str::stream() << "Encountered unknown command during retryability check: " << cmdName,
+ command);
+ return command->supportsRetryableWrite();
}
bool isTransactionCommand(StringData cmdName) {
- return transactionCommands.find(cmdName) != transactionCommands.cend();
+ auto command = CommandHelpers::findCommand(cmdName);
+ uassert(ErrorCodes::CommandNotFound,
+ str::stream() << "Encountered unknown command during isTransactionCommand check: "
+ << cmdName,
+ command);
+ return command->isTransactionCommand();
}
void validateWriteConcernForTransaction(const WriteConcernOptions& wcResult, StringData cmdName) {
@@ -104,10 +72,6 @@ bool isReadConcernLevelAllowedInTransaction(repl::ReadConcernLevel readConcernLe
readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern;
}
-bool shouldCommandSkipSessionCheckout(StringData cmdName) {
- return skipSessionCheckoutList.find(cmdName) != skipSessionCheckoutList.cend();
-}
-
void validateSessionOptions(const OperationSessionInfoFromClient& sessionOptions,
StringData cmdName,
const NamespaceString& nss,
diff --git a/src/mongo/db/transaction_validation.h b/src/mongo/db/transaction_validation.h
index b3372dfe5b4..e3c4d21f201 100644
--- a/src/mongo/db/transaction_validation.h
+++ b/src/mongo/db/transaction_validation.h
@@ -57,12 +57,6 @@ void validateWriteConcernForTransaction(const WriteConcernOptions& wcResult, Str
bool isReadConcernLevelAllowedInTransaction(repl::ReadConcernLevel readConcernLevel);
/**
- * Returns true if the given command is one of the commands that does not check out a session
- * regardless of its session options, e.g. two-phase commit commands.
- */
-bool shouldCommandSkipSessionCheckout(StringData cmdName);
-
-/**
* Throws if the given session options are invalid for the given command and target namespace.
*/
void validateSessionOptions(const OperationSessionInfoFromClient& sessionOptions,
diff --git a/src/mongo/db/views/durable_view_catalog.cpp b/src/mongo/db/views/durable_view_catalog.cpp
index 361505220b8..900988f8dbd 100644
--- a/src/mongo/db/views/durable_view_catalog.cpp
+++ b/src/mongo/db/views/durable_view_catalog.cpp
@@ -54,13 +54,16 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage
-
namespace mongo {
namespace {
void validateViewDefinitionBSON(OperationContext* opCtx,
const BSONObj& viewDefinition,
StringData dbName) {
+ // Internal callers should always pass in a valid 'dbName' against which to compare the
+ // 'viewDefinition'.
+ invariant(NamespaceString::validDBName(dbName));
+
bool valid = true;
for (const BSONElement& e : viewDefinition) {
@@ -122,7 +125,7 @@ Status DurableViewCatalog::onExternalInsert(OperationContext* opCtx,
const BSONObj& doc,
const NamespaceString& name) {
try {
- validateViewDefinitionBSON(opCtx, doc, name.toString());
+ validateViewDefinitionBSON(opCtx, doc, name.db());
} catch (const DBException& e) {
return e.toStatus();
}
@@ -132,9 +135,6 @@ Status DurableViewCatalog::onExternalInsert(OperationContext* opCtx,
NamespaceString viewOn(name.db(), doc.getStringField("viewOn"));
BSONArray pipeline(doc.getObjectField("pipeline"));
BSONObj collation(doc.getObjectField("collation"));
- // Set updateDurableViewCatalog to false because the view has already been inserted into the
- // durable view catalog.
- const bool updateDurableViewCatalog = false;
return catalog->createView(opCtx,
viewName,
@@ -142,7 +142,7 @@ Status DurableViewCatalog::onExternalInsert(OperationContext* opCtx,
pipeline,
collation,
view_catalog_helpers::validatePipeline,
- updateDurableViewCatalog);
+ CollectionCatalog::ViewUpsertMode::kAlreadyDurableView);
}
void DurableViewCatalog::onSystemViewsCollectionDrop(OperationContext* opCtx,
diff --git a/src/mongo/dbtests/SConscript b/src/mongo/dbtests/SConscript
index 59e5fe7547c..0198ea648b8 100644
--- a/src/mongo/dbtests/SConscript
+++ b/src/mongo/dbtests/SConscript
@@ -30,6 +30,7 @@ env.Library(
],
LIBDEPS=[
'$BUILD_DIR/mongo/db/catalog/catalog_impl',
+ '$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/dbdirectclient',
'$BUILD_DIR/mongo/db/index/index_access_method_factory',
'$BUILD_DIR/mongo/db/index/index_access_methods',
diff --git a/src/mongo/dbtests/dbhelper_tests.cpp b/src/mongo/dbtests/dbhelper_tests.cpp
index 841f6de76a1..b7cf3eeec93 100644
--- a/src/mongo/dbtests/dbhelper_tests.cpp
+++ b/src/mongo/dbtests/dbhelper_tests.cpp
@@ -150,7 +150,7 @@ public:
}
BSONObj result;
- Helpers::findById(opCtx1.get(), db, nss.ns(), idQuery, result, nullptr, nullptr);
+ Helpers::findById(opCtx1.get(), nss.ns(), idQuery, result, nullptr, nullptr);
ASSERT_BSONOBJ_EQ(result, doc);
// Assert that the same doc still exists after findByIdAndNoopUpdate
@@ -214,11 +214,11 @@ private:
// Assert that the doc still exists in the collection.
BSONObj res1;
- Helpers::findById(opCtx1, db, nss.ns(), idQuery, res1, nullptr, nullptr);
+ Helpers::findById(opCtx1, nss.ns(), idQuery, res1, nullptr, nullptr);
ASSERT_BSONOBJ_EQ(res1, doc);
BSONObj res2;
- Helpers::findById(opCtx2, db, nss.ns(), idQuery, res2, nullptr, nullptr);
+ Helpers::findById(opCtx2, nss.ns(), idQuery, res2, nullptr, nullptr);
ASSERT_BSONOBJ_EQ(res2, doc);
// Assert that findByIdAndNoopUpdate did not generate an oplog entry.
@@ -258,11 +258,11 @@ private:
// Assert that the first storage transaction succeeded and that the doc is removed.
BSONObj res1;
- Helpers::findById(opCtx1, db, nss.ns(), idQuery, res1, nullptr, nullptr);
+ Helpers::findById(opCtx1, nss.ns(), idQuery, res1, nullptr, nullptr);
ASSERT_BSONOBJ_EQ(res1, BSONObj());
BSONObj res2;
- Helpers::findById(opCtx2, db, nss.ns(), idQuery, res2, nullptr, nullptr);
+ Helpers::findById(opCtx2, nss.ns(), idQuery, res2, nullptr, nullptr);
ASSERT_BSONOBJ_EQ(res2, BSONObj());
}
diff --git a/src/mongo/dbtests/dbtests.cpp b/src/mongo/dbtests/dbtests.cpp
index 9c51f08adab..f84874775f7 100644
--- a/src/mongo/dbtests/dbtests.cpp
+++ b/src/mongo/dbtests/dbtests.cpp
@@ -164,7 +164,7 @@ Status createIndexFromSpec(OperationContext* opCtx, StringData ns, const BSONObj
}
WriteUnitOfWork wunit(opCtx);
ASSERT_OK(indexer.commit(opCtx,
- collection.getWritableCollection(),
+ collection.getWritableCollection(opCtx),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
ASSERT_OK(opCtx->recoveryUnit()->setTimestamp(Timestamp(1, 1)));
diff --git a/src/mongo/dbtests/indexcatalogtests.cpp b/src/mongo/dbtests/indexcatalogtests.cpp
index 7a648f654f0..a0fac2327fc 100644
--- a/src/mongo/dbtests/indexcatalogtests.cpp
+++ b/src/mongo/dbtests/indexcatalogtests.cpp
@@ -205,7 +205,7 @@ public:
CollectionWriter coll(&opCtx, autoColl);
WriteUnitOfWork wuow(&opCtx);
- coll.getWritableCollection()->updateTTLSetting(&opCtx, "x_1", 10);
+ coll.getWritableCollection(&opCtx)->updateTTLSetting(&opCtx, "x_1", 10);
wuow.commit();
}
@@ -219,8 +219,8 @@ public:
// Notify the catalog of the change.
WriteUnitOfWork wuow(&opCtx);
- desc = coll.getWritableCollection()->getIndexCatalog()->refreshEntry(
- &opCtx, coll.getWritableCollection(), desc, CreateIndexEntryFlags::kIsReady);
+ desc = coll.getWritableCollection(&opCtx)->getIndexCatalog()->refreshEntry(
+ &opCtx, coll.getWritableCollection(&opCtx), desc, CreateIndexEntryFlags::kIsReady);
wuow.commit();
}
diff --git a/src/mongo/dbtests/indexupdatetests.cpp b/src/mongo/dbtests/indexupdatetests.cpp
index 2988f9f658a..ea7389acaee 100644
--- a/src/mongo/dbtests/indexupdatetests.cpp
+++ b/src/mongo/dbtests/indexupdatetests.cpp
@@ -100,7 +100,7 @@ protected:
uassertStatusOK(indexer.insertAllDocumentsInCollection(_opCtx, collection().get()));
WriteUnitOfWork wunit(_opCtx);
ASSERT_OK(indexer.commit(_opCtx,
- collection().getWritableCollection(),
+ collection().getWritableCollection(_opCtx),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
@@ -169,7 +169,7 @@ public:
WriteUnitOfWork wunit(_opCtx);
ASSERT_OK(indexer.commit(_opCtx,
- coll.getWritableCollection(),
+ coll.getWritableCollection(_opCtx),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
@@ -250,8 +250,8 @@ public:
{
WriteUnitOfWork wunit(_opCtx);
// Drop all indexes including id index.
- coll.getWritableCollection()->getIndexCatalog()->dropAllIndexes(
- _opCtx, coll.getWritableCollection(), true, {});
+ coll.getWritableCollection(_opCtx)->getIndexCatalog()->dropAllIndexes(
+ _opCtx, coll.getWritableCollection(_opCtx), true, {});
// Insert some documents.
int32_t nDocs = 1000;
OpDebug* const nullOpDebug = nullptr;
@@ -352,7 +352,7 @@ Status IndexBuildBase::createIndex(const BSONObj& indexSpec) {
}
WriteUnitOfWork wunit(_opCtx);
ASSERT_OK(indexer.commit(_opCtx,
- collection().getWritableCollection(),
+ collection().getWritableCollection(_opCtx),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
diff --git a/src/mongo/dbtests/mock/mock_dbclient_connection.cpp b/src/mongo/dbtests/mock/mock_dbclient_connection.cpp
index 122dd397a2d..957888cfa35 100644
--- a/src/mongo/dbtests/mock/mock_dbclient_connection.cpp
+++ b/src/mongo/dbtests/mock/mock_dbclient_connection.cpp
@@ -143,7 +143,9 @@ std::unique_ptr<DBClientCursor> MockDBClientConnection::bsonArrayToCursor(BSONAr
}
std::unique_ptr<DBClientCursor> MockDBClientConnection::find(
- FindCommandRequest findRequest, const ReadPreferenceSetting& readPref) {
+ FindCommandRequest findRequest,
+ const ReadPreferenceSetting& /*unused*/,
+ ExhaustMode /*unused*/) {
checkConnection();
try {
int nToSkip = nToSkipFromResumeAfter(findRequest.getResumeAfter());
@@ -158,55 +160,6 @@ std::unique_ptr<DBClientCursor> MockDBClientConnection::find(
return nullptr;
}
-std::unique_ptr<mongo::DBClientCursor> MockDBClientConnection::query_DEPRECATED(
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- int limit,
- int nToSkip,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int batchSize,
- boost::optional<BSONObj> readConcernObj) {
- checkConnection();
-
- try {
- mongo::BSONArray result(_remoteServer->query(_remoteServerInstanceID,
- nsOrUuid,
- filter,
- querySettings,
- limit,
- nToSkip,
- fieldsToReturn,
- queryOptions,
- batchSize,
- readConcernObj));
-
- BSONArray resultsInCursor;
-
- // A simple mock implementation of a resumable query, where we skip the first 'n' fields
- // where 'n' is given by the mock resume token.
- auto nToSkip = 0;
- BSONObj querySettingsAsBSON = querySettings.getFullSettingsDeprecated();
- if (querySettingsAsBSON.hasField("$_resumeAfter")) {
- nToSkip = nToSkipFromResumeAfter(querySettingsAsBSON.getField("$_resumeAfter").Obj());
- }
-
- bool provideResumeToken = false;
- if (querySettingsAsBSON.hasField("$_requestResumeToken")) {
- provideResumeToken = true;
- }
-
-
- return bsonArrayToCursor(std::move(result), nToSkip, provideResumeToken, batchSize);
- } catch (const mongo::DBException&) {
- _failed.store(true);
- throw;
- }
-
- return nullptr;
-}
-
mongo::ConnectionString::ConnectionType MockDBClientConnection::type() const {
return mongo::ConnectionString::ConnectionType::kCustom;
}
diff --git a/src/mongo/dbtests/mock/mock_dbclient_connection.h b/src/mongo/dbtests/mock/mock_dbclient_connection.h
index 349ca7478cc..4b60f2bec4a 100644
--- a/src/mongo/dbtests/mock/mock_dbclient_connection.h
+++ b/src/mongo/dbtests/mock/mock_dbclient_connection.h
@@ -104,7 +104,6 @@ public:
// DBClientBase methods
//
using DBClientBase::find;
- using DBClientBase::query_DEPRECATED;
bool connect(const char* hostName, StringData applicationName, std::string& errmsg);
@@ -122,18 +121,8 @@ public:
std::pair<rpc::UniqueReply, DBClientBase*> runCommandWithTarget(OpMsgRequest request) override;
std::unique_ptr<DBClientCursor> find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) override;
-
- std::unique_ptr<mongo::DBClientCursor> query_DEPRECATED(
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter = BSONObj{},
- const Query& querySettings = Query(),
- int limit = 0,
- int nToSkip = 0,
- const mongo::BSONObj* fieldsToReturn = nullptr,
- int queryOptions = 0,
- int batchSize = 0,
- boost::optional<BSONObj> readConcernObj = boost::none) override;
+ const ReadPreferenceSetting& /*unused*/,
+ ExhaustMode /*unused*/) override;
uint64_t getSockCreationMicroSec() const override;
diff --git a/src/mongo/dbtests/mock/mock_remote_db_server.cpp b/src/mongo/dbtests/mock/mock_remote_db_server.cpp
index 90ea117509d..0b98308d1d2 100644
--- a/src/mongo/dbtests/mock/mock_remote_db_server.cpp
+++ b/src/mongo/dbtests/mock/mock_remote_db_server.cpp
@@ -228,20 +228,6 @@ mongo::BSONArray MockRemoteDBServer::find(MockRemoteDBServer::InstanceID id,
return findImpl(id, findRequest.getNamespaceOrUUID(), findRequest.getProjection());
}
-mongo::BSONArray MockRemoteDBServer::query(MockRemoteDBServer::InstanceID id,
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- int limit,
- int nToSkip,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int batchSize,
- boost::optional<BSONObj> readConcernObj) {
- BSONObj projection = fieldsToReturn ? *fieldsToReturn : BSONObj{};
- return findImpl(id, nsOrUuid, std::move(projection));
-}
-
mongo::ConnectionString::ConnectionType MockRemoteDBServer::type() const {
return mongo::ConnectionString::ConnectionType::kCustom;
}
diff --git a/src/mongo/dbtests/mock/mock_remote_db_server.h b/src/mongo/dbtests/mock/mock_remote_db_server.h
index 591b506a170..034ad8e7ea4 100644
--- a/src/mongo/dbtests/mock/mock_remote_db_server.h
+++ b/src/mongo/dbtests/mock/mock_remote_db_server.h
@@ -33,7 +33,6 @@
#include <vector>
#include "mongo/client/connection_string.h"
-#include "mongo/client/query.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/query/find_command_gen.h"
#include "mongo/rpc/unique_message.h"
@@ -168,20 +167,6 @@ public:
*/
mongo::BSONArray find(InstanceID id, const FindCommandRequest& findRequest);
- /**
- * Legacy query API: New callers should use 'find()' rather than this method.
- */
- mongo::BSONArray query(InstanceID id,
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- int limit = 0,
- int nToSkip = 0,
- const mongo::BSONObj* fieldsToReturn = nullptr,
- int queryOptions = 0,
- int batchSize = 0,
- boost::optional<BSONObj> readConcernObj = boost::none);
-
//
// Getters
//
diff --git a/src/mongo/dbtests/mock_dbclient_conn_test.cpp b/src/mongo/dbtests/mock_dbclient_conn_test.cpp
index a0da2e717ea..b9228513cf6 100644
--- a/src/mongo/dbtests/mock_dbclient_conn_test.cpp
+++ b/src/mongo/dbtests/mock_dbclient_conn_test.cpp
@@ -77,45 +77,6 @@ TEST(MockDBClientConnTest, QueryCount) {
}
}
-// This test should be removed when the legacy query API is removed.
-TEST(MockDBClientConnTest, LegacyQueryApiBumpsQueryCount) {
- MockRemoteDBServer server("test");
- MockDBClientConnection conn(&server);
- ASSERT_EQUALS(0U, server.getQueryCount());
- conn.query_DEPRECATED(NamespaceString("foo.bar"));
- ASSERT_EQUALS(1U, server.getQueryCount());
-}
-
-// This test should be removed when the legacy query API is removed.
-TEST(MockDBClientConnTest, LegacyQueryApiReturnsInsertedDocuments) {
- MockRemoteDBServer server("test");
- const std::string ns("test.user");
-
- {
- MockDBClientConnection conn(&server);
- std::unique_ptr<mongo::DBClientCursor> cursor = conn.query_DEPRECATED(NamespaceString(ns));
- ASSERT(!cursor->more());
-
- server.insert(ns, BSON("x" << 1));
- server.insert(ns, BSON("y" << 2));
- }
-
- {
- MockDBClientConnection conn(&server);
- std::unique_ptr<mongo::DBClientCursor> cursor = conn.query_DEPRECATED(NamespaceString(ns));
-
- ASSERT(cursor->more());
- BSONObj firstDoc = cursor->next();
- ASSERT_EQUALS(1, firstDoc["x"].numberInt());
-
- ASSERT(cursor->more());
- BSONObj secondDoc = cursor->next();
- ASSERT_EQUALS(2, secondDoc["y"].numberInt());
-
- ASSERT(!cursor->more());
- }
-}
-
TEST(MockDBClientConnTest, SkipBasedOnResumeAfter) {
MockRemoteDBServer server{"test"};
const std::string ns{"test.user"};
@@ -714,15 +675,8 @@ TEST(MockDBClientConnTest, SimulateCallAndRecvResponses) {
MockRemoteDBServer server("test");
MockDBClientConnection conn(&server);
- mongo::DBClientCursor cursor(&conn,
- mongo::NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- mongo::QueryOption_Exhaust,
- 0);
+ FindCommandRequest findCmd{nss};
+ mongo::DBClientCursor cursor(&conn, findCmd, ReadPreferenceSetting{}, true /*isExhaust*/);
cursor.setBatchSize(2);
// Two batches from the initial find and getMore command.
@@ -787,8 +741,7 @@ TEST(MockDBClientConnTest, SimulateCallErrors) {
MockRemoteDBServer server("test");
MockDBClientConnection conn(&server);
- mongo::DBClientCursor cursor(
- &conn, mongo::NamespaceStringOrUUID(nss), BSONObj{}, Query(), 0, 0, nullptr, 0, 0);
+ mongo::DBClientCursor cursor(&conn, FindCommandRequest{nss}, ReadPreferenceSetting{}, false);
// Test network exception and error response for the initial find.
MockDBClientConnection::Responses callResponses = {
@@ -835,15 +788,8 @@ TEST(MockDBClientConnTest, SimulateRecvErrors) {
MockRemoteDBServer server("test");
MockDBClientConnection conn(&server);
- mongo::DBClientCursor cursor(&conn,
- mongo::NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- mongo::QueryOption_Exhaust,
- 0);
+ mongo::DBClientCursor cursor(
+ &conn, FindCommandRequest{nss}, ReadPreferenceSetting{}, true /*isExhaust*/);
runUntilExhaustRecv(&conn, &cursor);
@@ -884,15 +830,8 @@ TEST(MockDBClientConnTest, BlockingNetwork) {
MockRemoteDBServer server("test");
MockDBClientConnection conn(&server);
- mongo::DBClientCursor cursor(&conn,
- mongo::NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- mongo::QueryOption_Exhaust,
- 0);
+ mongo::DBClientCursor cursor(
+ &conn, FindCommandRequest{nss}, ReadPreferenceSetting{}, true /*isExhaust*/);
cursor.setBatchSize(1);
mongo::stdx::thread cursorThread([&] {
@@ -946,15 +885,8 @@ TEST(MockDBClientConnTest, ShutdownServerBeforeCall) {
ASSERT_OK(
conn.connect(mongo::HostAndPort("localhost", 12345), mongo::StringData(), boost::none));
- mongo::DBClientCursor cursor(&conn,
- mongo::NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- mongo::QueryOption_Exhaust,
- 0);
+ mongo::DBClientCursor cursor(
+ &conn, FindCommandRequest{nss}, ReadPreferenceSetting{}, true /*isExhaust*/);
// Shut down server before call.
server.shutdown();
@@ -972,15 +904,8 @@ TEST(MockDBClientConnTest, ShutdownServerAfterCall) {
MockRemoteDBServer server("test");
MockDBClientConnection conn(&server);
- mongo::DBClientCursor cursor(&conn,
- mongo::NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- mongo::QueryOption_Exhaust,
- 0);
+ mongo::DBClientCursor cursor(
+ &conn, FindCommandRequest{nss}, ReadPreferenceSetting{}, true /*isExhaust*/);
mongo::stdx::thread cursorThread([&] {
ASSERT_THROWS_CODE(cursor.init(), mongo::DBException, mongo::ErrorCodes::HostUnreachable);
@@ -1004,15 +929,8 @@ TEST(MockDBClientConnTest, ConnectionAutoReconnect) {
ASSERT_OK(
conn.connect(mongo::HostAndPort("localhost", 12345), mongo::StringData(), boost::none));
- mongo::DBClientCursor cursor(&conn,
- mongo::NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- mongo::QueryOption_Exhaust,
- 0);
+ mongo::DBClientCursor cursor(
+ &conn, FindCommandRequest{nss}, ReadPreferenceSetting{}, true /*isExhaust*/);
server.shutdown();
@@ -1037,15 +955,8 @@ TEST(MockDBClientConnTest, ShutdownServerBeforeRecv) {
MockRemoteDBServer server("test");
MockDBClientConnection conn(&server, autoReconnect);
- mongo::DBClientCursor cursor(&conn,
- mongo::NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- mongo::QueryOption_Exhaust,
- 0);
+ mongo::DBClientCursor cursor(
+ &conn, FindCommandRequest{nss}, ReadPreferenceSetting{}, true /*isExhaust*/);
runUntilExhaustRecv(&conn, &cursor);
@@ -1063,15 +974,8 @@ TEST(MockDBClientConnTest, ShutdownServerAfterRecv) {
MockRemoteDBServer server("test");
MockDBClientConnection conn(&server);
- mongo::DBClientCursor cursor(&conn,
- mongo::NamespaceStringOrUUID(nss),
- BSONObj{},
- Query(),
- 0,
- 0,
- nullptr,
- mongo::QueryOption_Exhaust,
- 0);
+ mongo::DBClientCursor cursor(
+ &conn, FindCommandRequest{nss}, ReadPreferenceSetting{}, true /*isExhaust*/);
runUntilExhaustRecv(&conn, &cursor);
diff --git a/src/mongo/dbtests/querytests.cpp b/src/mongo/dbtests/querytests.cpp
index d27ef3ac998..24dca8deaed 100644
--- a/src/mongo/dbtests/querytests.cpp
+++ b/src/mongo/dbtests/querytests.cpp
@@ -136,7 +136,7 @@ protected:
{
WriteUnitOfWork wunit(&_opCtx);
uassertStatusOK(indexer.commit(&_opCtx,
- collection.getWritableCollection(),
+ collection.getWritableCollection(&_opCtx),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
@@ -1401,10 +1401,10 @@ public:
ASSERT(Helpers::findOne(&_opCtx, ctx.getCollection(), BSON("_id" << 20), res));
ASSERT_EQUALS(40, res["x"].numberInt());
- ASSERT(Helpers::findById(&_opCtx, ctx.db(), ns(), BSON("_id" << 20), res));
+ ASSERT(Helpers::findById(&_opCtx, ns(), BSON("_id" << 20), res));
ASSERT_EQUALS(40, res["x"].numberInt());
- ASSERT(!Helpers::findById(&_opCtx, ctx.db(), ns(), BSON("_id" << 200), res));
+ ASSERT(!Helpers::findById(&_opCtx, ns(), BSON("_id" << 200), res));
long long slow;
long long fast;
@@ -1420,7 +1420,7 @@ public:
{
Timer t;
for (int i = 0; i < n; i++) {
- ASSERT(Helpers::findById(&_opCtx, ctx.db(), ns(), BSON("_id" << 20), res));
+ ASSERT(Helpers::findById(&_opCtx, ns(), BSON("_id" << 20), res));
}
fast = t.micros();
}
@@ -1445,7 +1445,7 @@ public:
BSONObj res;
for (int i = 0; i < 1000; i++) {
- bool found = Helpers::findById(&_opCtx, ctx.db(), ns(), BSON("_id" << i), res);
+ bool found = Helpers::findById(&_opCtx, ns(), BSON("_id" << i), res);
ASSERT_EQUALS(i % 2, int(found));
}
}
diff --git a/src/mongo/dbtests/repltests.cpp b/src/mongo/dbtests/repltests.cpp
index c3b5952029c..a929a347030 100644
--- a/src/mongo/dbtests/repltests.cpp
+++ b/src/mongo/dbtests/repltests.cpp
@@ -129,6 +129,8 @@ public:
createOplog(&_opCtx);
+ // Prevent upgrading from MODE_IX to MODE_X when deleteAll() is issued.
+ Lock::GlobalWrite lk(&_opCtx);
dbtests::WriteContextForTests ctx(&_opCtx, ns());
WriteUnitOfWork wuow(&_opCtx);
diff --git a/src/mongo/dbtests/rollbacktests.cpp b/src/mongo/dbtests/rollbacktests.cpp
index ad52aa873bb..368f3d4884e 100644
--- a/src/mongo/dbtests/rollbacktests.cpp
+++ b/src/mongo/dbtests/rollbacktests.cpp
@@ -86,7 +86,7 @@ Status renameCollection(OperationContext* opCtx,
}
Status truncateCollection(OperationContext* opCtx, const NamespaceString& nss) {
CollectionWriter coll(opCtx, nss);
- return coll.getWritableCollection()->truncate(opCtx);
+ return coll.getWritableCollection(opCtx)->truncate(opCtx);
}
void insertRecord(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& data) {
@@ -146,10 +146,11 @@ size_t getNumIndexEntries(OperationContext* opCtx,
void dropIndex(OperationContext* opCtx, const NamespaceString& nss, const string& idxName) {
CollectionWriter coll(opCtx, nss);
- auto desc = coll.getWritableCollection()->getIndexCatalog()->findIndexByName(opCtx, idxName);
+ auto desc =
+ coll.getWritableCollection(opCtx)->getIndexCatalog()->findIndexByName(opCtx, idxName);
ASSERT(desc);
- ASSERT_OK(coll.getWritableCollection()->getIndexCatalog()->dropIndex(
- opCtx, coll.getWritableCollection(), desc));
+ ASSERT_OK(coll.getWritableCollection(opCtx)->getIndexCatalog()->dropIndex(
+ opCtx, coll.getWritableCollection(opCtx), desc));
}
} // namespace
@@ -499,9 +500,9 @@ public:
{
WriteUnitOfWork uow(&opCtx);
- IndexCatalog* catalog = coll.getWritableCollection()->getIndexCatalog();
- ASSERT_OK(
- catalog->createIndexOnEmptyCollection(&opCtx, coll.getWritableCollection(), spec));
+ IndexCatalog* catalog = coll.getWritableCollection(&opCtx)->getIndexCatalog();
+ ASSERT_OK(catalog->createIndexOnEmptyCollection(
+ &opCtx, coll.getWritableCollection(&opCtx), spec));
insertRecord(&opCtx, nss, BSON("a" << 1));
insertRecord(&opCtx, nss, BSON("a" << 2));
insertRecord(&opCtx, nss, BSON("a" << 3));
@@ -539,9 +540,9 @@ public:
{
WriteUnitOfWork uow(&opCtx);
- IndexCatalog* catalog = coll.getWritableCollection()->getIndexCatalog();
- ASSERT_OK(
- catalog->createIndexOnEmptyCollection(&opCtx, coll.getWritableCollection(), spec));
+ IndexCatalog* catalog = coll.getWritableCollection(&opCtx)->getIndexCatalog();
+ ASSERT_OK(catalog->createIndexOnEmptyCollection(
+ &opCtx, coll.getWritableCollection(&opCtx), spec));
insertRecord(&opCtx, nss, BSON("a" << 1));
insertRecord(&opCtx, nss, BSON("a" << 2));
insertRecord(&opCtx, nss, BSON("a" << 3));
@@ -594,10 +595,10 @@ public:
{
WriteUnitOfWork uow(&opCtx);
- IndexCatalog* catalog = coll.getWritableCollection()->getIndexCatalog();
+ IndexCatalog* catalog = coll.getWritableCollection(&opCtx)->getIndexCatalog();
- ASSERT_OK(
- catalog->createIndexOnEmptyCollection(&opCtx, coll.getWritableCollection(), spec));
+ ASSERT_OK(catalog->createIndexOnEmptyCollection(
+ &opCtx, coll.getWritableCollection(&opCtx), spec));
insertRecord(&opCtx, nss, BSON("a" << 1));
insertRecord(&opCtx, nss, BSON("a" << 2));
insertRecord(&opCtx, nss, BSON("a" << 3));
@@ -649,7 +650,7 @@ public:
ASSERT_OK(ctx.db()->userCreateNS(&opCtx, nss, collectionOptions, false));
ASSERT(collectionExists(&opCtx, &ctx, nss.ns()));
CollectionWriter coll(&opCtx, nss);
- auto writableColl = coll.getWritableCollection();
+ auto writableColl = coll.getWritableCollection(&opCtx);
IndexCatalog* catalog = writableColl->getIndexCatalog();
ASSERT_OK(catalog->createIndexOnEmptyCollection(&opCtx, writableColl, specA));
diff --git a/src/mongo/dbtests/validate_tests.cpp b/src/mongo/dbtests/validate_tests.cpp
index d86df4b11d4..2d93082bbb4 100644
--- a/src/mongo/dbtests/validate_tests.cpp
+++ b/src/mongo/dbtests/validate_tests.cpp
@@ -3961,7 +3961,8 @@ public:
auto& indexMetadata = collMetadata->indexes[offset];
indexMetadata.multikeyPaths = {};
- writer.getWritableCollection()->replaceMetadata(&_opCtx, std::move(collMetadata));
+ writer.getWritableCollection(&_opCtx)->replaceMetadata(&_opCtx,
+ std::move(collMetadata));
wunit.commit();
}
@@ -3969,9 +3970,9 @@ public:
auto descriptor = coll()->getIndexCatalog()->findIndexByName(&_opCtx, indexName);
{
WriteUnitOfWork wunit(&_opCtx);
- auto writableCatalog = writer.getWritableCollection()->getIndexCatalog();
+ auto writableCatalog = writer.getWritableCollection(&_opCtx)->getIndexCatalog();
descriptor = writableCatalog->refreshEntry(&_opCtx,
- writer.getWritableCollection(),
+ writer.getWritableCollection(&_opCtx),
descriptor,
CreateIndexEntryFlags::kIsReady);
wunit.commit();
diff --git a/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp b/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp
index 43bc44216d9..0f706367de3 100644
--- a/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp
+++ b/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp
@@ -219,7 +219,7 @@ protected:
WriteUnitOfWork wunit(opCtx());
ASSERT_OK(indexer.commit(opCtx(),
- coll.getWritableCollection(),
+ coll.getWritableCollection(opCtx()),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
abortOnExit.dismiss();
diff --git a/src/mongo/embedded/SConscript b/src/mongo/embedded/SConscript
index b4243ca1ebf..735ba48d05b 100644
--- a/src/mongo/embedded/SConscript
+++ b/src/mongo/embedded/SConscript
@@ -80,6 +80,7 @@ env.Library(
'$BUILD_DIR/mongo/db/audit',
'$BUILD_DIR/mongo/db/auth/auth',
'$BUILD_DIR/mongo/db/catalog/catalog_impl',
+ '$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/catalog/index_key_validate',
'$BUILD_DIR/mongo/db/command_can_run_here',
'$BUILD_DIR/mongo/db/commands',
@@ -93,6 +94,7 @@ env.Library(
'$BUILD_DIR/mongo/db/index_builds_coordinator_interface',
'$BUILD_DIR/mongo/db/logical_session_cache',
'$BUILD_DIR/mongo/db/logical_session_cache_impl',
+ '$BUILD_DIR/mongo/db/op_observer',
'$BUILD_DIR/mongo/db/op_observer_impl',
'$BUILD_DIR/mongo/db/pipeline/process_interface/mongod_process_interfaces',
'$BUILD_DIR/mongo/db/repl/repl_coordinator_interface',
diff --git a/src/mongo/embedded/embedded_auth_manager.cpp b/src/mongo/embedded/embedded_auth_manager.cpp
index 22b122dfe66..5473cbee59a 100644
--- a/src/mongo/embedded/embedded_auth_manager.cpp
+++ b/src/mongo/embedded/embedded_auth_manager.cpp
@@ -100,7 +100,7 @@ public:
}
Status getRoleDescriptionsForDB(OperationContext*,
- const StringData,
+ const DatabaseName&,
PrivilegeFormat,
AuthenticationRestrictionsFormat,
bool,
diff --git a/src/mongo/embedded/service_entry_point_embedded.cpp b/src/mongo/embedded/service_entry_point_embedded.cpp
index f2485cbd8dc..48160f99630 100644
--- a/src/mongo/embedded/service_entry_point_embedded.cpp
+++ b/src/mongo/embedded/service_entry_point_embedded.cpp
@@ -53,8 +53,8 @@ namespace {
*/
class EmbeddedClientObserver final : public ServiceContext::ClientObserver {
void onCreateClient(Client* client) {
- auto seCtx = transport::ServiceExecutorContext{};
- seCtx.setThreadingModel(transport::ServiceExecutor::ThreadingModel::kDedicated);
+ auto seCtx = std::make_unique<transport::ServiceExecutorContext>();
+ seCtx->setThreadingModel(transport::ServiceExecutor::ThreadingModel::kDedicated);
transport::ServiceExecutorContext::set(client, std::move(seCtx));
}
void onDestroyClient(Client*) {}
diff --git a/src/mongo/executor/network_interface_tl.cpp b/src/mongo/executor/network_interface_tl.cpp
index 9ee2ffac0b6..d919bca3c3b 100644
--- a/src/mongo/executor/network_interface_tl.cpp
+++ b/src/mongo/executor/network_interface_tl.cpp
@@ -33,7 +33,7 @@
#include <fmt/format.h>
#include "mongo/config.h"
-#include "mongo/db/auth/security_token.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/server_options.h"
#include "mongo/db/wire_version.h"
#include "mongo/executor/connection_pool_tl.h"
@@ -71,9 +71,7 @@ Status appendMetadata(RemoteCommandRequestOnAny* request,
if (!request->opCtx)
return Status::OK();
- if (auto securityToken = auth::getSecurityToken(request->opCtx)) {
- request->securityToken = securityToken->toBSON();
- }
+ request->validatedTenancyScope = auth::ValidatedTenancyScope::get(request->opCtx);
return Status::OK();
}
diff --git a/src/mongo/executor/remote_command_request.h b/src/mongo/executor/remote_command_request.h
index 7d232a34756..cca27b03e71 100644
--- a/src/mongo/executor/remote_command_request.h
+++ b/src/mongo/executor/remote_command_request.h
@@ -33,6 +33,7 @@
#include <string>
#include "mongo/base/error_codes.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/jsobj.h"
#include "mongo/rpc/metadata.h"
#include "mongo/transport/transport_layer.h"
@@ -73,7 +74,7 @@ struct RemoteCommandRequestBase {
std::string dbname;
BSONObj metadata{rpc::makeEmptyMetadata()};
BSONObj cmdObj;
- BSONObj securityToken;
+ boost::optional<auth::ValidatedTenancyScope> validatedTenancyScope;
// OperationContext is added to each request to allow OP_Command metadata attachment access to
// the Client object. The OperationContext is only accessed on the thread that calls
diff --git a/src/mongo/idl/basic_types.idl b/src/mongo/idl/basic_types.idl
index 07a8e5fbf00..883c7a61e34 100644
--- a/src/mongo/idl/basic_types.idl
+++ b/src/mongo/idl/basic_types.idl
@@ -157,6 +157,13 @@ types:
cpp_type: "std::array<std::uint8_t, 16>"
deserializer: "mongo::BSONElement::uuid"
+ bindata_encrypt:
+ bson_serialization_type: bindata
+ bindata_subtype: encrypt
+ description: "A BSON bindata of encrypt sub type"
+ cpp_type: "std::vector<std::uint8_t>"
+ deserializer: "mongo::BSONElement::_binDataVector"
+
uuid:
bson_serialization_type: bindata
bindata_subtype: uuid
@@ -256,6 +263,7 @@ types:
cpp_type: "mongo::IDLAnyTypeOwned"
serializer: mongo::IDLAnyTypeOwned::serializeToBSON
deserializer: mongo::IDLAnyTypeOwned::parseFromBSON
+
tenant_id:
bson_serialization_type: any
description: "A struct representing a tenant id"
diff --git a/src/mongo/idl/cluster_server_parameter.idl b/src/mongo/idl/cluster_server_parameter.idl
index 9d99717cba5..14622beeae0 100644
--- a/src/mongo/idl/cluster_server_parameter.idl
+++ b/src/mongo/idl/cluster_server_parameter.idl
@@ -87,11 +87,6 @@ structs:
default: 0
feature_flags:
- featureFlagClusterWideConfig:
- description: Mechanism for cluster-wide configuration options
- cpp_varname: gFeatureFlagClusterWideConfig
- default: true
- version: 6.0
featureFlagClusterWideConfigM2:
description: Mechanism for cluster-wide configuration options, milestone 2
cpp_varname: gFeatureFlagClusterWideConfigM2
diff --git a/src/mongo/idl/cluster_server_parameter_initializer.h b/src/mongo/idl/cluster_server_parameter_initializer.h
index f40e48548ab..15cbd6010c2 100644
--- a/src/mongo/idl/cluster_server_parameter_initializer.h
+++ b/src/mongo/idl/cluster_server_parameter_initializer.h
@@ -92,7 +92,7 @@ private:
DBDirectClient client(opCtx);
FindCommandRequest findRequest{NamespaceString::kClusterParametersNamespace};
- client.find(std::move(findRequest), ReadPreferenceSetting{}, [&](BSONObj doc) {
+ client.find(std::move(findRequest), [&](BSONObj doc) {
try {
onEntry(opCtx, doc, mode);
} catch (const DBException& ex) {
diff --git a/src/mongo/idl/generic_argument.idl b/src/mongo/idl/generic_argument.idl
index fdf340cbef1..5cb308bd2e1 100644
--- a/src/mongo/idl/generic_argument.idl
+++ b/src/mongo/idl/generic_argument.idl
@@ -104,6 +104,7 @@ generic_argument_lists:
mayBypassWriteBlocking:
forward_to_shards: true
+
generic_reply_field_lists:
generic_reply_fields_api_v1:
description: "Fields that may appear in any command reply. These are guaranteed backwards-compatible for as long as the server supports API Version 1."
diff --git a/src/mongo/installer/msi/SConscript b/src/mongo/installer/msi/SConscript
index 9bc64115bde..a0d68195d88 100644
--- a/src/mongo/installer/msi/SConscript
+++ b/src/mongo/installer/msi/SConscript
@@ -121,7 +121,6 @@ candle_targets = env.Command(
' -dTargetExt=.msi'
' -dTargetFileName=${SERVER_ARCHIVE}'
r' -dSaslSource=c:\sasl\bin'
- r' -dSnmpSource=c:\snmp\bin'
r' -dSslSource=' + env['WINDOWS_OPENSSL_BIN'] + ' -out ' + buildDir + r'\msi\\'
' -arch ' + msi_platform + ' -ext "$WIXUIEXT"'
' -ext "$WIXUTILEXT"'
diff --git a/src/mongo/installer/msi/wxs/BinaryFragment.wxs b/src/mongo/installer/msi/wxs/BinaryFragment.wxs
index d82cbb04536..c0e2ad23db8 100644
--- a/src/mongo/installer/msi/wxs/BinaryFragment.wxs
+++ b/src/mongo/installer/msi/wxs/BinaryFragment.wxs
@@ -72,14 +72,6 @@
<File Id="f_saslPdb" Name="libsasl.pdb" Source="$(var.SaslSource)\libsasl.pdb"
DiskId="1" KeyPath="yes" />
</Component>
- <Component Id="c_snmp" Guid="F5E5A889-FC9F-4B9A-BEFD-C8ABC9A92D8D">
- <File Id="f_ssnmp" Name="netsnmp.dll" Source="$(var.SnmpSource)\netsnmp.dll"
- DiskId="1" KeyPath="yes" />
- </Component>
- <Component Id="c_snmpPdb" Guid="6AAB0ACE-C354-4D3A-B490-2AA235647AB3">
- <File Id="f_snmpPdb" Name="netsnmp.pdb" Source="$(var.SnmpSource)\netsnmp.pdb"
- DiskId="1" KeyPath="yes" />
- </Component>
<Component Id="c_mongocryptd" Guid="D243D194-B765-4DF8-BC67-8F4C329AD1B5">
<File Id="f_mongocryptd" Name="mongocryptd.exe" Source="$(var.BinarySource)\mongocryptd.exe"
DiskId ="1" KeyPath="yes"/>
@@ -90,30 +82,6 @@
</Component>
<?endif ?>
</DirectoryRef>
- <?if $(var.Edition) = Enterprise ?>
- <DirectoryRef Id="SNMP">
- <Component Id="c_snmpReadme" Guid="B968FBAC-1813-4039-9FED-A607A0E4CBB3">
- <File Id="f_snmpReadme" Name="README-snmp.txt" Source="$(var.EnterpriseBase)\docs\snmp\README-snmp.txt"
- DiskId="1" KeyPath="yes" />
- </Component>
- <Component Id="c_snmpConfMaster" Guid="0C8CAA6C-1473-4B14-9EE5-AF5A35B1DD8D">
- <File Id="f_snmpConfMaster" Name="mongod.conf.master" Source="$(var.EnterpriseBase)\docs\snmp\mongod.conf.master"
- DiskId="1" KeyPath="yes" />
- </Component>
- <Component Id="c_snmpConfSubagent" Guid="6FB66102-41A7-41BD-BB1F-1987E150FA78">
- <File Id="f_snmpConfSubagent" Name="mongod.conf.subagent" Source="$(var.EnterpriseBase)\docs\snmp\mongod.conf.subagent"
- DiskId="1" KeyPath="yes" />
- </Component>
- <Component Id="c_snmpMongodMib" Guid="F3E98C6B-FE42-44E7-8A1F-E47BDDD0A3D7">
- <File Id="f_snmpMongodMib" Name="MONGOD-MIB.txt" Source="$(var.EnterpriseBase)\docs\snmp\MONGOD-MIB.txt"
- DiskId="1" KeyPath="yes" />
- </Component>
- <Component Id="c_snmpMongodbincMib" Guid="58c6bd8e-a785-48a0-af48-42f6bf9f68b4">
- <File Id="f_snmpMongodbincMib" Name="MONGODBINC-MIB.txt" Source="$(var.EnterpriseBase)\docs\snmp\MONGODBINC-MIB.txt"
- DiskId="1" KeyPath="yes" />
- </Component>
- </DirectoryRef>
- <?endif ?>
<DirectoryRef Id="MONGO_DATA_PATH" >
<Component Id="c_MONGO_DATA_PATH" Guid="F695F048-E262-4871-A31B-0E2361BB4BCB">
<CreateFolder Directory="MONGO_DATA_PATH" >
@@ -143,13 +111,6 @@
</ComponentGroup>
<ComponentGroup Id="cg_EnterpriseServer">
<ComponentGroupRef Id="cg_EnterpriseBase" />
- <ComponentRef Id="c_snmp" />
- <ComponentRef Id="c_snmpPdb" />
- <ComponentRef Id="c_snmpReadme" />
- <ComponentRef Id="c_snmpConfMaster" />
- <ComponentRef Id="c_snmpConfSubagent" />
- <ComponentRef Id="c_snmpMongodMib" />
- <ComponentRef Id="c_snmpMongodbincMib" />
</ComponentGroup>
<?endif ?>
</Fragment>
diff --git a/src/mongo/installer/msi/wxs/Installer_64.wxs b/src/mongo/installer/msi/wxs/Installer_64.wxs
index 1eb5266fe3f..96f87899f1c 100644
--- a/src/mongo/installer/msi/wxs/Installer_64.wxs
+++ b/src/mongo/installer/msi/wxs/Installer_64.wxs
@@ -47,7 +47,6 @@
<Directory Id="BIN" Name="bin" />
<Directory Id="MONGO_DATA_PATH" Name="data" />
<Directory Id="MONGO_LOG_PATH" Name="log" />
- <Directory Id="SNMP" Name="snmp" />
</Directory>
</Directory>
</Directory>
diff --git a/src/mongo/logv2/logv2_test.cpp b/src/mongo/logv2/logv2_test.cpp
index e9c2cbd76ee..0144f9a86b2 100644
--- a/src/mongo/logv2/logv2_test.cpp
+++ b/src/mongo/logv2/logv2_test.cpp
@@ -40,7 +40,7 @@
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/json.h"
#include "mongo/bson/oid.h"
-#include "mongo/db/auth/security_token.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/tenant_id.h"
#include "mongo/logv2/bson_formatter.h"
#include "mongo/logv2/component_settings_filter.h"
diff --git a/src/mongo/rpc/SConscript b/src/mongo/rpc/SConscript
index a5734bc4f75..1184b58d111 100644
--- a/src/mongo/rpc/SConscript
+++ b/src/mongo/rpc/SConscript
@@ -37,6 +37,7 @@ protoEnv.Library(
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/multitenancy',
'$BUILD_DIR/mongo/db/wire_version',
],
LIBDEPS_PRIVATE=[
@@ -191,6 +192,7 @@ if wiredtiger:
LIBDEPS=[
'$BUILD_DIR/mongo/client/clientdriver_minimal',
'$BUILD_DIR/mongo/db/auth/auth',
+ '$BUILD_DIR/mongo/db/auth/authmocks',
'$BUILD_DIR/mongo/db/multitenancy_params',
'$BUILD_DIR/mongo/db/service_context_test_fixture',
'$BUILD_DIR/third_party/wiredtiger/wiredtiger_checksum',
diff --git a/src/mongo/rpc/factory.cpp b/src/mongo/rpc/factory.cpp
index aa024ac2da8..7e001edc0af 100644
--- a/src/mongo/rpc/factory.cpp
+++ b/src/mongo/rpc/factory.cpp
@@ -57,12 +57,13 @@ std::unique_ptr<ReplyInterface> makeReply(const Message* unownedMessage) {
}
}
-OpMsgRequest opMsgRequestFromAnyProtocol(const Message& unownedMessage) {
+OpMsgRequest opMsgRequestFromAnyProtocol(const Message& unownedMessage, Client* client) {
switch (unownedMessage.operation()) {
case mongo::dbMsg:
- return OpMsgRequest::parseOwned(unownedMessage);
- case mongo::dbQuery:
+ return OpMsgRequest::parseOwned(unownedMessage, client);
+ case mongo::dbQuery: {
return opMsgRequestFromLegacyRequest(unownedMessage);
+ }
default:
uasserted(ErrorCodes::UnsupportedFormat,
str::stream() << "Received a reply message with unexpected opcode: "
diff --git a/src/mongo/rpc/factory.h b/src/mongo/rpc/factory.h
index 9aec1557f19..514e7bd73c7 100644
--- a/src/mongo/rpc/factory.h
+++ b/src/mongo/rpc/factory.h
@@ -29,6 +29,7 @@
#pragma once
+#include "mongo/db/client.h"
#include "mongo/rpc/op_msg.h"
#include "mongo/rpc/protocol.h"
@@ -54,7 +55,7 @@ std::unique_ptr<ReplyInterface> makeReply(const Message* unownedMessage);
/**
* Parses the message (from any protocol) into an OpMsgRequest.
*/
-OpMsgRequest opMsgRequestFromAnyProtocol(const Message& unownedMessage);
+OpMsgRequest opMsgRequestFromAnyProtocol(const Message& unownedMessage, Client* client = nullptr);
/**
* Returns the appropriate concrete ReplyBuilder.
diff --git a/src/mongo/rpc/metadata.cpp b/src/mongo/rpc/metadata.cpp
index 7c97409875f..b54a4474cc2 100644
--- a/src/mongo/rpc/metadata.cpp
+++ b/src/mongo/rpc/metadata.cpp
@@ -33,7 +33,7 @@
#include "mongo/client/read_preference.h"
#include "mongo/db/auth/authorization_session.h"
-#include "mongo/db/auth/security_token.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/dbmessage.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/logical_time_validator.h"
@@ -95,9 +95,7 @@ void readRequestMetadata(OperationContext* opCtx, const OpMsg& opMsg, bool cmdRe
}
readImpersonatedUserMetadata(impersonationElem, opCtx);
- auth::readSecurityTokenMetadata(opCtx, opMsg.securityToken);
-
- parseDollarTenantFromRequest(opCtx, opMsg);
+ auth::ValidatedTenancyScope::set(opCtx, opMsg.validatedTenancyScope);
// We check for "$client" but not "client" here, because currentOp can filter on "client" as
// a top-level field.
diff --git a/src/mongo/rpc/metadata/client_metadata.cpp b/src/mongo/rpc/metadata/client_metadata.cpp
index f66941d803f..626a340284e 100644
--- a/src/mongo/rpc/metadata/client_metadata.cpp
+++ b/src/mongo/rpc/metadata/client_metadata.cpp
@@ -307,7 +307,7 @@ void ClientMetadata::serialize(StringData driverName,
ProcessInfo processInfo;
std::string appName;
- if (kDebugBuild) {
+ if (TestingProctor::instance().isEnabled()) {
appName = processInfo.getProcessName();
if (appName.length() > kMaxApplicationNameByteLength) {
static constexpr auto kEllipsis = "..."_sd;
@@ -368,7 +368,7 @@ Status ClientMetadata::serializePrivate(StringData driverName,
if (!appName.empty()) {
BSONObjBuilder subObjBuilder(metaObjBuilder.subobjStart(kApplication));
subObjBuilder.append(kName, appName);
- if (kDebugBuild) {
+ if (TestingProctor::instance().isEnabled()) {
subObjBuilder.append(kPid, ProcessId::getCurrent().toString());
}
}
diff --git a/src/mongo/rpc/metadata/client_metadata_test.cpp b/src/mongo/rpc/metadata/client_metadata_test.cpp
index 9d4c000abb9..994cdf489e8 100644
--- a/src/mongo/rpc/metadata/client_metadata_test.cpp
+++ b/src/mongo/rpc/metadata/client_metadata_test.cpp
@@ -47,6 +47,7 @@
#include "mongo/unittest/unittest.h"
#include "mongo/util/processinfo.h"
#include "mongo/util/scopeguard.h"
+#include "mongo/util/testing_proctor.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
@@ -104,8 +105,9 @@ TEST(ClientMetadataTest, TestLoopbackTest) {
.append(kApplication,
BOB{}
.append(kName, "g")
- .appendElements(kDebugBuild ? BOB{}.append(kPid, pid).obj()
- : BOB{}.obj())
+ .appendElements(TestingProctor::instance().isEnabled()
+ ? BOB{}.append(kPid, pid).obj()
+ : BOB{}.obj())
.obj())
.append(kDriver, BOB{}.append(kName, "a").append(kVersion, "b").obj())
.append(kOperatingSystem,
@@ -329,7 +331,9 @@ TEST(ClientMetadataTest, TestMongoSAppend) {
.append(kApplication,
BOB{}
.append(kName, "g")
- .appendElements(kDebugBuild ? BOB{}.append(kPid, pid).obj() : BOB{}.obj())
+ .appendElements(TestingProctor::instance().isEnabled()
+ ? BOB{}.append(kPid, pid).obj()
+ : BOB{}.obj())
.obj())
.append(kDriver, BOB{}.append(kName, "a").append(kVersion, "b").obj())
.append(kOperatingSystem,
diff --git a/src/mongo/rpc/metadata/oplog_query_metadata.h b/src/mongo/rpc/metadata/oplog_query_metadata.h
index bd08f6a8b39..a2e42519ec5 100644
--- a/src/mongo/rpc/metadata/oplog_query_metadata.h
+++ b/src/mongo/rpc/metadata/oplog_query_metadata.h
@@ -59,6 +59,10 @@ public:
int currentPrimaryIndex,
int currentSyncSourceIndex,
std::string currentSyncSourceHost);
+ explicit OplogQueryMetadata(const OplogQueryMetadata&) = default;
+ OplogQueryMetadata(OplogQueryMetadata&&) = default;
+ OplogQueryMetadata& operator=(const OplogQueryMetadata&) = delete;
+ OplogQueryMetadata& operator=(OplogQueryMetadata&&) = default;
/**
* format:
diff --git a/src/mongo/rpc/metadata/repl_set_metadata.h b/src/mongo/rpc/metadata/repl_set_metadata.h
index 9b85168da70..88aabbc4d14 100644
--- a/src/mongo/rpc/metadata/repl_set_metadata.h
+++ b/src/mongo/rpc/metadata/repl_set_metadata.h
@@ -55,6 +55,10 @@ public:
OID id,
int currentSyncSourceIndex,
bool isPrimary);
+ explicit ReplSetMetadata(const ReplSetMetadata&) = default;
+ ReplSetMetadata(ReplSetMetadata&&) = default;
+ ReplSetMetadata& operator=(const ReplSetMetadata&) = delete;
+ ReplSetMetadata& operator=(ReplSetMetadata&&) = default;
/**
* format:
diff --git a/src/mongo/rpc/metadata/security_token_metadata_test.cpp b/src/mongo/rpc/metadata/security_token_metadata_test.cpp
index 40c0e8e3cea..98698572af1 100644
--- a/src/mongo/rpc/metadata/security_token_metadata_test.cpp
+++ b/src/mongo/rpc/metadata/security_token_metadata_test.cpp
@@ -31,8 +31,8 @@
#include "mongo/bson/oid.h"
#include "mongo/crypto/sha256_block.h"
-#include "mongo/db/auth/security_token.h"
#include "mongo/db/auth/security_token_gen.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/client.h"
#include "mongo/db/concurrency/locker_noop_service_context_test_fixture.h"
#include "mongo/db/multitenancy_gen.h"
@@ -51,10 +51,20 @@ BSONObj makeSecurityToken(const UserName& userName) {
constexpr auto authUserFieldName = auth::SecurityToken::kAuthenticatedUserFieldName;
auto authUser = userName.toBSON(true /* serialize token */);
ASSERT_EQ(authUser["tenant"_sd].type(), jstOID);
- return auth::signSecurityToken(BSON(authUserFieldName << authUser));
+ using VTS = auth::ValidatedTenancyScope;
+ return VTS(BSON(authUserFieldName << authUser), VTS::TokenForTestingTag{})
+ .getOriginalToken()
+ .getOwned();
}
-class SecurityTokenMetadataTest : public LockerNoopServiceContextTest {};
+class SecurityTokenMetadataTest : public LockerNoopServiceContextTest {
+protected:
+ void setUp() final {
+ client = getServiceContext()->makeClient("test");
+ }
+
+ ServiceContext::UniqueClient client;
+};
TEST_F(SecurityTokenMetadataTest, SecurityTokenNotAccepted) {
const auto kPingBody = BSON(kPingFieldName << 1);
@@ -77,16 +87,19 @@ TEST_F(SecurityTokenMetadataTest, BasicSuccess) {
auto msg = OpMsgBytes{0, kBodySection, kPingBody, kSecurityTokenSection, kTokenBody}.parse();
ASSERT_BSONOBJ_EQ(msg.body, kPingBody);
ASSERT_EQ(msg.sequences.size(), 0u);
- ASSERT_BSONOBJ_EQ(msg.securityToken, kTokenBody);
+ ASSERT_TRUE(msg.validatedTenancyScope != boost::none);
+ ASSERT_BSONOBJ_EQ(msg.validatedTenancyScope->getOriginalToken(), kTokenBody);
+ ASSERT_EQ(msg.validatedTenancyScope->tenantId(), kTenantId);
auto opCtx = makeOperationContext();
- ASSERT(auth::getSecurityToken(opCtx.get()) == boost::none);
+ ASSERT(auth::ValidatedTenancyScope::get(opCtx.get()) == boost::none);
- auth::readSecurityTokenMetadata(opCtx.get(), msg.securityToken);
- auto token = auth::getSecurityToken(opCtx.get());
+ auth::ValidatedTenancyScope::set(opCtx.get(), msg.validatedTenancyScope);
+ auto token = auth::ValidatedTenancyScope::get(opCtx.get());
ASSERT(token != boost::none);
- auto authedUser = token->getAuthenticatedUser();
+ ASSERT_TRUE(token->hasAuthenticatedUser());
+ auto authedUser = token->authenticatedUser();
ASSERT_EQ(authedUser.getUser(), "user");
ASSERT_EQ(authedUser.getDB(), "admin");
ASSERT_TRUE(authedUser.getTenant() != boost::none);
diff --git a/src/mongo/rpc/op_msg.cpp b/src/mongo/rpc/op_msg.cpp
index f93aa859368..be420d55e50 100644
--- a/src/mongo/rpc/op_msg.cpp
+++ b/src/mongo/rpc/op_msg.cpp
@@ -132,7 +132,7 @@ void OpMsg::appendChecksum(Message* message) {
#endif
}
-OpMsg OpMsg::parse(const Message& message) try {
+OpMsg OpMsg::parse(const Message& message, Client* client) try {
// It is the caller's responsibility to call the correct parser for a given message type.
invariant(!message.empty());
invariant(message.operation() == dbMsg);
@@ -159,6 +159,7 @@ OpMsg OpMsg::parse(const Message& message) try {
// TODO some validation may make more sense in the IDL parser. I've tagged them with comments.
bool haveBody = false;
OpMsg msg;
+ BSONObj securityToken;
while (!sectionsBuf.atEof()) {
const auto sectionKind = sectionsBuf.read<Section>();
switch (sectionKind) {
@@ -166,6 +167,10 @@ OpMsg OpMsg::parse(const Message& message) try {
uassert(40430, "Multiple body sections in message", !haveBody);
haveBody = true;
msg.body = sectionsBuf.read<Validated<BSONObj>>();
+
+ uassert(ErrorCodes::InvalidOptions,
+ "Multitenancy not enabled, cannot set $tenant in command body",
+ gMultitenancySupport || !msg.body["$tenant"_sd]);
break;
}
@@ -197,7 +202,7 @@ OpMsg OpMsg::parse(const Message& message) try {
uassert(ErrorCodes::Unauthorized,
"Unsupported Security Token provided",
gMultitenancySupport);
- msg.securityToken = sectionsBuf.read<Validated<BSONObj>>();
+ securityToken = sectionsBuf.read<Validated<BSONObj>>();
break;
}
@@ -228,6 +233,10 @@ OpMsg OpMsg::parse(const Message& message) try {
*checksum == calculateChecksum(message));
}
#endif
+ if (gMultitenancySupport) {
+ msg.validatedTenancyScope =
+ auth::ValidatedTenancyScope::create(client, msg.body, securityToken);
+ }
return msg;
} catch (const DBException& ex) {
@@ -245,10 +254,13 @@ OpMsg OpMsg::parse(const Message& message) try {
namespace {
void serializeHelper(const std::vector<OpMsg::DocumentSequence>& sequences,
const BSONObj& body,
- const BSONObj& securityToken,
+ const boost::optional<auth::ValidatedTenancyScope>& validatedTenancyScope,
OpMsgBuilder* output) {
- if (securityToken.nFields() > 0) {
- output->setSecurityToken(securityToken);
+ if (validatedTenancyScope) {
+ auto securityToken = validatedTenancyScope->getOriginalToken();
+ if (securityToken.nFields() > 0) {
+ output->setSecurityToken(securityToken);
+ }
}
for (auto&& seq : sequences) {
auto docSeq = output->beginDocSequence(seq.name);
@@ -262,13 +274,13 @@ void serializeHelper(const std::vector<OpMsg::DocumentSequence>& sequences,
Message OpMsg::serialize() const {
OpMsgBuilder builder;
- serializeHelper(sequences, body, securityToken, &builder);
+ serializeHelper(sequences, body, validatedTenancyScope, &builder);
return builder.finish();
}
Message OpMsg::serializeWithoutSizeChecking() const {
OpMsgBuilder builder;
- serializeHelper(sequences, body, securityToken, &builder);
+ serializeHelper(sequences, body, validatedTenancyScope, &builder);
return builder.finishWithoutSizeChecking();
}
@@ -283,9 +295,6 @@ void OpMsg::shareOwnershipWith(const ConstSharedBuffer& buffer) {
}
}
}
- if (!securityToken.isOwned()) {
- securityToken.shareOwnershipWith(buffer);
- }
}
BSONObjBuilder OpMsgBuilder::beginSecurityToken() {
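With the raw securityToken member gone, the message now carries only the validated scope, and serialization re-derives the wire token from it via getOriginalToken(). A rough sketch, assuming `scope` is a previously constructed auth::ValidatedTenancyScope (for example from a parsed request or a test token):

    OpMsg msg;
    msg.body = BSON("ping" << 1);
    msg.validatedTenancyScope = scope;  // boost::optional<auth::ValidatedTenancyScope>
    // serialize() writes scope->getOriginalToken() back out as the security token section.
    Message wire = msg.serialize();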
diff --git a/src/mongo/rpc/op_msg.h b/src/mongo/rpc/op_msg.h
index 74dbb74a9d5..243dbf9a344 100644
--- a/src/mongo/rpc/op_msg.h
+++ b/src/mongo/rpc/op_msg.h
@@ -30,11 +30,13 @@
#pragma once
#include <algorithm>
+#include <boost/optional.hpp>
#include <string>
#include <vector>
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonobj.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/jsobj.h"
#include "mongo/rpc/message.h"
@@ -120,13 +122,13 @@ struct OpMsg {
/**
* Parses and returns an OpMsg containing unowned BSON.
*/
- static OpMsg parse(const Message& message);
+ static OpMsg parse(const Message& message, Client* client = nullptr);
/**
* Parses and returns an OpMsg containing owned BSON.
*/
- static OpMsg parseOwned(const Message& message) {
- auto msg = parse(message);
+ static OpMsg parseOwned(const Message& message, Client* client = nullptr) {
+ auto msg = parse(message, client);
msg.shareOwnershipWith(message.sharedBuffer());
return msg;
}
@@ -157,8 +159,16 @@ struct OpMsg {
}
BSONObj body;
- BSONObj securityToken;
std::vector<DocumentSequence> sequences;
+
+ boost::optional<auth::ValidatedTenancyScope> validatedTenancyScope = boost::none;
+
+ boost::optional<TenantId> getValidatedTenantId() const {
+ if (!validatedTenancyScope) {
+ return boost::none;
+ }
+ return validatedTenancyScope->tenantId();
+ }
};
/**
@@ -170,12 +180,12 @@ struct OpMsgRequest : public OpMsg {
OpMsgRequest() = default;
explicit OpMsgRequest(OpMsg&& generic) : OpMsg(std::move(generic)) {}
- static OpMsgRequest parse(const Message& message) {
- return OpMsgRequest(OpMsg::parse(message));
+ static OpMsgRequest parse(const Message& message, Client* client = nullptr) {
+ return OpMsgRequest(OpMsg::parse(message, client));
}
- static OpMsgRequest parseOwned(const Message& message) {
- return OpMsgRequest(OpMsg::parseOwned(message));
+ static OpMsgRequest parseOwned(const Message& message, Client* client = nullptr) {
+ return OpMsgRequest(OpMsg::parseOwned(message, client));
}
static OpMsgRequest fromDBAndBody(StringData db,
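All of the parse entry points gain the same optional Client*, and the parsed request exposes its tenant through getValidatedTenantId(). A brief usage sketch, assuming an OperationContext* opCtx and a wire Message `message` (illustrative names only):

    auto request = OpMsgRequest::parseOwned(message, opCtx->getClient());
    if (auto tenantId = request.getValidatedTenantId()) {
        // boost::optional<TenantId>: engaged only when a tenancy scope was validated.
    }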
diff --git a/src/mongo/rpc/op_msg_integration_test.cpp b/src/mongo/rpc/op_msg_integration_test.cpp
index fa54c979648..887477fd64a 100644
--- a/src/mongo/rpc/op_msg_integration_test.cpp
+++ b/src/mongo/rpc/op_msg_integration_test.cpp
@@ -1240,15 +1240,10 @@ TEST(OpMsg, ExhaustWithDBClientCursorBehavesCorrectly) {
LOGV2(22635, "Finished document insertion.");
// Open an exhaust cursor.
- int batchSize = 2;
- auto cursor = conn->query_DEPRECATED(nss,
- BSONObj{},
- Query().sort(BSON("_id" << 1)),
- 0,
- 0,
- nullptr,
- QueryOption_Exhaust,
- batchSize);
+ FindCommandRequest findCmd{nss};
+ findCmd.setSort(BSON("_id" << 1));
+ findCmd.setBatchSize(2);
+ auto cursor = conn->find(std::move(findCmd), ReadPreferenceSetting{}, ExhaustMode::kOn);
// Verify that the documents are returned properly. Exhaust cursors should still receive results
// in batches, so we check that these batches correspond to the specified batch size.
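The deprecated OP_QUERY-style call is replaced by the typed find API, with the exhaust option moving into an ExhaustMode argument. A sketch of the full pattern, assuming `conn` is a connected DBClientBase* and `nss` names the test collection (as in the test above):

    FindCommandRequest findCmd{nss};
    findCmd.setSort(BSON("_id" << 1));
    findCmd.setBatchSize(2);
    auto cursor = conn->find(std::move(findCmd), ReadPreferenceSetting{}, ExhaustMode::kOn);
    while (cursor->more()) {
        BSONObj doc = cursor->nextSafe();  // documents arrive in batches of the requested size
    }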
diff --git a/src/mongo/rpc/op_msg_test.cpp b/src/mongo/rpc/op_msg_test.cpp
index ead4dbe1977..ea07a42ea31 100644
--- a/src/mongo/rpc/op_msg_test.cpp
+++ b/src/mongo/rpc/op_msg_test.cpp
@@ -34,7 +34,15 @@
#include "mongo/base/static_assert.h"
#include "mongo/bson/json.h"
+#include "mongo/db/auth/authorization_manager_impl.h"
+#include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/auth/authorization_session_impl.h"
+#include "mongo/db/auth/authz_manager_external_state_mock.h"
+#include "mongo/db/auth/security_token_gen.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/jsobj.h"
+#include "mongo/db/multitenancy_gen.h"
+#include "mongo/db/service_context_test_fixture.h"
#include "mongo/logv2/log.h"
#include "mongo/unittest/log_test.h"
#include "mongo/unittest/unittest.h"
@@ -44,6 +52,26 @@
namespace mongo {
+
+class AuthorizationSessionImplTestHelper {
+public:
+ /**
+ * Synthesize a user with the useTenant privilege and add them to the authorization session.
+ */
+ static void grantUseTenant(Client& client) {
+ User user(UserName("useTenant", "admin"));
+ user.setPrivileges(
+ {Privilege(ResourcePattern::forClusterResource(), ActionType::useTenant)});
+ auto* as = dynamic_cast<AuthorizationSessionImpl*>(AuthorizationSession::get(client));
+ if (as->_authenticatedUser != boost::none) {
+ as->logoutAllDatabases(&client, "AuthorizationSessionImplTestHelper"_sd);
+ }
+ as->_authenticatedUser = std::move(user);
+ as->_authenticationMode = AuthorizationSession::AuthenticationMode::kConnection;
+ as->_updateInternalAuthorizationState();
+ }
+};
+
namespace rpc {
namespace test {
namespace {
@@ -763,6 +791,122 @@ TEST(OpMsgSerializer, SetFlagWorks) {
}
}
+class OpMsgWithAuth : public mongo::ScopedGlobalServiceContextForTest, public unittest::Test {
+protected:
+ void setUp() final {
+ auto authzManagerState = std::make_unique<AuthzManagerExternalStateMock>();
+ auto authzManager = std::make_unique<AuthorizationManagerImpl>(
+ getServiceContext(), std::move(authzManagerState));
+ authzManager->setAuthEnabled(true);
+ AuthorizationManager::set(getServiceContext(), std::move(authzManager));
+
+ client = getServiceContext()->makeClient("test");
+ }
+
+ BSONObj makeSecurityToken(const UserName& userName) {
+ constexpr auto authUserFieldName = auth::SecurityToken::kAuthenticatedUserFieldName;
+ auto authUser = userName.toBSON(true /* serialize token */);
+ ASSERT_EQ(authUser["tenant"_sd].type(), jstOID);
+ using VTS = auth::ValidatedTenancyScope;
+ return VTS(BSON(authUserFieldName << authUser), VTS::TokenForTestingTag{})
+ .getOriginalToken();
+ }
+
+ ServiceContext::UniqueClient client;
+};
+
+TEST_F(OpMsgWithAuth, ParseValidatedTenancyScopeFromSecurityToken) {
+ gMultitenancySupport = true;
+
+ const auto kTenantId = TenantId(OID::gen());
+ const auto token = makeSecurityToken(UserName("user", "admin", kTenantId));
+ auto msg =
+ OpMsgBytes{
+ kNoFlags, //
+ kBodySection,
+ fromjson("{ping: 1}"),
+
+ kDocSequenceSection,
+ Sized{
+ "docs", //
+ fromjson("{a: 1}"),
+ fromjson("{a: 2}"),
+ },
+
+ kSecurityTokenSection,
+ token,
+ }
+ .parse(client.get());
+
+ auto body = BSON("ping" << 1);
+
+ ASSERT(msg.validatedTenancyScope);
+ ASSERT_EQ(msg.validatedTenancyScope->tenantId(), kTenantId);
+}
+
+TEST_F(OpMsgWithAuth, ParseValidatedTenancyScopeFromDollarTenant) {
+ gMultitenancySupport = true;
+ AuthorizationSessionImplTestHelper::grantUseTenant(*(client.get()));
+
+ const auto kTenantId = TenantId(OID::gen());
+ const auto body = BSON("ping" << 1 << "$tenant" << kTenantId);
+ auto msg =
+ OpMsgBytes{
+ kNoFlags, //
+ kBodySection,
+ body,
+
+ kDocSequenceSection,
+ Sized{
+ "docs", //
+ fromjson("{a: 1}"),
+ fromjson("{a: 2}"),
+ },
+ }
+ .parse(client.get());
+
+ ASSERT(msg.validatedTenancyScope);
+ ASSERT_EQ(msg.validatedTenancyScope->tenantId(), kTenantId);
+}
+
+TEST_F(OpMsgWithAuth, ValidatedTenancyScopeShouldNotBeSerialized) {
+ gMultitenancySupport = true;
+ AuthorizationSessionImplTestHelper::grantUseTenant(*(client.get()));
+
+ const auto kTenantId = TenantId(OID::gen());
+ const auto body = BSON("ping" << 1 << "$tenant" << kTenantId);
+ auto msgBytes = OpMsgBytes{
+ kNoFlags, //
+ kBodySection,
+ body,
+
+ kDocSequenceSection,
+ Sized{
+ "docs", //
+ fromjson("{a: 1}"),
+ fromjson("{a: 2}"),
+ },
+ };
+ auto msg = msgBytes.parse(client.get());
+ ASSERT(msg.validatedTenancyScope);
+
+ auto serializedMsg = msg.serialize();
+ testSerializer(serializedMsg,
+ OpMsgBytes{
+ kNoFlags, //
+
+ kDocSequenceSection,
+ Sized{
+ "docs", //
+ fromjson("{a: 1}"),
+ fromjson("{a: 2}"),
+ },
+
+ kBodySection,
+ body,
+ });
+}
+
TEST(OpMsgRequest, GetDatabaseWorks) {
OpMsgRequest msg;
msg.body = fromjson("{$db: 'foo'}");
diff --git a/src/mongo/rpc/op_msg_test.h b/src/mongo/rpc/op_msg_test.h
index 58f25f7417f..38cb4979b14 100644
--- a/src/mongo/rpc/op_msg_test.h
+++ b/src/mongo/rpc/op_msg_test.h
@@ -131,6 +131,10 @@ public:
return OpMsg::parseOwned(done());
}
+ OpMsg parse(Client* client) {
+ return OpMsg::parseOwned(done(), client);
+ }
+
OpMsgBytes&& addToSize(int32_t extra) && {
updateSize(extra);
return std::move(*this);
diff --git a/src/mongo/s/SConscript b/src/mongo/s/SConscript
index 3286415f86f..80cef8dfc81 100644
--- a/src/mongo/s/SConscript
+++ b/src/mongo/s/SConscript
@@ -183,7 +183,6 @@ env.Library(
'database_version.idl',
'mongod_and_mongos_server_parameters.idl',
'pm2423_feature_flags.idl',
- 'pm2583_feature_flags.idl',
'request_types/abort_reshard_collection.idl',
'request_types/add_shard_request_type.cpp',
'request_types/get_stats_for_balancing.idl',
@@ -193,7 +192,6 @@ env.Library(
'request_types/balancer_collection_status.idl',
'request_types/cleanup_reshard_collection.idl',
'request_types/clone_catalog_data.idl',
- 'request_types/commit_chunk_migration_request_type.cpp',
'request_types/commit_reshard_collection.idl',
'request_types/configure_collection_balancing.idl',
'request_types/drop_collection_if_uuid_not_matching.idl',
@@ -212,7 +210,6 @@ env.Library(
'request_types/reshard_collection.idl',
'request_types/resharding_operation_time.idl',
'request_types/set_allow_migrations.idl',
- 'request_types/set_shard_version_request.cpp',
'request_types/sharded_ddl_commands.idl',
'request_types/update_zone_key_range_request_type.cpp',
'request_types/wait_for_fail_point.idl',
@@ -477,6 +474,7 @@ env.Library(
'$BUILD_DIR/mongo/db/startup_warnings_common',
'$BUILD_DIR/mongo/db/stats/counters',
'$BUILD_DIR/mongo/db/windows_options' if env.TargetOSIs('windows') else [],
+ '$BUILD_DIR/mongo/logv2/logv2_options',
'$BUILD_DIR/mongo/transport/message_compressor_options_server',
'$BUILD_DIR/mongo/transport/service_entry_point',
'$BUILD_DIR/mongo/transport/transport_layer_manager',
@@ -530,6 +528,7 @@ env.Library(
'$BUILD_DIR/mongo/db/audit',
'$BUILD_DIR/mongo/db/auth/authmongos',
'$BUILD_DIR/mongo/db/change_stream_options_manager',
+ '$BUILD_DIR/mongo/db/change_streams_cluster_parameter',
'$BUILD_DIR/mongo/db/commands/rwc_defaults_commands',
'$BUILD_DIR/mongo/db/ftdc/ftdc_mongos',
'$BUILD_DIR/mongo/db/process_health/fault_manager',
@@ -635,17 +634,16 @@ env.CppUnitTest(
'comparable_database_version_test.cpp',
'hedge_options_util_test.cpp',
'load_balancer_support_test.cpp',
+ 'mongos_core_options_stub.cpp',
'mock_ns_targeter.cpp',
'mongos_topology_coordinator_test.cpp',
'request_types/add_shard_request_test.cpp',
'request_types/add_shard_to_zone_request_test.cpp',
'request_types/balance_chunk_request_test.cpp',
- 'request_types/commit_chunk_migration_request_test.cpp',
'request_types/merge_chunks_request_test.cpp',
'request_types/migration_secondary_throttle_options_test.cpp',
'request_types/move_chunk_request_test.cpp',
'request_types/remove_shard_from_zone_request_test.cpp',
- 'request_types/set_shard_version_request_test.cpp',
'request_types/update_zone_key_range_request_test.cpp',
'routing_table_history_test.cpp',
'sessions_collection_sharded_test.cpp',
@@ -670,6 +668,8 @@ env.CppUnitTest(
'$BUILD_DIR/mongo/db/timeseries/timeseries_options',
'$BUILD_DIR/mongo/dbtests/mocklib',
'$BUILD_DIR/mongo/s/catalog/sharding_catalog_client_mock',
+ '$BUILD_DIR/mongo/s/commands/cluster_commands',
+ '$BUILD_DIR/mongo/s/commands/cluster_commands_common',
'$BUILD_DIR/mongo/s/write_ops/batch_write_types',
'$BUILD_DIR/mongo/s/write_ops/cluster_write_ops',
'$BUILD_DIR/mongo/util/net/network',
diff --git a/src/mongo/s/append_raw_responses_test.cpp b/src/mongo/s/append_raw_responses_test.cpp
index 528e8ba4876..1bcfb9c8bc8 100644
--- a/src/mongo/s/append_raw_responses_test.cpp
+++ b/src/mongo/s/append_raw_responses_test.cpp
@@ -27,10 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
-#include "mongo/unittest/unittest.h"
-
#include "mongo/client/remote_command_targeter_mock.h"
#include "mongo/db/commands.h"
#include "mongo/rpc/get_status_from_command_result.h"
@@ -39,6 +35,7 @@
#include "mongo/s/catalog/type_shard.h"
#include "mongo/s/cluster_commands_helpers.h"
#include "mongo/s/sharding_router_test_fixture.h"
+#include "mongo/unittest/unittest.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
@@ -200,7 +197,7 @@ protected:
Timestamp timestamp{1, 0};
return StaleConfigInfo(
NamespaceString("Foo.Bar"),
- ChunkVersion(1, 0, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 0}),
boost::none,
ShardId{"dummy"});
}(),
diff --git a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
index 2fceaeb3d4a..55ae5007f48 100644
--- a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
+++ b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
@@ -65,7 +65,6 @@
#include "mongo/s/client/shard_remote_gen.h"
#include "mongo/s/database_version.h"
#include "mongo/s/grid.h"
-#include "mongo/s/request_types/set_shard_version_request.h"
#include "mongo/s/shard_key_pattern.h"
#include "mongo/s/shard_util.h"
#include "mongo/s/write_ops/batch_write_op.h"
diff --git a/src/mongo/s/catalog/sharding_catalog_client_test.cpp b/src/mongo/s/catalog/sharding_catalog_client_test.cpp
index effe9ca0a10..92f2ce78a8d 100644
--- a/src/mongo/s/catalog/sharding_catalog_client_test.cpp
+++ b/src/mongo/s/catalog/sharding_catalog_client_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include <pcrecpp.h>
#include "mongo/bson/json.h"
@@ -60,7 +57,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace {
@@ -375,7 +371,7 @@ TEST_F(ShardingCatalogClientTest, GetChunksForNSWithSortAndLimit) {
chunkA.setCollectionUUID(collUuid);
chunkA.setMin(BSON("a" << 1));
chunkA.setMax(BSON("a" << 100));
- chunkA.setVersion({1, 2, collEpoch, collTimestamp});
+ chunkA.setVersion(ChunkVersion({collEpoch, collTimestamp}, {1, 2}));
chunkA.setShard(ShardId("shard0000"));
ChunkType chunkB;
@@ -383,10 +379,10 @@ TEST_F(ShardingCatalogClientTest, GetChunksForNSWithSortAndLimit) {
chunkB.setCollectionUUID(collUuid);
chunkB.setMin(BSON("a" << 100));
chunkB.setMax(BSON("a" << 200));
- chunkB.setVersion({3, 4, collEpoch, collTimestamp});
+ chunkB.setVersion(ChunkVersion({collEpoch, collTimestamp}, {3, 4}));
chunkB.setShard(ShardId("shard0001"));
- ChunkVersion queryChunkVersion({1, 2, collEpoch, collTimestamp});
+ ChunkVersion queryChunkVersion({collEpoch, collTimestamp}, {1, 2});
const BSONObj chunksQuery(
BSON(ChunkType::collectionUUID()
@@ -458,7 +454,7 @@ TEST_F(ShardingCatalogClientTest, GetChunksForUUIDNoSortNoLimit) {
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(1, 1);
- ChunkVersion queryChunkVersion({1, 2, collEpoch, collTimestamp});
+ ChunkVersion queryChunkVersion({collEpoch, collTimestamp}, {1, 2});
const BSONObj chunksQuery(
BSON(ChunkType::collectionUUID()
@@ -507,7 +503,7 @@ TEST_F(ShardingCatalogClientTest, GetChunksForNSInvalidChunk) {
configTargeter()->setFindHostReturnValue(HostAndPort("TestHost1"));
const auto collUuid = UUID::gen();
- ChunkVersion queryChunkVersion({1, 2, OID::gen(), Timestamp(1, 1)});
+ ChunkVersion queryChunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 2});
const BSONObj chunksQuery(
BSON(ChunkType::collectionUUID()
@@ -533,14 +529,14 @@ TEST_F(ShardingCatalogClientTest, GetChunksForNSInvalidChunk) {
chunkA.setCollectionUUID(collUuid);
chunkA.setMin(BSON("a" << 1));
chunkA.setMax(BSON("a" << 100));
- chunkA.setVersion({1, 2, OID::gen(), Timestamp(1, 1)});
+ chunkA.setVersion(ChunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 2}));
chunkA.setShard(ShardId("shard0000"));
ChunkType chunkB;
chunkB.setCollectionUUID(collUuid);
chunkB.setMin(BSON("a" << 100));
chunkB.setMax(BSON("a" << 200));
- chunkB.setVersion({3, 4, OID::gen(), Timestamp(1, 1)});
+ chunkB.setVersion(ChunkVersion({OID::gen(), Timestamp(1, 1)}, {3, 4}));
// Missing shard id
return vector<BSONObj>{chunkA.toConfigBSON(), chunkB.toConfigBSON()};
diff --git a/src/mongo/s/catalog/type_chunk.cpp b/src/mongo/s/catalog/type_chunk.cpp
index 18c5cd63bb0..7ed4cc739cc 100644
--- a/src/mongo/s/catalog/type_chunk.cpp
+++ b/src/mongo/s/catalog/type_chunk.cpp
@@ -27,13 +27,8 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/s/catalog/type_chunk.h"
-#include <cstring>
-
#include "mongo/base/status_with.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
@@ -46,7 +41,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
const NamespaceString ChunkType::ConfigNS("config.chunks");
@@ -64,8 +58,6 @@ const BSONField<BSONObj> ChunkType::max("max");
const BSONField<std::string> ChunkType::shard("shard");
const BSONField<bool> ChunkType::jumbo("jumbo");
const BSONField<Date_t> ChunkType::lastmod("lastmod");
-const BSONField<OID> ChunkType::epoch("lastmodEpoch");
-const BSONField<Timestamp> ChunkType::timestamp("lastmodTimestamp");
const BSONField<BSONObj> ChunkType::history("history");
const BSONField<int64_t> ChunkType::estimatedSizeBytes("estimatedDataSizeBytes");
const BSONField<bool> ChunkType::historyIsAt40("historyIsAt40");
@@ -298,7 +290,7 @@ StatusWith<ChunkType> ChunkType::parseFromConfigBSON(const BSONObj& source,
if (versionElem.type() == bsonTimestamp || versionElem.type() == Date) {
auto chunkLastmod = Timestamp(versionElem._numberLong());
chunk._version =
- ChunkVersion(chunkLastmod.getSecs(), chunkLastmod.getInc(), epoch, timestamp);
+ ChunkVersion({epoch, timestamp}, {chunkLastmod.getSecs(), chunkLastmod.getInc()});
} else {
return {ErrorCodes::BadValue,
str::stream() << "The field " << ChunkType::lastmod() << " cannot be parsed."};
@@ -383,7 +375,7 @@ StatusWith<ChunkType> ChunkType::parseFromShardBSON(const BSONObj& source,
if (lastmodElem.type() == bsonTimestamp || lastmodElem.type() == Date) {
auto chunkLastmod = Timestamp(lastmodElem._numberLong());
chunk._version =
- ChunkVersion(chunkLastmod.getSecs(), chunkLastmod.getInc(), epoch, timestamp);
+ ChunkVersion({epoch, timestamp}, {chunkLastmod.getSecs(), chunkLastmod.getInc()});
} else {
return {ErrorCodes::NoSuchKey,
str::stream() << "Expected field " << ChunkType::lastmod() << " not found."};
@@ -393,7 +385,7 @@ StatusWith<ChunkType> ChunkType::parseFromShardBSON(const BSONObj& source,
return chunk;
}
-StatusWith<ChunkType> ChunkType::parseFromNetworkRequest(const BSONObj& source, bool requireUUID) {
+StatusWith<ChunkType> ChunkType::parseFromNetworkRequest(const BSONObj& source) {
// Parse history and shard.
StatusWith<ChunkType> chunkStatus = _parseChunkBase(source);
if (!chunkStatus.isOK()) {
@@ -413,16 +405,12 @@ StatusWith<ChunkType> ChunkType::parseFromNetworkRequest(const BSONObj& source,
}
chunk._collectionUUID = swUUID.getValue();
} else if (status == ErrorCodes::NoSuchKey) {
- // Ignore NoSuchKey because before 5.0 chunks don't include a collectionUUID
+ return {ErrorCodes::FailedToParse, str::stream() << "There must be a UUID present"};
} else {
return status;
}
}
- if (requireUUID && !chunk._collectionUUID) {
- return {ErrorCodes::FailedToParse, str::stream() << "There must be a UUID present"};
- }
-
// Parse min and max.
{
auto chunkRangeStatus = ChunkRange::fromBSON(source);
@@ -448,7 +436,7 @@ StatusWith<ChunkType> ChunkType::parseFromNetworkRequest(const BSONObj& source,
}
// Parse version.
- chunk._version = ChunkVersion::fromBSONLegacyOrNewerFormat(source, ChunkType::lastmod());
+ chunk._version = ChunkVersion::parse(source[ChunkType::lastmod()]);
return chunk;
}
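Throughout the rest of this patch the positional ChunkVersion constructor is replaced by a two-part form: an {epoch, timestamp} collection generation followed by a {major, minor} pair. A minimal before/after sketch using accessors that appear elsewhere in the diff:

    const OID epoch = OID::gen();
    const Timestamp timestamp(1, 1);

    // old form, removed by this patch:
    //   ChunkVersion version(1, 0, epoch, timestamp);
    ChunkVersion version({epoch, timestamp}, {1, 0});

    invariant(version.majorVersion() == 1);
    invariant(version.minorVersion() == 0);
    invariant(version.epoch() == epoch);
    invariant(version.getTimestamp() == timestamp);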
diff --git a/src/mongo/s/catalog/type_chunk.h b/src/mongo/s/catalog/type_chunk.h
index c76a0d1c507..30de46edf3a 100644
--- a/src/mongo/s/catalog/type_chunk.h
+++ b/src/mongo/s/catalog/type_chunk.h
@@ -207,8 +207,6 @@ public:
static const BSONField<std::string> shard;
static const BSONField<bool> jumbo;
static const BSONField<Date_t> lastmod;
- static const BSONField<OID> epoch;
- static const BSONField<Timestamp> timestamp;
static const BSONField<BSONObj> history;
static const BSONField<int64_t> estimatedSizeBytes;
static const BSONField<bool> historyIsAt40;
@@ -221,7 +219,7 @@ public:
* {min: <>, max: <>, shard: <>, uuid: <>, history: <>, jumbo: <>, lastmod: <>,
* lastmodEpoch: <>, lastmodTimestamp: <>}
*/
- static StatusWith<ChunkType> parseFromNetworkRequest(const BSONObj& source, bool requireUUID);
+ static StatusWith<ChunkType> parseFromNetworkRequest(const BSONObj& source);
/**
* Constructs a new ChunkType object from BSON with the following format:
@@ -261,15 +259,6 @@ public:
/**
* Getters and setters.
*/
-
- // TODO (SERVER-60792): Get rid of this function once v6.0 branches out. Due to a missing
- // addition of the UUID field in v5.0 BalanceChunkRequest, it can happen that the field is not
- // set. Mark as "UNSAFE" to make it clear that this method is just intended to be used for this
- // specific purpose.
- bool hasCollectionUUID_UNSAFE() const {
- return (bool)_collectionUUID;
- }
-
const UUID& getCollectionUUID() const {
invariant(_collectionUUID);
return *_collectionUUID;
diff --git a/src/mongo/s/catalog/type_chunk_test.cpp b/src/mongo/s/catalog/type_chunk_test.cpp
index 52c172a529a..18c199b69ea 100644
--- a/src/mongo/s/catalog/type_chunk_test.cpp
+++ b/src/mongo/s/catalog/type_chunk_test.cpp
@@ -27,19 +27,15 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
-#include "mongo/s/catalog/type_chunk.h"
-
#include "mongo/base/status_with.h"
#include "mongo/db/jsobj.h"
+#include "mongo/s/catalog/type_chunk.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/time_support.h"
namespace mongo {
namespace {
-using std::string;
using unittest::assertGet;
const BSONObj kMin = BSON("a" << 10);
@@ -51,29 +47,26 @@ TEST(ChunkType, MissingConfigRequiredFields) {
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(1, 1);
- ChunkVersion chunkVersion(1, 2, collEpoch, collTimestamp);
+ ChunkVersion chunkVersion({collEpoch, collTimestamp}, {1, 2});
BSONObj objModNS =
BSON(ChunkType::name(OID::gen())
<< ChunkType::min(BSON("a" << 10 << "b" << 10)) << ChunkType::max(BSON("a" << 20))
- << "lastmod" << Timestamp(chunkVersion.toLong()) << "lastmodEpoch"
- << chunkVersion.epoch() << ChunkType::shard("shard0001"));
+ << "lastmod" << Timestamp(chunkVersion.toLong()) << ChunkType::shard("shard0001"));
StatusWith<ChunkType> chunkRes =
ChunkType::parseFromConfigBSON(objModNS, collEpoch, collTimestamp);
ASSERT_FALSE(chunkRes.isOK());
- BSONObj objModKeys =
- BSON(ChunkType::name(OID::gen()) << ChunkType::collectionUUID() << collUuid << "lastmod"
- << Timestamp(chunkVersion.toLong()) << "lastmodEpoch"
- << chunkVersion.epoch() << ChunkType::shard("shard0001"));
+ BSONObj objModKeys = BSON(ChunkType::name(OID::gen())
+ << ChunkType::collectionUUID() << collUuid << "lastmod"
+ << Timestamp(chunkVersion.toLong()) << ChunkType::shard("shard0001"));
chunkRes = ChunkType::parseFromConfigBSON(objModKeys, collEpoch, collTimestamp);
ASSERT_FALSE(chunkRes.isOK());
BSONObj objModShard = BSON(
ChunkType::name(OID::gen())
<< ChunkType::collectionUUID() << collUuid << ChunkType::min(BSON("a" << 10 << "b" << 10))
- << ChunkType::max(BSON("a" << 20)) << "lastmod" << Timestamp(chunkVersion.toLong())
- << "lastmodEpoch" << chunkVersion.epoch());
+ << ChunkType::max(BSON("a" << 20)) << "lastmod" << Timestamp(chunkVersion.toLong()));
chunkRes = ChunkType::parseFromConfigBSON(objModShard, collEpoch, collTimestamp);
ASSERT_FALSE(chunkRes.isOK());
@@ -88,7 +81,7 @@ TEST(ChunkType, MissingConfigRequiredFields) {
TEST(ChunkType, MissingShardRequiredFields) {
const OID epoch = OID::gen();
const Timestamp timestamp(1, 1);
- ChunkVersion chunkVersion(1, 2, epoch, timestamp);
+ ChunkVersion chunkVersion({epoch, timestamp}, {1, 2});
const auto lastmod = Timestamp(chunkVersion.toLong());
BSONObj objModMin =
@@ -116,15 +109,16 @@ TEST(ChunkType, MissingShardRequiredFields) {
}
TEST(ChunkType, ToFromShardBSON) {
- const OID epoch = OID::gen();
- const Timestamp timestamp(1, 1);
- ChunkVersion chunkVersion(1, 2, epoch, timestamp);
+ const OID collEpoch = OID::gen();
+ const Timestamp collTimestamp(1, 1);
+
+ ChunkVersion chunkVersion({collEpoch, collTimestamp}, {1, 2});
auto lastmod = Timestamp(chunkVersion.toLong());
BSONObj obj = BSON(ChunkType::minShardID(kMin)
<< ChunkType::max(kMax) << ChunkType::shard(kShard.toString()) << "lastmod"
<< lastmod);
- ChunkType shardChunk = assertGet(ChunkType::parseFromShardBSON(obj, epoch, timestamp));
+ ChunkType shardChunk = assertGet(ChunkType::parseFromShardBSON(obj, collEpoch, collTimestamp));
ASSERT_BSONOBJ_EQ(obj, shardChunk.toShardBSON());
@@ -139,7 +133,7 @@ TEST(ChunkType, MinAndMaxShardKeysDifferInNumberOfKeys) {
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(1);
- ChunkVersion chunkVersion(1, 2, collEpoch, collTimestamp);
+ ChunkVersion chunkVersion({collEpoch, collTimestamp}, {1, 2});
BSONObj obj = BSON(
ChunkType::name(OID::gen())
<< ChunkType::collectionUUID() << collUuid << ChunkType::min(BSON("a" << 10 << "b" << 10))
@@ -156,7 +150,7 @@ TEST(ChunkType, MinAndMaxShardKeysDifferInKeyNames) {
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(1);
- ChunkVersion chunkVersion(1, 2, collEpoch, collTimestamp);
+ ChunkVersion chunkVersion({collEpoch, collTimestamp}, {1, 2});
BSONObj obj =
BSON(ChunkType::name(OID::gen())
<< ChunkType::collectionUUID() << collUuid << ChunkType::min(BSON("a" << 10))
@@ -173,7 +167,7 @@ TEST(ChunkType, MinToMaxNotAscending) {
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(1);
- ChunkVersion chunkVersion(1, 2, collEpoch, collTimestamp);
+ ChunkVersion chunkVersion({collEpoch, collTimestamp}, {1, 2});
BSONObj obj =
BSON(ChunkType::name(OID::gen())
<< ChunkType::collectionUUID() << collUuid << ChunkType::min(BSON("a" << 20))
@@ -189,7 +183,7 @@ TEST(ChunkType, ToFromConfigBSON) {
const auto collTimestamp = Timestamp(1);
const auto chunkID = OID::gen();
- ChunkVersion chunkVersion(1, 2, collEpoch, collTimestamp);
+ ChunkVersion chunkVersion({collEpoch, collTimestamp}, {1, 2});
BSONObj obj = BSON(ChunkType::name(chunkID)
<< ChunkType::collectionUUID() << collUuid << ChunkType::min(BSON("a" << 10))
<< ChunkType::max(BSON("a" << 20)) << ChunkType::shard("shard0001")
@@ -224,7 +218,7 @@ TEST(ChunkType, BothNsAndUUID) {
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(1);
- ChunkVersion chunkVersion(1, 2, collEpoch, collTimestamp);
+ ChunkVersion chunkVersion({collEpoch, collTimestamp}, {1, 2});
BSONObj objModNS =
BSON(ChunkType::name(OID::gen())
@@ -242,7 +236,7 @@ TEST(ChunkType, UUIDPresentAndNsMissing) {
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(1);
- ChunkVersion chunkVersion(1, 2, collEpoch, collTimestamp);
+ ChunkVersion chunkVersion({collEpoch, collTimestamp}, {1, 2});
BSONObj objModNS = BSON(
ChunkType::name(OID::gen())
@@ -255,32 +249,23 @@ TEST(ChunkType, UUIDPresentAndNsMissing) {
ASSERT_TRUE(chunkRes.isOK());
}
-TEST(ChunkType, NewAndOldChunkVersionFormat) {
+TEST(ChunkType, ParseFromNetworkRequest) {
const auto collEpoch = OID::gen();
- const auto collTimestamp = Timestamp(1);
+ const auto collTimestamp = Timestamp(1, 0);
- ChunkVersion chunkVersion(1, 2, collEpoch, collTimestamp);
+ ChunkVersion chunkVersion({collEpoch, collTimestamp}, {1, 2});
- BSONObj objModOldCVFormat =
+ auto chunk = assertGet(ChunkType::parseFromNetworkRequest(
BSON(ChunkType::name(OID::gen())
<< ChunkType::collectionUUID() << mongo::UUID::gen()
<< ChunkType::min(BSON("a" << 10 << "b" << 10)) << ChunkType::max(BSON("a" << 20))
<< "lastmod"
- << BSON("v" << Timestamp(chunkVersion.toLong()) << "e" << chunkVersion.epoch() << "t"
- << chunkVersion.getTimestamp())
- << ChunkType::shard("shard0001"));
- StatusWith<ChunkType> chunkRes = ChunkType::parseFromNetworkRequest(objModOldCVFormat, true);
+ << BSON("e" << chunkVersion.epoch() << "t" << chunkVersion.getTimestamp() << "v"
+ << Timestamp(chunkVersion.toLong()))
+ << ChunkType::shard("shard0001"))));
- ASSERT_TRUE(chunkRes.isOK());
-
- BSONObj objModNewCVFormat = BSON(
- ChunkType::name(OID::gen())
- << ChunkType::collectionUUID() << mongo::UUID::gen()
- << ChunkType::min(BSON("a" << 10 << "b" << 10)) << ChunkType::max(BSON("a" << 20))
- << "lastmod" << Timestamp(chunkVersion.toLong()) << "lastmodEpoch" << chunkVersion.epoch()
- << "lastmodTimestamp" << chunkVersion.getTimestamp() << ChunkType::shard("shard0001"));
- chunkRes = ChunkType::parseFromNetworkRequest(objModNewCVFormat, true);
- ASSERT_TRUE(chunkRes.isOK());
+ ASSERT_EQ("shard0001", chunk.getShard());
+ ASSERT_EQ(chunkVersion, chunk.getVersion());
}
TEST(ChunkRange, BasicBSONParsing) {
diff --git a/src/mongo/s/catalog_cache.cpp b/src/mongo/s/catalog_cache.cpp
index 98f54e9e9d3..512dd4d2e0b 100644
--- a/src/mongo/s/catalog_cache.cpp
+++ b/src/mongo/s/catalog_cache.cpp
@@ -27,13 +27,6 @@
* it in the license file.
*/
-
-#define LOGV2_FOR_CATALOG_REFRESH(ID, DLEVEL, MESSAGE, ...) \
- LOGV2_DEBUG_OPTIONS( \
- ID, DLEVEL, {logv2::LogComponent::kShardingCatalogRefresh}, MESSAGE, ##__VA_ARGS__)
-
-#include "mongo/platform/basic.h"
-
#include "mongo/s/catalog_cache.h"
#include "mongo/bson/bsonobjbuilder.h"
diff --git a/src/mongo/s/catalog_cache_loader.h b/src/mongo/s/catalog_cache_loader.h
index 18a26324fbc..811391ded61 100644
--- a/src/mongo/s/catalog_cache_loader.h
+++ b/src/mongo/s/catalog_cache_loader.h
@@ -181,3 +181,7 @@ protected:
};
} // namespace mongo
+
+#define LOGV2_FOR_CATALOG_REFRESH(ID, DLEVEL, MESSAGE, ...) \
+ LOGV2_DEBUG_OPTIONS( \
+ ID, DLEVEL, {logv2::LogComponent::kShardingCatalogRefresh}, MESSAGE, ##__VA_ARGS__)
diff --git a/src/mongo/s/catalog_cache_refresh_test.cpp b/src/mongo/s/catalog_cache_refresh_test.cpp
index 8df10d20a43..4b44f5693de 100644
--- a/src/mongo/s/catalog_cache_refresh_test.cpp
+++ b/src/mongo/s/catalog_cache_refresh_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/concurrency/locker_noop.h"
#include "mongo/db/pipeline/aggregation_request_helper.h"
#include "mongo/s/catalog/type_chunk.h"
@@ -42,7 +39,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
-
namespace mongo {
namespace {
@@ -115,7 +111,7 @@ TEST_F(CatalogCacheRefreshTest, FullLoad) {
expectGetDatabase();
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(reshardingUUID,
{shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << -100)},
@@ -328,7 +324,7 @@ TEST_F(CatalogCacheRefreshTest, ChunksBSONCorrupted) {
const auto chunk1 =
ChunkType(coll.getUuid(),
{shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)},
- ChunkVersion(1, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}),
{"0"});
return std::vector<BSONObj>{/* collection */
coll.toBSON(),
@@ -359,7 +355,7 @@ TEST_F(CatalogCacheRefreshTest, FullLoadMissingChunkWithLowestVersion) {
expectGetDatabase();
const auto incompleteChunks = [&]() {
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
// Chunk from (MinKey, -100) is missing (as if someone is dropping the collection
// concurrently) and has the lowest version.
@@ -415,7 +411,7 @@ TEST_F(CatalogCacheRefreshTest, FullLoadMissingChunkWithHighestVersion) {
expectGetDatabase();
const auto incompleteChunks = [&]() {
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
// Chunk from (MinKey, -100) is missing (as if someone is dropping the collection
// concurrently) and has the highest version.
@@ -473,7 +469,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadMissingChunkWithLowestVersion) {
auto future = scheduleRoutingInfoIncrementalRefresh(kNss);
const auto incompleteChunks = [&]() {
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
// Chunk from (MinKey, -100) is missing (as if someone is dropping the collection
// concurrently) and has the lowest version.
@@ -531,7 +527,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadMissingChunkWithHighestVersion) {
auto future = scheduleRoutingInfoIncrementalRefresh(kNss);
const auto incompleteChunks = [&]() {
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
// Chunk from (MinKey, -100) is missing (as if someone is dropping the collection
// concurrently) and has the highest version.
@@ -621,7 +617,7 @@ TEST_F(CatalogCacheRefreshTest, ChunkEpochChangeDuringIncrementalLoadRecoveryAft
// recreated collection.
ChunkType chunk3(coll.getUuid(),
{BSON("_id" << 100), shardKeyPattern.getKeyPattern().globalMax()},
- ChunkVersion(5, 2, newEpoch, newTimestamp),
+ ChunkVersion({newEpoch, newTimestamp}, {5, 2}),
{"1"});
chunk3.setName(OID::gen());
@@ -631,7 +627,7 @@ TEST_F(CatalogCacheRefreshTest, ChunkEpochChangeDuringIncrementalLoadRecoveryAft
});
// On the second retry attempt, return the correct set of chunks from the recreated collection
- ChunkVersion newVersion(5, 0, newEpoch, newTimestamp);
+ ChunkVersion newVersion({newEpoch, newTimestamp}, {5, 0});
onFindCommand([&](const RemoteCommandRequest& request) {
const auto opMsg = OpMsgRequest::fromDBAndBody(request.dbname, request.cmdObj);
const auto aggRequest = unittest::assertGet(
@@ -676,9 +672,9 @@ TEST_F(CatalogCacheRefreshTest, ChunkEpochChangeDuringIncrementalLoadRecoveryAft
ASSERT(cm.isSharded());
ASSERT_EQ(3, cm.numChunks());
ASSERT_EQ(newVersion, cm.getVersion());
- ASSERT_EQ(ChunkVersion(5, 1, newVersion.epoch(), newVersion.getTimestamp()),
+ ASSERT_EQ(ChunkVersion({newVersion.epoch(), newVersion.getTimestamp()}, {5, 1}),
cm.getVersion({"0"}));
- ASSERT_EQ(ChunkVersion(5, 2, newVersion.epoch(), newVersion.getTimestamp()),
+ ASSERT_EQ(ChunkVersion({newVersion.epoch(), newVersion.getTimestamp()}, {5, 2}),
cm.getVersion({"1"}));
}
@@ -693,7 +689,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterCollectionEpochChange) {
auto future = scheduleRoutingInfoIncrementalRefresh(kNss);
ChunkVersion oldVersion = initialRoutingInfo.getVersion();
- ChunkVersion newVersion(1, 0, OID::gen(), Timestamp(2));
+ ChunkVersion newVersion({OID::gen(), Timestamp(2)}, {1, 0});
const UUID uuid = initialRoutingInfo.getUUID();
// Return collection with a different epoch and a set of chunks, which represent a split
@@ -736,9 +732,9 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterCollectionEpochChange) {
ASSERT(cm.isSharded());
ASSERT_EQ(2, cm.numChunks());
ASSERT_EQ(newVersion, cm.getVersion());
- ASSERT_EQ(ChunkVersion(1, 0, newVersion.epoch(), newVersion.getTimestamp()),
+ ASSERT_EQ(ChunkVersion({newVersion.epoch(), newVersion.getTimestamp()}, {1, 0}),
cm.getVersion({"0"}));
- ASSERT_EQ(ChunkVersion(1, 1, newVersion.epoch(), newVersion.getTimestamp()),
+ ASSERT_EQ(ChunkVersion({newVersion.epoch(), newVersion.getTimestamp()}, {1, 1}),
cm.getVersion({"1"}));
}
@@ -798,7 +794,8 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterSplit) {
ASSERT_EQ(2, cm.numChunks());
ASSERT_EQ(version, cm.getVersion());
ASSERT_EQ(version, cm.getVersion({"0"}));
- ASSERT_EQ(ChunkVersion(0, 0, version.epoch(), version.getTimestamp()), cm.getVersion({"1"}));
+ ASSERT_EQ(ChunkVersion({version.epoch(), version.getTimestamp()}, {0, 0}),
+ cm.getVersion({"1"}));
}
TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterMoveWithReshardingFieldsAdded) {
@@ -877,7 +874,8 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterMoveLastChunkWithReshardingF
ASSERT(cm.isSharded());
ASSERT_EQ(1, cm.numChunks());
ASSERT_EQ(version, cm.getVersion());
- ASSERT_EQ(ChunkVersion(0, 0, version.epoch(), version.getTimestamp()), cm.getVersion({"0"}));
+ ASSERT_EQ(ChunkVersion({version.epoch(), version.getTimestamp()}, {0, 0}),
+ cm.getVersion({"0"}));
ASSERT_EQ(version, cm.getVersion({"1"}));
ASSERT(boost::none == cm.getReshardingFields());
}
diff --git a/src/mongo/s/catalog_cache_test.cpp b/src/mongo/s/catalog_cache_test.cpp
index bb22d6c0915..b41aafde12c 100644
--- a/src/mongo/s/catalog_cache_test.cpp
+++ b/src/mongo/s/catalog_cache_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include <boost/optional/optional_io.hpp>
#include "mongo/s/catalog/type_database_gen.h"
@@ -264,7 +261,7 @@ TEST_F(CatalogCacheTest, OnStaleDatabaseVersionNoVersion) {
TEST_F(CatalogCacheTest, OnStaleShardVersionWithSameVersion) {
const auto dbVersion = DatabaseVersion(UUID::gen(), Timestamp(1, 1));
- const auto cachedCollVersion = ChunkVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ const auto cachedCollVersion = ChunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], dbVersion)});
loadCollection(cachedCollVersion);
@@ -275,7 +272,7 @@ TEST_F(CatalogCacheTest, OnStaleShardVersionWithSameVersion) {
TEST_F(CatalogCacheTest, OnStaleShardVersionWithNoVersion) {
const auto dbVersion = DatabaseVersion(UUID::gen(), Timestamp(1, 1));
- const auto cachedCollVersion = ChunkVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ const auto cachedCollVersion = ChunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], dbVersion)});
loadCollection(cachedCollVersion);
@@ -288,9 +285,9 @@ TEST_F(CatalogCacheTest, OnStaleShardVersionWithNoVersion) {
TEST_F(CatalogCacheTest, OnStaleShardVersionWithGraterVersion) {
const auto dbVersion = DatabaseVersion(UUID::gen(), Timestamp(1, 1));
- const auto cachedCollVersion = ChunkVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ const auto cachedCollVersion = ChunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
const auto wantedCollVersion =
- ChunkVersion(2, 0, cachedCollVersion.epoch(), cachedCollVersion.getTimestamp());
+ ChunkVersion({cachedCollVersion.epoch(), cachedCollVersion.getTimestamp()}, {2, 0});
loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], dbVersion)});
loadCollection(cachedCollVersion);
@@ -304,7 +301,7 @@ TEST_F(CatalogCacheTest, OnStaleShardVersionWithGraterVersion) {
TEST_F(CatalogCacheTest, TimeseriesFieldsAreProperlyPropagatedOnCC) {
const auto dbVersion = DatabaseVersion(UUID::gen(), Timestamp(1, 1));
const auto epoch = OID::gen();
- const auto version = ChunkVersion(1, 0, epoch, Timestamp(42));
+ const auto version = ChunkVersion({epoch, Timestamp(42)}, {1, 0});
loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], dbVersion)});
@@ -360,7 +357,7 @@ TEST_F(CatalogCacheTest, TimeseriesFieldsAreProperlyPropagatedOnCC) {
TEST_F(CatalogCacheTest, LookupCollectionWithInvalidOptions) {
const auto dbVersion = DatabaseVersion(UUID::gen(), Timestamp(1, 1));
const auto epoch = OID::gen();
- const auto version = ChunkVersion(1, 0, epoch, Timestamp(42));
+ const auto version = ChunkVersion({epoch, Timestamp(42)}, {1, 0});
loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], dbVersion)});
diff --git a/src/mongo/s/catalog_cache_test_fixture.cpp b/src/mongo/s/catalog_cache_test_fixture.cpp
index 6e66a30d6b2..b83657c246e 100644
--- a/src/mongo/s/catalog_cache_test_fixture.cpp
+++ b/src/mongo/s/catalog_cache_test_fixture.cpp
@@ -27,14 +27,8 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/s/catalog_cache_test_fixture.h"
-#include <memory>
-#include <set>
-#include <vector>
-
#include "mongo/client/remote_command_targeter_factory_mock.h"
#include "mongo/client/remote_command_targeter_mock.h"
#include "mongo/db/client.h"
@@ -130,7 +124,7 @@ ChunkManager CatalogCacheTestFixture::makeChunkManager(
bool unique,
const std::vector<BSONObj>& splitPoints,
boost::optional<ReshardingFields> reshardingFields) {
- ChunkVersion version(1, 0, OID::gen(), Timestamp(42) /* timestamp */);
+ ChunkVersion version({OID::gen(), Timestamp(42)}, {1, 0});
DatabaseType db(nss.db().toString(), {"0"}, DatabaseVersion(UUID::gen(), Timestamp()));
@@ -270,7 +264,7 @@ ChunkManager CatalogCacheTestFixture::loadRoutingTableWithTwoChunksAndTwoShardsI
CollectionType collType(
nss, epoch, timestamp, Date_t::now(), uuid, shardKeyPattern.toBSON());
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(
uuid, {shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)}, version, {"0"});
diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp
index 92e54aa83bb..593b475ed46 100644
--- a/src/mongo/s/chunk_manager.cpp
+++ b/src/mongo/s/chunk_manager.cpp
@@ -130,6 +130,11 @@ void validateChunkIsNotOlderThan(const std::shared_ptr<ChunkInfo>& chunk,
} // namespace
+ChunkMap::ChunkMap(OID epoch, const Timestamp& timestamp, size_t initialCapacity)
+ : _collectionVersion({epoch, timestamp}, {0, 0}) {
+ _chunkMap.reserve(initialCapacity);
+}
+
ShardVersionMap ChunkMap::constructShardVersionMap() const {
ShardVersionMap shardVersions;
ChunkVector::const_iterator current = _chunkMap.cbegin();
@@ -212,10 +217,7 @@ void ChunkMap::appendChunk(const std::shared_ptr<ChunkInfo>& chunk) {
appendChunkTo(_chunkMap, chunk);
const auto chunkVersion = chunk->getLastmod();
if (_collectionVersion.isOlderThan(chunkVersion)) {
- _collectionVersion = ChunkVersion(chunkVersion.majorVersion(),
- chunkVersion.minorVersion(),
- chunkVersion.epoch(),
- _collTimestamp);
+ _collectionVersion = chunkVersion;
}
}
@@ -317,7 +319,7 @@ ChunkMap::_overlappingBounds(const BSONObj& min, const BSONObj& max, bool isMaxI
}
ShardVersionTargetingInfo::ShardVersionTargetingInfo(const OID& epoch, const Timestamp& timestamp)
- : shardVersion(0, 0, epoch, timestamp) {}
+ : shardVersion({epoch, timestamp}, {0, 0}) {}
RoutingTableHistory::RoutingTableHistory(
NamespaceString nss,
@@ -748,7 +750,7 @@ ChunkVersion RoutingTableHistory::_getVersion(const ShardId& shardName,
// Shards without explicitly tracked shard versions (meaning they have no chunks) always
// have a version of (0, 0, epoch, timestamp)
const auto collVersion = _chunkMap.getVersion();
- return ChunkVersion(0, 0, collVersion.epoch(), collVersion.getTimestamp());
+ return ChunkVersion({collVersion.epoch(), collVersion.getTimestamp()}, {0, 0});
}
if (throwOnStaleShard && gEnableFinerGrainedCatalogCacheRefresh) {
diff --git a/src/mongo/s/chunk_manager.h b/src/mongo/s/chunk_manager.h
index 00c75957d37..7114caed9e7 100644
--- a/src/mongo/s/chunk_manager.h
+++ b/src/mongo/s/chunk_manager.h
@@ -75,10 +75,7 @@ class ChunkMap {
using ChunkVector = std::vector<std::shared_ptr<ChunkInfo>>;
public:
- explicit ChunkMap(OID epoch, const Timestamp& timestamp, size_t initialCapacity = 0)
- : _collectionVersion(0, 0, epoch, timestamp), _collTimestamp(timestamp) {
- _chunkMap.reserve(initialCapacity);
- }
+ ChunkMap(OID epoch, const Timestamp& timestamp, size_t initialCapacity = 0);
size_t size() const {
return _chunkMap.size();
@@ -130,14 +127,6 @@ private:
// Max version across all chunks
ChunkVersion _collectionVersion;
-
- // Represents the timestamp present in config.collections for this ChunkMap.
- //
- // Note that due to the way Phase 1 of the FCV upgrade writes timestamps to chunks
- // (non-atomically), it is possible that chunks exist with timestamps, but the corresponding
- // config.collections entry doesn't. In this case, the chunks timestamp should be ignored when
- // computing the collection version and we should use _collTimestamp instead.
- Timestamp _collTimestamp;
};
/**
diff --git a/src/mongo/s/chunk_manager_query_test.cpp b/src/mongo/s/chunk_manager_query_test.cpp
index c7a95b8020c..936175610f7 100644
--- a/src/mongo/s/chunk_manager_query_test.cpp
+++ b/src/mongo/s/chunk_manager_query_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include <set>
#include "mongo/db/catalog/catalog_test_fixture.h"
@@ -42,7 +39,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
-
namespace mongo {
namespace {
@@ -506,7 +502,7 @@ TEST_F(ChunkManagerQueryTest, SimpleCollationNumbersMultiShard) {
TEST_F(ChunkManagerQueryTest, SnapshotQueryWithMoreShardsThanLatestMetadata) {
const auto uuid = UUID::gen();
const auto epoch = OID::gen();
- ChunkVersion version(1, 0, epoch, Timestamp(1, 1));
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
ChunkType chunk0(uuid, {BSON("x" << MINKEY), BSON("x" << 0)}, version, ShardId("0"));
chunk0.setName(OID::gen());
diff --git a/src/mongo/s/chunk_manager_refresh_bm.cpp b/src/mongo/s/chunk_manager_refresh_bm.cpp
index 3c7f3adb6b3..12253be4ab2 100644
--- a/src/mongo/s/chunk_manager_refresh_bm.cpp
+++ b/src/mongo/s/chunk_manager_refresh_bm.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include <benchmark/benchmark.h>
#include "mongo/base/init.h"
@@ -77,7 +75,7 @@ CollectionMetadata makeChunkManagerWithShardSelector(int nShards,
for (uint32_t i = 0; i < nChunks; ++i) {
chunks.emplace_back(collUuid,
getRangeForChunk(i, nChunks),
- ChunkVersion{i + 1, 0, collEpoch, Timestamp(1, 0)},
+ ChunkVersion({collEpoch, Timestamp(1, 0)}, {i + 1, 0}),
selectShard(i, nShards, nChunks));
}
@@ -169,7 +167,7 @@ auto BM_FullBuildOfChunkManager(benchmark::State& state, ShardSelectorFn selectS
for (uint32_t i = 0; i < nChunks; ++i) {
chunks.emplace_back(collUuid,
getRangeForChunk(i, nChunks),
- ChunkVersion{i + 1, 0, collEpoch, Timestamp(1, 0)},
+ ChunkVersion({collEpoch, Timestamp(1, 0)}, {i + 1, 0}),
selectShard(i, nShards, nChunks));
}
diff --git a/src/mongo/s/chunk_map_test.cpp b/src/mongo/s/chunk_map_test.cpp
index 6514fc00745..88378ff53e1 100644
--- a/src/mongo/s/chunk_map_test.cpp
+++ b/src/mongo/s/chunk_map_test.cpp
@@ -27,13 +27,10 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/s/chunk_manager.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
-
namespace {
const NamespaceString kNss("TestDB", "TestColl");
@@ -58,7 +55,7 @@ private:
TEST_F(ChunkMapTest, TestAddChunk) {
const OID epoch = OID::gen();
- ChunkVersion version{1, 0, epoch, Timestamp(1, 1)};
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
auto chunk = std::make_shared<ChunkInfo>(
ChunkType{uuid(),
@@ -75,7 +72,7 @@ TEST_F(ChunkMapTest, TestAddChunk) {
TEST_F(ChunkMapTest, TestEnumerateAllChunks) {
const OID epoch = OID::gen();
ChunkMap chunkMap{epoch, Timestamp(1, 1)};
- ChunkVersion version{1, 0, epoch, Timestamp(1, 1)};
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
auto newChunkMap = chunkMap.createMerged(
{std::make_shared<ChunkInfo>(
@@ -110,7 +107,7 @@ TEST_F(ChunkMapTest, TestEnumerateAllChunks) {
TEST_F(ChunkMapTest, TestIntersectingChunk) {
const OID epoch = OID::gen();
ChunkMap chunkMap{epoch, Timestamp(1, 1)};
- ChunkVersion version{1, 0, epoch, Timestamp(1, 1)};
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
auto newChunkMap = chunkMap.createMerged(
{std::make_shared<ChunkInfo>(
@@ -140,7 +137,7 @@ TEST_F(ChunkMapTest, TestIntersectingChunk) {
TEST_F(ChunkMapTest, TestEnumerateOverlappingChunks) {
const OID epoch = OID::gen();
ChunkMap chunkMap{epoch, Timestamp(1, 1)};
- ChunkVersion version{1, 0, epoch, Timestamp(1, 1)};
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
auto newChunkMap = chunkMap.createMerged(
{std::make_shared<ChunkInfo>(
diff --git a/src/mongo/s/chunk_test.cpp b/src/mongo/s/chunk_test.cpp
index 2902c0e41dd..d1c595c05e8 100644
--- a/src/mongo/s/chunk_test.cpp
+++ b/src/mongo/s/chunk_test.cpp
@@ -27,14 +27,11 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/namespace_string.h"
#include "mongo/s/catalog/type_chunk.h"
#include "mongo/s/chunk.h"
#include "mongo/s/chunk_version.h"
#include "mongo/s/shard_id.h"
-
#include "mongo/unittest/unittest.h"
namespace mongo {
@@ -48,7 +45,7 @@ const KeyPattern kShardKeyPattern(BSON("a" << 1));
TEST(ChunkTest, HasMovedSincePinnedTimestamp) {
const OID epoch = OID::gen();
const UUID uuid = UUID::gen();
- ChunkVersion version{1, 0, epoch, Timestamp(1, 1)};
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
ChunkType chunkType(uuid,
ChunkRange{kShardKeyPattern.globalMin(), kShardKeyPattern.globalMax()},
@@ -65,7 +62,7 @@ TEST(ChunkTest, HasMovedSincePinnedTimestamp) {
TEST(ChunkTest, HasMovedAndReturnedSincePinnedTimestamp) {
const OID epoch = OID::gen();
const UUID uuid = UUID::gen();
- ChunkVersion version{1, 0, epoch, Timestamp(1, 1)};
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
ChunkType chunkType(uuid,
ChunkRange{kShardKeyPattern.globalMin(), kShardKeyPattern.globalMax()},
@@ -83,7 +80,7 @@ TEST(ChunkTest, HasMovedAndReturnedSincePinnedTimestamp) {
TEST(ChunkTest, HasNotMovedSincePinnedTimestamp) {
const OID epoch = OID::gen();
const UUID uuid = UUID::gen();
- ChunkVersion version{1, 0, epoch, Timestamp(1, 1)};
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
ChunkType chunkType(uuid,
ChunkRange{kShardKeyPattern.globalMin(), kShardKeyPattern.globalMax()},
@@ -101,7 +98,7 @@ TEST(ChunkTest, HasNotMovedSincePinnedTimestamp) {
TEST(ChunkTest, HasNoHistoryValidForPinnedTimestamp_OneEntry) {
const OID epoch = OID::gen();
const UUID uuid = UUID::gen();
- ChunkVersion version{1, 0, epoch, Timestamp(1, 1)};
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
ChunkType chunkType(uuid,
ChunkRange{kShardKeyPattern.globalMin(), kShardKeyPattern.globalMax()},
@@ -117,7 +114,7 @@ TEST(ChunkTest, HasNoHistoryValidForPinnedTimestamp_OneEntry) {
TEST(ChunkTest, HasNoHistoryValidForPinnedTimestamp_MoreThanOneEntry) {
const OID epoch = OID::gen();
const UUID uuid = UUID::gen();
- ChunkVersion version{1, 0, epoch, Timestamp(1, 1)};
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
ChunkType chunkType(uuid,
ChunkRange{kShardKeyPattern.globalMin(), kShardKeyPattern.globalMax()},
diff --git a/src/mongo/s/chunk_version.cpp b/src/mongo/s/chunk_version.cpp
index 8da764c2080..ca260776a34 100644
--- a/src/mongo/s/chunk_version.cpp
+++ b/src/mongo/s/chunk_version.cpp
@@ -30,7 +30,6 @@
#include "mongo/s/chunk_version.h"
#include "mongo/s/chunk_version_gen.h"
-#include "mongo/s/pm2583_feature_flags_gen.h"
#include "mongo/util/str.h"
namespace mongo {
@@ -57,165 +56,12 @@ std::string CollectionGeneration::toString() const {
return str::stream() << _epoch << "|" << _timestamp;
}
-ChunkVersion ChunkVersion::_parseArrayOrObjectPositionalFormat(const BSONObj& obj) {
- BSONObjIterator it(obj);
- uassert(ErrorCodes::BadValue, "Unexpected empty version array", it.more());
-
- // Expect the major and minor versions (must be present)
- uint64_t combined;
- {
- BSONElement tsPart = it.next();
- uassert(ErrorCodes::TypeMismatch,
- str::stream() << "Invalid type " << tsPart.type()
- << " for version major and minor part.",
- tsPart.type() == bsonTimestamp);
- combined = tsPart.timestamp().asULL();
- }
-
- // Expect the epoch OID (must be present)
- boost::optional<OID> epoch;
- {
- BSONElement epochPart = it.next();
- uassert(ErrorCodes::TypeMismatch,
- str::stream() << "Invalid type " << epochPart.type() << " for version epoch part.",
- epochPart.type() == jstOID);
- epoch = epochPart.OID();
- }
-
- BSONElement nextElem = it.next();
-
- // TODO SERVER-59105: remove once 6.0 is last-lts. For backward compatibility reasons 5.0
- // routers sends canThrowSSVOnIgnored even though it is not used, so we attempt to parse and
- // ignore it.
- if (nextElem.type() == BSONType::Bool) {
- nextElem = it.next();
- }
-
- // Check for timestamp
- boost::optional<Timestamp> timestamp;
- if (nextElem.type() == bsonTimestamp) {
- timestamp = nextElem.timestamp();
- } else if (nextElem.eoo() && (epoch == UNSHARDED().epoch() || epoch == IGNORED().epoch())) {
- // In 5.0 binaries, the timestamp is not present in UNSHARDED and IGNORED versions
- timestamp =
- (epoch == UNSHARDED().epoch() ? UNSHARDED().getTimestamp() : IGNORED().getTimestamp());
- } else {
- uasserted(ErrorCodes::TypeMismatch,
- str::stream() << "Invalid type " << nextElem.type()
- << " for version timestamp part.");
- }
-
- ChunkVersion version;
- version._combined = combined;
- version._epoch = *epoch;
- version._timestamp = *timestamp;
- return version;
-}
-
-StatusWith<ChunkVersion> ChunkVersion::_parseLegacyWithField(const BSONObj& obj, StringData field) {
- // Expect the major and minor (must always exist)
- uint64_t combined;
- {
- auto versionElem = obj[field];
- if (versionElem.eoo())
- return {ErrorCodes::NoSuchKey,
- str::stream() << "Expected field " << field << " not found."};
-
- if (versionElem.type() == bsonTimestamp || versionElem.type() == Date) {
- combined = versionElem._numberLong();
- } else {
- return {ErrorCodes::TypeMismatch,
- str::stream() << "Invalid type " << versionElem.type()
- << " for version major and minor part."};
- }
- }
-
- // Expect the epoch OID
- //
- // TODO: Confirm whether the epoch can still be missing in upgrade chains that started from
- // pre-2.4 versions anymore (after FCV 4.4 -> 5.0 upgrade) ?
- boost::optional<OID> epoch;
- {
- const auto epochField = field + "Epoch";
- auto epochElem = obj[epochField];
- if (epochElem.type() == jstOID) {
- epoch = epochElem.OID();
- } else if (!epochElem.eoo()) {
- return {ErrorCodes::TypeMismatch,
- str::stream() << "Invalid type " << epochElem.type()
- << " for version epoch part."};
- }
- }
-
- // Expect the timestamp (can be missing only in the case of pre-5.0 UNSHARDED and IGNORED
- // versions)
- boost::optional<Timestamp> timestamp;
- {
- const auto timestampField = field + "Timestamp";
- auto timestampElem = obj[timestampField];
- if (timestampElem.type() == bsonTimestamp) {
- timestamp = timestampElem.timestamp();
- } else if (!timestampElem.eoo()) {
- return {ErrorCodes::TypeMismatch,
- str::stream() << "Invalid type " << timestampElem.type()
- << " for version timestamp part."};
- }
- }
-
- if (epoch && timestamp) {
- // Expected situation
- } else if (epoch && !timestamp) {
- if (epoch == UNSHARDED().epoch() || epoch == IGNORED().epoch()) {
- // In 5.0 binaries, the timestamp is not present in UNSHARDED and IGNORED versions
- timestamp = (epoch == UNSHARDED().epoch() ? UNSHARDED().getTimestamp()
- : IGNORED().getTimestamp());
- } else {
- uasserted(6278300, "Timestamp must be present if epoch exists.");
- }
- } else if (!epoch && timestamp) {
- uasserted(6278301, "Epoch must be present if timestamp exists.");
- } else {
- // Can happen in upgrade chains that started from pre-2.4 versions or in the case of
- // persistence for ShardCollectionType
- }
-
- ChunkVersion version;
- version._combined = combined;
- version._epoch = epoch.value_or(OID());
- version._timestamp = timestamp.value_or(Timestamp());
- return version;
-}
-
-ChunkVersion ChunkVersion::fromBSONLegacyOrNewerFormat(const BSONObj& obj, StringData field) {
- // New format.
- if (obj[field].isABSONObj()) {
- return parse(obj[field]);
- }
-
- // Legacy format.
- return uassertStatusOK(ChunkVersion::_parseLegacyWithField(obj, field));
-}
-
-ChunkVersion ChunkVersion::fromBSONPositionalOrNewerFormat(const BSONElement& element) {
- auto obj = element.Obj();
-
- // Positional or wrongly encoded format.
- if (obj.couldBeArray()) {
- return ChunkVersion::_parseArrayOrObjectPositionalFormat(obj);
- }
-
- // New format.
- return parse(element);
-}
-
ChunkVersion ChunkVersion::parse(const BSONElement& element) {
auto parsedVersion =
ChunkVersion60Format::parse(IDLParserErrorContext("ChunkVersion"), element.Obj());
auto version = parsedVersion.getVersion();
- return ChunkVersion(version.getSecs(),
- version.getInc(),
- parsedVersion.getEpoch(),
- parsedVersion.getTimestamp());
+ return ChunkVersion({parsedVersion.getEpoch(), parsedVersion.getTimestamp()},
+ {version.getSecs(), version.getInc()});
}
void ChunkVersion::serializeToBSON(StringData field, BSONObjBuilder* builder) const {
@@ -225,40 +71,9 @@ void ChunkVersion::serializeToBSON(StringData field, BSONObjBuilder* builder) co
builder->append(field, version.toBSON());
}
-void ChunkVersion::appendLegacyWithField(BSONObjBuilder* out, StringData field) const {
- if (feature_flags::gFeatureFlagNewPersistedChunkVersionFormat.isEnabled(
- serverGlobalParams.featureCompatibility)) {
- ChunkVersion60Format version;
- version.setGeneration({_epoch, _timestamp});
- version.setPlacement(Timestamp(majorVersion(), minorVersion()));
- out->append(field, version.toBSON());
- } else {
- out->appendTimestamp(field, _combined);
- out->append(field + "Epoch", _epoch);
- out->append(field + "Timestamp", _timestamp);
- }
-}
-
std::string ChunkVersion::toString() const {
return str::stream() << majorVersion() << "|" << minorVersion() << "||" << _epoch << "||"
<< _timestamp.toString();
}
-ChunkVersion ChunkVersion::parseMajorMinorVersionOnlyFromShardCollectionType(
- const BSONElement& element) {
- uassert(ErrorCodes::TypeMismatch,
- str::stream() << "Invalid type " << element.type()
- << " for version major and minor part.",
- element.type() == bsonTimestamp || element.type() == Date);
-
- ChunkVersion version;
- version._combined = element._numberLong();
- return version;
-}
-
-void ChunkVersion::serialiseMajorMinorVersionOnlyForShardCollectionType(StringData field,
- BSONObjBuilder* out) const {
- out->appendTimestamp(field, toLong());
-}
-
} // namespace mongo
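
With the legacy and positional parsers deleted above, chunk_version.cpp is left with one wire format. A minimal round-trip sketch using only the methods this patch keeps (the field name and values are illustrative):

    // Serialize as {shardVersion: {t: <Timestamp>, e: <OID>, v: Timestamp(major, minor)}} ...
    ChunkVersion original({OID::gen(), Timestamp(42, 3)}, {7, 1});
    BSONObjBuilder builder;
    original.serializeToBSON("shardVersion", &builder);
    const BSONObj wire = builder.obj();

    // ...and parse it back through the single remaining entry point.
    ChunkVersion roundTripped = ChunkVersion::parse(wire["shardVersion"]);
    invariant(roundTripped == original);
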
diff --git a/src/mongo/s/chunk_version.h b/src/mongo/s/chunk_version.h
index 212b1218f41..2a7911bfefe 100644
--- a/src/mongo/s/chunk_version.h
+++ b/src/mongo/s/chunk_version.h
@@ -53,12 +53,55 @@ public:
std::string toString() const;
+ // TODO: Do not add any new usages of these methods. Use isSameCollection instead.
+
+ const OID& epoch() const {
+ return _epoch;
+ }
+
+ const Timestamp& getTimestamp() const {
+ return _timestamp;
+ }
+
protected:
OID _epoch;
Timestamp _timestamp;
};
/**
+ * Reflects the placement information for a collection. An object of this class has no meaning on
+ * its own without the Generation component above, which is why most of its methods are protected
+ * and are exposed as semantic checks in ChunkVersion below.
+ */
+class CollectionPlacement {
+public:
+ CollectionPlacement(uint32_t major, uint32_t minor)
+ : _combined(static_cast<uint64_t>(minor) | (static_cast<uint64_t>(major) << 32)) {}
+
+ // TODO: Do not add any new usages of these methods. Use isSamePlacement instead.
+
+ uint32_t majorVersion() const {
+ return _combined >> 32;
+ }
+
+ uint32_t minorVersion() const {
+ return _combined & 0xFFFFFFFF;
+ }
+
+protected:
+ /**
+ * Returns whether two collection placements are compatible with each other (meaning that they
+ * refer to the same distribution of chunks across the cluster).
+ */
+ bool isSamePlacement(const CollectionPlacement& other) const {
+ return majorVersion() == other.majorVersion();
+ }
+
+ // The combined major/minor version, which exists as subordinate to the collection generation
+ uint64_t _combined;
+};
+
+/**
* ChunkVersions consist of a major/minor version scoped to a version epoch
*
* Version configurations (format: major version, epoch):
@@ -67,10 +110,8 @@ protected:
* 2. (0, n), n > 0 - applicable only to shardVersion; shard has no chunk.
* 3. (n, 0), n > 0 - invalid configuration.
* 4. (n, m), n > 0, m > 0 - normal sharded collection version.
- *
- * TODO (SERVER-65530): Get rid of all the legacy format parsers/serialisers
*/
-class ChunkVersion : public CollectionGeneration {
+class ChunkVersion : public CollectionGeneration, public CollectionPlacement {
public:
/**
* The name for the shard version information field, which shard-aware commands should include
@@ -78,28 +119,10 @@ public:
*/
static constexpr StringData kShardVersionField = "shardVersion"_sd;
- ChunkVersion(uint32_t major, uint32_t minor, const OID& epoch, const Timestamp& timestamp)
- : CollectionGeneration(epoch, timestamp),
- _combined(static_cast<uint64_t>(minor) | (static_cast<uint64_t>(major) << 32)) {}
+ ChunkVersion(CollectionGeneration generation, CollectionPlacement placement)
+ : CollectionGeneration(generation), CollectionPlacement(placement) {}
- ChunkVersion() : ChunkVersion(0, 0, OID(), Timestamp()) {}
-
- /**
- * Allow parsing a chunk version with the following formats:
- * {<field>:(major, minor), <fieldEpoch>:epoch, <fieldTimestmap>:timestamp}
- * {<field>: {t:timestamp, e:epoch, v:(major, minor) }}
- * TODO SERVER-63403: remove this function and only parse the new format.
- */
- static ChunkVersion fromBSONLegacyOrNewerFormat(const BSONObj& obj, StringData field = "");
-
- /**
- * Allow parsing a chunk version with the following formats:
- * [major, minor, epoch, <optional canThrowSSVOnIgnored>, timestamp]
- * {0:major, 1:minor, 2:epoch, 3:<optional canThrowSSVOnIgnored>, 4:timestamp}
- * {t:timestamp, e:epoch, v:(major, minor)}
- * TODO SERVER-63403: remove this function and only parse the new format.
- */
- static ChunkVersion fromBSONPositionalOrNewerFormat(const BSONElement& element);
+ ChunkVersion() : ChunkVersion({OID(), Timestamp()}, {0, 0}) {}
/**
* Indicates that the collection is not sharded.
@@ -153,22 +176,6 @@ public:
return _combined > 0;
}
- uint32_t majorVersion() const {
- return _combined >> 32;
- }
-
- uint32_t minorVersion() const {
- return _combined & 0xFFFFFFFF;
- }
-
- const OID& epoch() const {
- return _epoch;
- }
-
- const Timestamp& getTimestamp() const {
- return _timestamp;
- }
-
bool operator==(const ChunkVersion& otherVersion) const {
return otherVersion.getTimestamp() == getTimestamp() && otherVersion._combined == _combined;
}
@@ -179,7 +186,7 @@ public:
// Can we write to this data and not have a problem?
bool isWriteCompatibleWith(const ChunkVersion& other) const {
- return isSameCollection(other) && majorVersion() == other.majorVersion();
+ return isSameCollection(other) && isSamePlacement(other);
}
// Unsharded timestamp cannot be compared with other timestamps
@@ -216,53 +223,7 @@ public:
static ChunkVersion parse(const BSONElement& element);
void serializeToBSON(StringData field, BSONObjBuilder* builder) const;
- /**
- * NOTE: This format is being phased out. Use serializeToBSON instead.
- *
- * Serializes the version held by this object to 'out' in the legacy form:
- * { ..., <field>: [ <combined major/minor> ],
- * <field>Epoch: [ <OID epoch> ],
- * <field>Timestamp: [ <Timestamp> ] ... }
- * or
- * { ..., <field> : {t: <Timestamp>, e: <OID>, v: <major/minor>}}.
- *
- * Depending on the FCV version
- */
- void appendLegacyWithField(BSONObjBuilder* out, StringData field) const;
-
std::string toString() const;
-
- // Methods that are here for the purposes of parsing of ShardCollectionType only
- static ChunkVersion parseMajorMinorVersionOnlyFromShardCollectionType(
- const BSONElement& element);
- void serialiseMajorMinorVersionOnlyForShardCollectionType(StringData field,
- BSONObjBuilder* builder) const;
-
-private:
- // The following static functions will be deprecated. Only one function should be used to parse
- // ChunkVersion and is fromBSON.
- /**
- * The method below parse the "positional" formats of:
- *
- * [major, minor, epoch, <optional canThrowSSVOnIgnored> timestamp]
- * OR
- * {0: major, 1:minor, 2:epoch, 3:<optional canThrowSSVOnIgnored>, 4:timestamp}
- *
- * The latter format was introduced by mistake in 4.4 and is no longer generated from 5.3
- * onwards, but it is backwards compatible with the 5.2 and older binaries.
- */
- static ChunkVersion _parseArrayOrObjectPositionalFormat(const BSONObj& obj);
-
- /**
- * Parses the BSON formatted by appendLegacyWithField. If the field is missing, returns
- * 'NoSuchKey', otherwise if the field is not properly formatted can return any relevant parsing
- * error (BadValue, TypeMismatch, etc).
- */
- static StatusWith<ChunkVersion> _parseLegacyWithField(const BSONObj& obj, StringData field);
-
-private:
- // The combined major/minor version, which exists as subordinate to the collection generation
- uint64_t _combined;
};
inline std::ostream& operator<<(std::ostream& s, const ChunkVersion& v) {
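
To make the new layering concrete, a small sketch (epoch and version values are illustrative): ChunkVersion now inherits from both CollectionGeneration (epoch + timestamp) and CollectionPlacement (major/minor), and isWriteCompatibleWith composes the two semantic predicates instead of comparing raw fields.

    const OID epoch = OID::gen();
    ChunkVersion a({epoch, Timestamp(1, 1)}, {3, 1});
    ChunkVersion b({epoch, Timestamp(1, 1)}, {3, 4});

    // Same generation and same major (placement) version: only minor-version churn,
    // so writes routed with either version remain compatible.
    invariant(a.isWriteCompatibleWith(b));
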
diff --git a/src/mongo/s/chunk_version_test.cpp b/src/mongo/s/chunk_version_test.cpp
index f735bfd7ef0..7693ff55c85 100644
--- a/src/mongo/s/chunk_version_test.cpp
+++ b/src/mongo/s/chunk_version_test.cpp
@@ -40,13 +40,15 @@ TEST(ChunkVersionTest, EqualityOperators) {
OID epoch = OID::gen();
Timestamp timestamp = Timestamp(1);
- ASSERT_EQ(ChunkVersion(3, 1, epoch, Timestamp(1, 1)),
- ChunkVersion(3, 1, epoch, Timestamp(1, 1)));
- ASSERT_EQ(ChunkVersion(3, 1, OID(), timestamp), ChunkVersion(3, 1, OID(), timestamp));
-
- ASSERT_NE(ChunkVersion(3, 1, epoch, timestamp), ChunkVersion(3, 1, OID(), Timestamp(1, 1)));
- ASSERT_NE(ChunkVersion(3, 1, OID(), Timestamp(1, 1)), ChunkVersion(3, 1, epoch, timestamp));
- ASSERT_NE(ChunkVersion(4, 2, epoch, timestamp), ChunkVersion(4, 1, epoch, timestamp));
+ ASSERT_EQ(ChunkVersion({epoch, Timestamp(1, 1)}, {3, 1}),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {3, 1}));
+ ASSERT_EQ(ChunkVersion({OID(), timestamp}, {3, 1}), ChunkVersion({OID(), timestamp}, {3, 1}));
+
+ ASSERT_NE(ChunkVersion({epoch, timestamp}, {3, 1}),
+ ChunkVersion({OID(), Timestamp(1, 1)}, {3, 1}));
+ ASSERT_NE(ChunkVersion({OID(), Timestamp(1, 1)}, {3, 1}),
+ ChunkVersion({epoch, timestamp}, {3, 1}));
+ ASSERT_NE(ChunkVersion({epoch, timestamp}, {4, 2}), ChunkVersion({epoch, timestamp}, {4, 1}));
}
TEST(ChunkVersionTest, OlderThan) {
@@ -54,19 +56,23 @@ TEST(ChunkVersionTest, OlderThan) {
Timestamp timestamp(1);
Timestamp newerTimestamp(2);
- ASSERT(ChunkVersion(3, 1, epoch, timestamp).isOlderThan(ChunkVersion(4, 1, epoch, timestamp)));
- ASSERT(!ChunkVersion(4, 1, epoch, timestamp).isOlderThan(ChunkVersion(3, 1, epoch, timestamp)));
+ ASSERT(ChunkVersion({epoch, timestamp}, {3, 1})
+ .isOlderThan(ChunkVersion({epoch, timestamp}, {4, 1})));
+ ASSERT(!ChunkVersion({epoch, timestamp}, {4, 1})
+ .isOlderThan(ChunkVersion({epoch, timestamp}, {3, 1})));
- ASSERT(ChunkVersion(3, 1, epoch, timestamp).isOlderThan(ChunkVersion(3, 2, epoch, timestamp)));
- ASSERT(!ChunkVersion(3, 2, epoch, timestamp).isOlderThan(ChunkVersion(3, 1, epoch, timestamp)));
+ ASSERT(ChunkVersion({epoch, timestamp}, {3, 1})
+ .isOlderThan(ChunkVersion({epoch, timestamp}, {3, 2})));
+ ASSERT(!ChunkVersion({epoch, timestamp}, {3, 2})
+ .isOlderThan(ChunkVersion({epoch, timestamp}, {3, 1})));
- ASSERT(ChunkVersion(3, 1, epoch, timestamp)
- .isOlderThan(ChunkVersion(3, 1, OID::gen(), newerTimestamp)));
- ASSERT(!ChunkVersion(3, 1, epoch, newerTimestamp)
- .isOlderThan(ChunkVersion(3, 1, OID::gen(), timestamp)));
+ ASSERT(ChunkVersion({epoch, timestamp}, {3, 1})
+ .isOlderThan(ChunkVersion({OID::gen(), newerTimestamp}, {3, 1})));
+ ASSERT(!ChunkVersion({epoch, newerTimestamp}, {3, 1})
+ .isOlderThan(ChunkVersion({OID::gen(), timestamp}, {3, 1})));
- ASSERT(!ChunkVersion::UNSHARDED().isOlderThan(ChunkVersion(3, 1, epoch, timestamp)));
- ASSERT(!ChunkVersion(3, 1, epoch, timestamp).isOlderThan(ChunkVersion::UNSHARDED()));
+ ASSERT(!ChunkVersion::UNSHARDED().isOlderThan(ChunkVersion({epoch, timestamp}, {3, 1})));
+ ASSERT(!ChunkVersion({epoch, timestamp}, {3, 1}).isOlderThan(ChunkVersion::UNSHARDED()));
}
TEST(ChunkVersionTest, CreateWithLargeValues) {
@@ -74,7 +80,7 @@ TEST(ChunkVersionTest, CreateWithLargeValues) {
const uint32_t minorVersion = std::numeric_limits<uint32_t>::max();
const auto epoch = OID::gen();
- ChunkVersion version(majorVersion, minorVersion, epoch, Timestamp(1, 1));
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {majorVersion, minorVersion});
ASSERT_EQ(majorVersion, version.majorVersion());
ASSERT_EQ(minorVersion, version.minorVersion());
ASSERT_EQ(epoch, version.epoch());
@@ -86,7 +92,7 @@ TEST(ChunkVersionTest, ThrowsErrorIfOverflowIsAttemptedForMajorVersion) {
const uint32_t minorVersion = 0;
const auto epoch = OID::gen();
- ChunkVersion version(majorVersion, minorVersion, epoch, Timestamp(1, 1));
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {majorVersion, minorVersion});
ASSERT_EQ(majorVersion, version.majorVersion());
ASSERT_EQ(minorVersion, version.minorVersion());
ASSERT_EQ(epoch, version.epoch());
@@ -99,7 +105,7 @@ TEST(ChunkVersionTest, ThrowsErrorIfOverflowIsAttemptedForMinorVersion) {
const uint32_t minorVersion = std::numeric_limits<uint32_t>::max();
const auto epoch = OID::gen();
- ChunkVersion version(majorVersion, minorVersion, epoch, Timestamp(1, 1));
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {majorVersion, minorVersion});
ASSERT_EQ(majorVersion, version.majorVersion());
ASSERT_EQ(minorVersion, version.minorVersion());
ASSERT_EQ(epoch, version.epoch());
diff --git a/src/mongo/s/client/shard_registry.cpp b/src/mongo/s/client/shard_registry.cpp
index 5bfacde260e..ecd18026ffd 100644
--- a/src/mongo/s/client/shard_registry.cpp
+++ b/src/mongo/s/client/shard_registry.cpp
@@ -426,11 +426,6 @@ SharedSemiFuture<ShardRegistry::Cache::ValueHandle> ShardRegistry::_reloadIntern
return _getDataAsync();
}
-void ShardRegistry::clearEntries() {
- LOGV2_DEBUG(6471800, 1, "Invalidating Shard Registry");
- _cache->invalidateAll();
-}
-
void ShardRegistry::updateReplicaSetOnConfigServer(ServiceContext* serviceContext,
const ConnectionString& connStr) noexcept {
ThreadClient tc("UpdateReplicaSetOnConfigServer", serviceContext);
@@ -570,18 +565,9 @@ std::pair<ShardRegistryData, Timestamp> ShardRegistryData::createFromCatalogClie
OperationContext* opCtx, ShardFactory* shardFactory) {
auto const catalogClient = Grid::get(opCtx)->catalogClient();
- auto readConcern = repl::ReadConcernLevel::kMajorityReadConcern;
-
- // ShardRemote requires a majority read. We can only allow a non-majority read if we are a
- // config server.
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer &&
- !repl::ReadConcernArgs::get(opCtx).isEmpty()) {
- readConcern = repl::ReadConcernArgs::get(opCtx).getLevel();
- }
-
- auto shardsAndOpTime =
- uassertStatusOKWithContext(catalogClient->getAllShards(opCtx, readConcern),
- "could not get updated shard list from config server");
+ auto shardsAndOpTime = uassertStatusOKWithContext(
+ catalogClient->getAllShards(opCtx, repl::ReadConcernLevel::kMajorityReadConcern),
+ "could not get updated shard list from config server");
auto shards = std::move(shardsAndOpTime.value);
auto reloadOpTime = std::move(shardsAndOpTime.opTime);
diff --git a/src/mongo/s/client/shard_registry.h b/src/mongo/s/client/shard_registry.h
index df7c5a75977..96b27923275 100644
--- a/src/mongo/s/client/shard_registry.h
+++ b/src/mongo/s/client/shard_registry.h
@@ -283,12 +283,6 @@ public:
void reload(OperationContext* opCtx);
/**
- * Clears all entries from the shard registry entries, which will force the registry to do a
- * reload on next access.
- */
- void clearEntries();
-
- /**
* For use in mongos which needs notifications about changes to shard replset membership to
* update the config.shards collection.
*/
diff --git a/src/mongo/s/client/sharding_network_connection_hook.cpp b/src/mongo/s/client/sharding_network_connection_hook.cpp
index a6229316eaa..6ce6726eff8 100644
--- a/src/mongo/s/client/sharding_network_connection_hook.cpp
+++ b/src/mongo/s/client/sharding_network_connection_hook.cpp
@@ -39,7 +39,6 @@
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/s/client/shard_registry.h"
#include "mongo/s/grid.h"
-#include "mongo/s/request_types/set_shard_version_request.h"
#include "mongo/util/net/hostandport.h"
#include "mongo/util/str.h"
diff --git a/src/mongo/s/commands/SConscript b/src/mongo/s/commands/SConscript
index d83d9c5c638..7e7e076e49d 100644
--- a/src/mongo/s/commands/SConscript
+++ b/src/mongo/s/commands/SConscript
@@ -36,6 +36,7 @@ env.Library(
'cluster_commit_transaction_cmd_s.cpp',
'cluster_compact_cmd.cpp',
'cluster_convert_to_capped_cmd.cpp',
+ 'cluster_coordinate_commit_txn.cpp',
'cluster_count_cmd.cpp',
'cluster_create_cmd.cpp',
'cluster_create_indexes_cmd.cpp',
diff --git a/src/mongo/s/commands/cluster_abort_transaction_cmd.h b/src/mongo/s/commands/cluster_abort_transaction_cmd.h
index 7b7b738fdaa..1cbbe0dad9a 100644
--- a/src/mongo/s/commands/cluster_abort_transaction_cmd.h
+++ b/src/mongo/s/commands/cluster_abort_transaction_cmd.h
@@ -107,6 +107,14 @@ public:
return Impl::checkAuthForOperation(opCtx);
}
+ bool isTransactionCommand() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
bool runWithRequestParser(OperationContext* opCtx,
const std::string& dbName,
const BSONObj& cmdObj,
diff --git a/src/mongo/s/commands/cluster_command_test_fixture.cpp b/src/mongo/s/commands/cluster_command_test_fixture.cpp
index c8a8a208237..3292f62a7ab 100644
--- a/src/mongo/s/commands/cluster_command_test_fixture.cpp
+++ b/src/mongo/s/commands/cluster_command_test_fixture.cpp
@@ -124,8 +124,8 @@ DbResponse ClusterCommandTestFixture::runCommand(BSONObj cmd) {
// Have the new client use the dedicated threading model. This ensures the synchronous
// execution of the command by the client thread.
stdx::lock_guard lk(*client.get());
- auto seCtx = transport::ServiceExecutorContext{};
- seCtx.setThreadingModel(transport::ServiceExecutor::ThreadingModel::kDedicated);
+ auto seCtx = std::make_unique<transport::ServiceExecutorContext>();
+ seCtx->setThreadingModel(transport::ServiceExecutor::ThreadingModel::kDedicated);
transport::ServiceExecutorContext::set(client.get(), std::move(seCtx));
}
diff --git a/src/mongo/s/commands/cluster_commands.idl b/src/mongo/s/commands/cluster_commands.idl
index 26d5ad8760c..bc5edc8c7a4 100644
--- a/src/mongo/s/commands/cluster_commands.idl
+++ b/src/mongo/s/commands/cluster_commands.idl
@@ -107,3 +107,52 @@ commands:
description: "The shard key value that is within a chunk's boundaries.
Cannot be used on collections with hashed shard keys."
optional: true
+
+ moveChunk:
+ description : "The public moveChunk command on mongos."
+ command_name : moveChunk
+ command_alias: movechunk
+ cpp_name: ClusterMoveChunkRequest
+ strict: false
+ namespace: type
+ api_version: ""
+ type: namespacestring
+ fields:
+ bounds:
+ type: array<object>
+ description: "The bounds of a specific chunk to move. The array must consist of two documents that specify the lower and upper shard key values of a chunk to move. Specify either the bounds field or the find field but not both."
+ optional: true
+ find:
+ type: object
+ description: "An equality match on the shard key that specifies the shard-key value of the chunk to move. Specify either the bounds field or the find field but not both."
+ optional: true
+ to:
+ type: string
+ description: "The name of the destination shard for the chunk."
+
+ forceJumbo:
+ type: bool
+ description: "Specifies whether or not forcing jumbo chunks move"
+ default: false
+
+ writeConcern:
+ type: object_owned
+ description: "A document that expresses the write concern that the _secondaryThrottle will use to wait for secondaries during the chunk migration."
+ default: BSONObj()
+
+ # Secondary throttle can be specified by passing one of the following 2 parameters
+ secondaryThrottle:
+ type: optionalBool
+ description: "Secondary throttle policy to adopt during the migration"
+ _secondaryThrottle:
+ type: optionalBool
+ description: "Secondary throttle policy to adopt during the migration"
+
+ # Wait for delete can be specified with one of the following 2 parameters
+ waitForDelete:
+ type: optionalBool
+ description: "Internal option for testing purposes. The default is false. If set to true, the delete phase of a moveChunk operation blocks."
+ _waitForDelete:
+ type: optionalBool
+ description: "Internal option for testing purposes. The default is false. If set to true, the delete phase of a moveChunk operation blocks."
+
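
For reference, a hedged sketch of the request shape this new IDL spec accepts on mongos (namespace, shard name, and key values are illustrative; the field names come from the definition above):

    const BSONObj cmd = BSON("moveChunk" << "test.coll"
                             << "find" << BSON("x" << 7)
                             << "to" << "shard001"
                             << "forceJumbo" << false
                             << "secondaryThrottle" << true);
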
diff --git a/src/mongo/s/commands/cluster_commit_transaction_cmd.h b/src/mongo/s/commands/cluster_commit_transaction_cmd.h
index 975ced1752a..0a7a4e9fd1c 100644
--- a/src/mongo/s/commands/cluster_commit_transaction_cmd.h
+++ b/src/mongo/s/commands/cluster_commit_transaction_cmd.h
@@ -88,6 +88,14 @@ public:
return Impl::checkAuthForOperation(opCtx);
}
+ bool isTransactionCommand() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
bool runWithRequestParser(OperationContext* opCtx,
const std::string& dbName,
const BSONObj& cmdObj,
diff --git a/src/mongo/s/commands/cluster_coordinate_commit_txn.cpp b/src/mongo/s/commands/cluster_coordinate_commit_txn.cpp
new file mode 100644
index 00000000000..c1c33413ee9
--- /dev/null
+++ b/src/mongo/s/commands/cluster_coordinate_commit_txn.cpp
@@ -0,0 +1,88 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/commands.h"
+#include "mongo/db/commands/test_commands_enabled.h"
+#include "mongo/db/commands/txn_two_phase_commit_cmds_gen.h"
+#include "mongo/util/assert_util.h"
+
+namespace mongo {
+
+class CoordinateCommitTransactionCmd : public TypedCommand<CoordinateCommitTransactionCmd> {
+public:
+ using Request = CoordinateCommitTransaction;
+
+ bool skipApiVersionCheck() const override {
+ // Internal command (server to server).
+ return true;
+ }
+
+ bool isTransactionCommand() const final {
+ return true;
+ }
+
+ bool shouldCheckoutSession() const final {
+ return false;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
+ AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
+ return AllowedOnSecondary::kNever;
+ }
+
+ class Invocation final : public InvocationBase {
+ public:
+ using InvocationBase::InvocationBase;
+
+ void typedRun(OperationContext* opCtx) {
+ // Unimplemented in mongos; this command only serves as a stub for functions like
+ // isTransactionCommand. An example of where isTransactionCommand is called on this
+ // command is in the TransactionRouter.
+ uasserted(6510100, "Not implemented");
+ }
+
+ private:
+ bool supportsWriteConcern() const override {
+ return true;
+ }
+
+ NamespaceString ns() const override {
+ return NamespaceString(request().getDbName(), "");
+ }
+
+ void doCheckAuthorization(OperationContext* opCtx) const override {
+ uasserted(ErrorCodes::Unauthorized, "Unauthorized");
+ }
+ };
+} coordinateCommitTransactionCmd;
+
+} // namespace mongo
diff --git a/src/mongo/s/commands/cluster_create_cmd.cpp b/src/mongo/s/commands/cluster_create_cmd.cpp
index f6496645e22..eba994dfb74 100644
--- a/src/mongo/s/commands/cluster_create_cmd.cpp
+++ b/src/mongo/s/commands/cluster_create_cmd.cpp
@@ -94,6 +94,10 @@ public:
return false;
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
using InvocationBaseGen::InvocationBaseGen;
diff --git a/src/mongo/s/commands/cluster_create_indexes_cmd.cpp b/src/mongo/s/commands/cluster_create_indexes_cmd.cpp
index 2487e4da680..80793637487 100644
--- a/src/mongo/s/commands/cluster_create_indexes_cmd.cpp
+++ b/src/mongo/s/commands/cluster_create_indexes_cmd.cpp
@@ -78,6 +78,10 @@ public:
return true;
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
bool runWithRequestParser(OperationContext* opCtx,
const std::string& dbName,
const BSONObj& cmdObj,
diff --git a/src/mongo/s/commands/cluster_distinct_cmd.cpp b/src/mongo/s/commands/cluster_distinct_cmd.cpp
index 07c814515f6..41a6bee6d61 100644
--- a/src/mongo/s/commands/cluster_distinct_cmd.cpp
+++ b/src/mongo/s/commands/cluster_distinct_cmd.cpp
@@ -93,6 +93,10 @@ public:
out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions));
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
Status explain(OperationContext* opCtx,
const OpMsgRequest& opMsgRequest,
ExplainOptions::Verbosity verbosity,
diff --git a/src/mongo/s/commands/cluster_explain_cmd.cpp b/src/mongo/s/commands/cluster_explain_cmd.cpp
index e7734ec43fb..00ed9bfd521 100644
--- a/src/mongo/s/commands/cluster_explain_cmd.cpp
+++ b/src/mongo/s/commands/cluster_explain_cmd.cpp
@@ -190,6 +190,7 @@ std::unique_ptr<CommandInvocation> ClusterExplainCmd::parse(OperationContext* op
str::stream() << "Explain failed due to unknown command: " << cmdName,
explainedCommand);
auto innerRequest = std::make_unique<OpMsgRequest>(OpMsg{explainedObj});
+ innerRequest->validatedTenancyScope = request.validatedTenancyScope;
auto innerInvocation = explainedCommand->parseForExplain(opCtx, *innerRequest, verbosity);
return std::make_unique<Invocation>(
this, request, std::move(verbosity), std::move(innerRequest), std::move(innerInvocation));
diff --git a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp
index 2982ae57e56..dd6fc5751c0 100644
--- a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp
+++ b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp
@@ -448,6 +448,14 @@ public:
&bodyBuilder);
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
bool run(OperationContext* opCtx,
const std::string& dbName,
const BSONObj& cmdObj,
diff --git a/src/mongo/s/commands/cluster_find_cmd.h b/src/mongo/s/commands/cluster_find_cmd.h
index c438b60ed72..899cba75722 100644
--- a/src/mongo/s/commands/cluster_find_cmd.h
+++ b/src/mongo/s/commands/cluster_find_cmd.h
@@ -87,6 +87,10 @@ public:
return false;
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
std::string help() const override {
return "query for documents";
}
@@ -133,6 +137,8 @@ public:
const auto explainCmd =
ClusterExplain::wrapAsExplain(findCommand->toBSON(BSONObj()), verbosity);
+ Impl::checkCanRunHere(opCtx);
+
long long millisElapsed;
std::vector<AsyncRequestsSender::Response> shardResponses;
diff --git a/src/mongo/s/commands/cluster_getmore_cmd.h b/src/mongo/s/commands/cluster_getmore_cmd.h
index 45441cbdeb7..3fec3150c0b 100644
--- a/src/mongo/s/commands/cluster_getmore_cmd.h
+++ b/src/mongo/s/commands/cluster_getmore_cmd.h
@@ -70,6 +70,10 @@ public:
return std::make_unique<Invocation>(this, opMsgRequest);
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public CommandInvocation {
public:
Invocation(Command* cmd, const OpMsgRequest& request)
diff --git a/src/mongo/s/commands/cluster_move_chunk_cmd.cpp b/src/mongo/s/commands/cluster_move_chunk_cmd.cpp
index b1149c67c5c..9ab1d5a45ae 100644
--- a/src/mongo/s/commands/cluster_move_chunk_cmd.cpp
+++ b/src/mongo/s/commands/cluster_move_chunk_cmd.cpp
@@ -42,6 +42,7 @@
#include "mongo/s/catalog_cache.h"
#include "mongo/s/client/shard_registry.h"
#include "mongo/s/cluster_commands_helpers.h"
+#include "mongo/s/commands/cluster_commands_gen.h"
#include "mongo/s/grid.h"
#include "mongo/s/request_types/migration_secondary_throttle_options.h"
#include "mongo/s/request_types/move_range_request_gen.h"
@@ -53,20 +54,22 @@
namespace mongo {
namespace {
-class MoveChunkCmd : public ErrmsgCommandDeprecated {
+class MoveChunkCmd final : public TypedCommand<MoveChunkCmd> {
public:
- MoveChunkCmd() : ErrmsgCommandDeprecated("moveChunk", "movechunk") {}
+ MoveChunkCmd()
+ : TypedCommand(ClusterMoveChunkRequest::kCommandName,
+ ClusterMoveChunkRequest::kCommandAlias) {}
+
+ using Request = ClusterMoveChunkRequest;
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
return AllowedOnSecondary::kAlways;
}
+
bool adminOnly() const override {
return true;
}
- bool supportsWriteConcern(const BSONObj& cmd) const override {
- return true;
- }
std::string help() const override {
return "Example: move chunk that contains the doc {num : 7} to shard001\n"
@@ -76,148 +79,150 @@ public:
" , to : 'shard001' }\n";
}
- Status checkAuthForCommand(Client* client,
- const std::string& dbname,
- const BSONObj& cmdObj) const override {
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))),
- ActionType::moveChunk)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
- }
-
- return Status::OK();
- }
+ class Invocation : public MinimalInvocationBase {
+ public:
+ using MinimalInvocationBase::MinimalInvocationBase;
- std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const override {
- return CommandHelpers::parseNsFullyQualified(cmdObj);
- }
+ private:
+ bool supportsWriteConcern() const override {
+ return true;
+ }
- bool errmsgRun(OperationContext* opCtx,
- const std::string& dbname,
- const BSONObj& cmdObj,
- std::string& errmsg,
- BSONObjBuilder& result) override {
- Timer t;
-
- const NamespaceString nss(parseNs(dbname, cmdObj));
-
- const auto cm = uassertStatusOK(
- Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx,
- nss));
-
- const auto toElt = cmdObj["to"];
- uassert(ErrorCodes::TypeMismatch,
- "'to' must be of type String",
- toElt.type() == BSONType::String);
- const std::string toString = toElt.str();
- if (!toString.size()) {
- errmsg = "you have to specify where you want to move the chunk";
- return false;
+ void doCheckAuthorization(OperationContext* opCtx) const override {
+ uassert(ErrorCodes::Unauthorized,
+ "Unauthorized",
+ AuthorizationSession::get(opCtx->getClient())
+ ->isAuthorizedForActionsOnResource(ResourcePattern::forExactNamespace(ns()),
+ ActionType::moveChunk));
}
- const auto toStatus = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, toString);
- if (!toStatus.isOK()) {
- LOGV2_OPTIONS(22755,
- {logv2::UserAssertAfterLog(ErrorCodes::ShardNotFound)},
- "Could not move chunk in {namespace} to {toShardId} because that shard"
- " does not exist",
- "moveChunk destination shard does not exist",
- "toShardId"_attr = toString,
- "namespace"_attr = nss.ns());
+ NamespaceString ns() const override {
+ return request().getCommandParameter();
}
- const auto to = toStatus.getValue();
- const auto forceJumboElt = cmdObj["forceJumbo"];
- const auto forceJumbo = forceJumboElt && forceJumboElt.Bool();
+ void run(OperationContext* opCtx, rpc::ReplyBuilderInterface* result) {
- BSONObj find = cmdObj.getObjectField("find");
- BSONObj bounds = cmdObj.getObjectField("bounds");
+ Timer t;
+ const auto chunkManager = uassertStatusOK(
+ Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx,
+ ns()));
- // check that only one of the two chunk specification methods is used
- if (find.isEmpty() == bounds.isEmpty()) {
- errmsg = "need to specify either a find query, or both lower and upper bounds.";
- return false;
- }
+ uassert(ErrorCodes::InvalidOptions,
+ "bounds can only have exactly 2 elements",
+ !request().getBounds() || request().getBounds()->size() == 2);
- boost::optional<Chunk> chunk;
+ uassert(ErrorCodes::InvalidOptions,
+ "cannot specify bounds and query at the same time",
+ !(request().getFind() && request().getBounds()));
- if (!find.isEmpty()) {
- // find
- BSONObj shardKey =
- uassertStatusOK(cm.getShardKeyPattern().extractShardKeyFromQuery(opCtx, nss, find));
- if (shardKey.isEmpty()) {
- errmsg = str::stream() << "no shard key found in chunk query " << find;
- return false;
- }
+ uassert(ErrorCodes::InvalidOptions,
+ "need to specify query or bounds",
+ request().getFind() || request().getBounds());
- chunk.emplace(cm.findIntersectingChunkWithSimpleCollation(shardKey));
- } else {
- // bounds
- if (!cm.getShardKeyPattern().isShardKey(bounds[0].Obj()) ||
- !cm.getShardKeyPattern().isShardKey(bounds[1].Obj())) {
- errmsg = str::stream()
- << "shard key bounds "
- << "[" << bounds[0].Obj() << "," << bounds[1].Obj() << ")"
- << " are not valid for shard key pattern " << cm.getShardKeyPattern().toBSON();
- return false;
+
+ std::string destination = request().getTo().toString();
+ const auto toStatus = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, destination);
+
+ if (!toStatus.isOK()) {
+ LOGV2_OPTIONS(
+ 22755,
+ {logv2::UserAssertAfterLog(ErrorCodes::ShardNotFound)},
+ "Could not move chunk in {namespace} to {toShardId} because that shard"
+ " does not exist",
+ "moveChunk destination shard does not exist",
+ "toShardId"_attr = destination,
+ "namespace"_attr = ns());
}
- BSONObj minKey = cm.getShardKeyPattern().normalizeShardKey(bounds[0].Obj());
- BSONObj maxKey = cm.getShardKeyPattern().normalizeShardKey(bounds[1].Obj());
- chunk.emplace(cm.findIntersectingChunkWithSimpleCollation(minKey));
+ const auto to = toStatus.getValue();
+
+ auto find = request().getFind();
+ auto bounds = request().getBounds();
- if (chunk->getMin().woCompare(minKey) != 0 || chunk->getMax().woCompare(maxKey) != 0) {
- errmsg = str::stream() << "no chunk found with the shard key bounds "
- << ChunkRange(minKey, maxKey).toString();
- return false;
+
+ boost::optional<Chunk> chunk;
+
+ if (find) {
+ // find
+ BSONObj shardKey = uassertStatusOK(
+ chunkManager.getShardKeyPattern().extractShardKeyFromQuery(opCtx, ns(), *find));
+
+ uassert(656450,
+ str::stream() << "no shard key found in chunk query " << *find,
+ !shardKey.isEmpty());
+
+ chunk.emplace(chunkManager.findIntersectingChunkWithSimpleCollation(shardKey));
+ } else {
+
+ auto minBound = bounds->front();
+ auto maxBound = bounds->back();
+ uassert(656451,
+ str::stream() << "shard key bounds "
+ << "[" << minBound << "," << maxBound << ")"
+ << " are not valid for shard key pattern "
+ << chunkManager.getShardKeyPattern().toBSON(),
+ chunkManager.getShardKeyPattern().isShardKey(minBound) &&
+ chunkManager.getShardKeyPattern().isShardKey(maxBound));
+
+ BSONObj minKey = chunkManager.getShardKeyPattern().normalizeShardKey(minBound);
+ BSONObj maxKey = chunkManager.getShardKeyPattern().normalizeShardKey(maxBound);
+
+ chunk.emplace(chunkManager.findIntersectingChunkWithSimpleCollation(minKey));
+ uassert(656452,
+ str::stream() << "no chunk found with the shard key bounds "
+ << ChunkRange(minKey, maxKey).toString(),
+ chunk->getMin().woCompare(minKey) == 0 &&
+ chunk->getMax().woCompare(maxKey) == 0);
}
- }
- const auto secondaryThrottle =
- uassertStatusOK(MigrationSecondaryThrottleOptions::createFromCommand(cmdObj));
- const bool waitForDelete =
- cmdObj["_waitForDelete"].trueValue() || cmdObj["waitForDelete"].trueValue();
+ MoveRangeRequestBase moveRangeReq;
+ moveRangeReq.setToShard(to->getId());
+ moveRangeReq.setMin(chunk->getMin());
+ moveRangeReq.setMax(chunk->getMax());
+ moveRangeReq.setWaitForDelete(request().getWaitForDelete().value_or(false) ||
+ request().get_waitForDelete().value_or(false));
+
+
+ ConfigsvrMoveRange configsvrRequest(ns());
+ configsvrRequest.setDbName(NamespaceString::kAdminDb);
+ configsvrRequest.setMoveRangeRequestBase(moveRangeReq);
- MoveRangeRequestBase moveRangeReq;
- moveRangeReq.setToShard(to->getId());
- moveRangeReq.setMin(chunk->getMin());
- moveRangeReq.setMax(chunk->getMax());
- moveRangeReq.setWaitForDelete(waitForDelete);
+ const auto secondaryThrottle = uassertStatusOK(
+ MigrationSecondaryThrottleOptions::createFromCommand(request().toBSON({})));
- ConfigsvrMoveRange configsvrRequest(nss);
- configsvrRequest.setDbName(NamespaceString::kAdminDb);
- configsvrRequest.setMoveRangeRequestBase(moveRangeReq);
- configsvrRequest.setForceJumbo(forceJumbo ? ForceJumbo::kForceManual
- : ForceJumbo::kDoNotForce);
- if (secondaryThrottle.getSecondaryThrottle() == MigrationSecondaryThrottleOptions::kOn) {
configsvrRequest.setSecondaryThrottle(secondaryThrottle);
+
+ configsvrRequest.setForceJumbo(request().getForceJumbo() ? ForceJumbo::kForceManual
+ : ForceJumbo::kDoNotForce);
+
+ auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
+ auto commandResponse = configShard->runCommandWithFixedRetryAttempts(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ NamespaceString::kAdminDb.toString(),
+ CommandHelpers::appendMajorityWriteConcern(configsvrRequest.toBSON({})),
+ Shard::RetryPolicy::kIdempotent);
+ uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(std::move(commandResponse)));
+
+ Grid::get(opCtx)
+ ->catalogCache()
+ ->invalidateShardOrEntireCollectionEntryForShardedCollection(
+ ns(), boost::none, chunk->getShardId());
+ Grid::get(opCtx)
+ ->catalogCache()
+ ->invalidateShardOrEntireCollectionEntryForShardedCollection(
+ ns(), boost::none, to->getId());
+
+ BSONObjBuilder resultbson;
+ resultbson.append("millis", t.millis());
+ result->getBodyBuilder().appendElements(resultbson.obj());
}
+ };
- auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
- auto commandResponse = configShard->runCommand(
- opCtx,
- ReadPreferenceSetting{ReadPreference::PrimaryOnly},
- NamespaceString::kAdminDb.toString(),
- CommandHelpers::appendMajorityWriteConcern(configsvrRequest.toBSON({})),
- Shard::RetryPolicy::kIdempotent);
- uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(std::move(commandResponse)));
-
- Grid::get(opCtx)
- ->catalogCache()
- ->invalidateShardOrEntireCollectionEntryForShardedCollection(
- nss, boost::none, chunk->getShardId());
- Grid::get(opCtx)
- ->catalogCache()
- ->invalidateShardOrEntireCollectionEntryForShardedCollection(
- nss, boost::none, to->getId());
-
- result.append("millis", t.millis());
- return true;
- }
-} moveChunk;
+} clusterMoveChunk;
} // namespace
} // namespace mongo
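
A condensed sketch of the forwarding the rewritten command performs, using only types that appear in the new implementation (namespace, shard, and bounds are illustrative): mongos translates the public moveChunk into an internal ConfigsvrMoveRange request sent with majority write concern to the config server primary.

    MoveRangeRequestBase base;
    base.setToShard(ShardId("shard001"));
    base.setMin(BSON("x" << 0));
    base.setMax(BSON("x" << 10));

    ConfigsvrMoveRange req(NamespaceString("test.coll"));
    req.setDbName(NamespaceString::kAdminDb);
    req.setMoveRangeRequestBase(base);
    req.setForceJumbo(ForceJumbo::kDoNotForce);
    // The resulting document is wrapped with CommandHelpers::appendMajorityWriteConcern()
    // and dispatched through the config shard, as in the run() body above.
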
diff --git a/src/mongo/s/commands/cluster_pipeline_cmd.h b/src/mongo/s/commands/cluster_pipeline_cmd.h
index 0554d2a97e5..c33eca0f9a5 100644
--- a/src/mongo/s/commands/cluster_pipeline_cmd.h
+++ b/src/mongo/s/commands/cluster_pipeline_cmd.h
@@ -190,6 +190,10 @@ public:
const AuthorizationContract* getAuthorizationContract() const final {
return &::mongo::AggregateCommandRequest::kAuthorizationContract;
}
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
};
} // namespace
diff --git a/src/mongo/s/commands/cluster_write_cmd.h b/src/mongo/s/commands/cluster_write_cmd.h
index bfb9f6d81ad..afcb386f976 100644
--- a/src/mongo/s/commands/cluster_write_cmd.h
+++ b/src/mongo/s/commands/cluster_write_cmd.h
@@ -50,6 +50,14 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
protected:
class InvocationBase;
diff --git a/src/mongo/s/commands/internal_transactions_test_command.h b/src/mongo/s/commands/internal_transactions_test_command.h
index 48f2b94b4dd..bbbea80de38 100644
--- a/src/mongo/s/commands/internal_transactions_test_command.h
+++ b/src/mongo/s/commands/internal_transactions_test_command.h
@@ -172,6 +172,14 @@ public:
BasicCommand::AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
return BasicCommand::AllowedOnSecondary::kNever;
}
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
};
} // namespace mongo
diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp
index 17d0d7e09b9..17a7a591807 100644
--- a/src/mongo/s/commands/strategy.cpp
+++ b/src/mongo/s/commands/strategy.cpp
@@ -571,6 +571,8 @@ void ParseAndRunCommand::_parseCommand() {
APIParameters::get(opCtx) = APIParameters::fromClient(apiParamsFromClient);
}
+ rpc::readRequestMetadata(opCtx, request, command->requiresAuth());
+
_invocation = command->parse(opCtx, request);
CommandInvocation::set(opCtx, _invocation);
@@ -663,8 +665,6 @@ Status ParseAndRunCommand::RunInvocation::_setup() {
apiVersionMetrics.update(appName, apiParams);
}
- rpc::readRequestMetadata(opCtx, request, command->requiresAuth());
-
CommandHelpers::evaluateFailCommandFailPoint(opCtx, invocation.get());
bool startTransaction = false;
if (_parc->_osi->getAutocommit()) {
@@ -712,7 +712,7 @@ Status ParseAndRunCommand::RunInvocation::_setup() {
(opCtx->getClient()->session()->getTags() & transport::Session::kInternalClient));
if (supportsWriteConcern && !clientSuppliedWriteConcern &&
- (!TransactionRouter::get(opCtx) || isTransactionCommand(_parc->_commandName)) &&
+ (!TransactionRouter::get(opCtx) || command->isTransactionCommand()) &&
!opCtx->getClient()->isInDirectClient()) {
if (isInternalClient) {
uassert(
@@ -1193,7 +1193,8 @@ private:
void ClientCommand::_parseMessage() try {
const auto& msg = _rec->getMessage();
_rec->setReplyBuilder(rpc::makeReplyBuilder(rpc::protocolForMessage(msg)));
- auto opMsgReq = rpc::opMsgRequestFromAnyProtocol(msg);
+ auto opMsgReq = rpc::opMsgRequestFromAnyProtocol(msg, _rec->getOpCtx()->getClient());
+
if (msg.operation() == dbQuery) {
checkAllowedOpQueryCommand(*(_rec->getOpCtx()->getClient()), opMsgReq.getCommandName());
}
diff --git a/src/mongo/s/comparable_chunk_version_test.cpp b/src/mongo/s/comparable_chunk_version_test.cpp
index a5d47981709..63f6ca4a59c 100644
--- a/src/mongo/s/comparable_chunk_version_test.cpp
+++ b/src/mongo/s/comparable_chunk_version_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/s/chunk_manager.h"
#include "mongo/unittest/unittest.h"
@@ -38,15 +36,15 @@ namespace {
TEST(ComparableChunkVersionTest, VersionsEqual) {
const auto epoch = OID::gen();
const Timestamp timestamp(1, 1);
- const ChunkVersion v1(1, 0, epoch, timestamp);
- const ChunkVersion v2(1, 0, epoch, timestamp);
+ const ChunkVersion v1({epoch, timestamp}, {1, 0});
+ const ChunkVersion v2({epoch, timestamp}, {1, 0});
const auto version1 = ComparableChunkVersion::makeComparableChunkVersion(v1);
const auto version2 = ComparableChunkVersion::makeComparableChunkVersion(v2);
ASSERT(version1 == version2);
}
TEST(ComparableChunkVersionTest, VersionsEqualAfterCopy) {
- const ChunkVersion chunkVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion chunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
const auto version1 = ComparableChunkVersion::makeComparableChunkVersion(chunkVersion);
const auto version2 = version1;
ASSERT(version1 == version2);
@@ -54,8 +52,8 @@ TEST(ComparableChunkVersionTest, VersionsEqualAfterCopy) {
TEST(ComparableChunkVersionTest, CompareDifferentTimestamps) {
- const ChunkVersion v1(2, 0, OID::gen(), Timestamp(1));
- const ChunkVersion v2(1, 0, OID::gen(), Timestamp(2));
+ const ChunkVersion v1({OID::gen(), Timestamp(1)}, {2, 0});
+ const ChunkVersion v2({OID::gen(), Timestamp(2)}, {1, 0});
const auto version1 = ComparableChunkVersion::makeComparableChunkVersion(v1);
const auto version2 = ComparableChunkVersion::makeComparableChunkVersion(v2);
ASSERT(version2 != version1);
@@ -65,9 +63,9 @@ TEST(ComparableChunkVersionTest, CompareDifferentTimestamps) {
TEST(ComparableChunkVersionTest, CompareDifferentVersionsTimestampsIgnoreSequenceNumber) {
const auto version1 = ComparableChunkVersion::makeComparableChunkVersion(
- ChunkVersion(2, 0, OID::gen(), Timestamp(2)));
+ ChunkVersion({OID::gen(), Timestamp(2)}, {2, 0}));
const auto version2 = ComparableChunkVersion::makeComparableChunkVersion(
- ChunkVersion(2, 0, OID::gen(), Timestamp(1)));
+ ChunkVersion({OID::gen(), Timestamp(1)}, {2, 0}));
ASSERT(version1 != version2);
ASSERT(version1 > version2);
ASSERT_FALSE(version1 < version2);
@@ -76,9 +74,9 @@ TEST(ComparableChunkVersionTest, CompareDifferentVersionsTimestampsIgnoreSequenc
TEST(ComparableChunkVersionTest, VersionGreaterSameTimestamps) {
const auto epoch = OID::gen();
const Timestamp timestamp(1, 1);
- const ChunkVersion v1(1, 0, epoch, timestamp);
- const ChunkVersion v2(1, 2, epoch, timestamp);
- const ChunkVersion v3(2, 0, epoch, timestamp);
+ const ChunkVersion v1({epoch, timestamp}, {1, 0});
+ const ChunkVersion v2({epoch, timestamp}, {1, 2});
+ const ChunkVersion v3({epoch, timestamp}, {2, 0});
const auto version1 = ComparableChunkVersion::makeComparableChunkVersion(v1);
const auto version2 = ComparableChunkVersion::makeComparableChunkVersion(v2);
const auto version3 = ComparableChunkVersion::makeComparableChunkVersion(v3);
@@ -93,9 +91,9 @@ TEST(ComparableChunkVersionTest, VersionGreaterSameTimestamps) {
TEST(ComparableChunkVersionTest, VersionLessSameTimestamps) {
const auto epoch = OID::gen();
const Timestamp timestamp(1, 1);
- const ChunkVersion v1(1, 0, epoch, timestamp);
- const ChunkVersion v2(1, 2, epoch, timestamp);
- const ChunkVersion v3(2, 0, epoch, timestamp);
+ const ChunkVersion v1({epoch, timestamp}, {1, 0});
+ const ChunkVersion v2({epoch, timestamp}, {1, 2});
+ const ChunkVersion v3({epoch, timestamp}, {2, 0});
const auto version1 = ComparableChunkVersion::makeComparableChunkVersion(v1);
const auto version2 = ComparableChunkVersion::makeComparableChunkVersion(v2);
const auto version3 = ComparableChunkVersion::makeComparableChunkVersion(v3);
@@ -115,7 +113,7 @@ TEST(ComparableChunkVersionTest, DefaultConstructedVersionsAreEqual) {
}
TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLessThanWithChunksVersion) {
- const ChunkVersion chunkVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion chunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
const ComparableChunkVersion defaultVersion{};
const auto withChunksVersion = ComparableChunkVersion::makeComparableChunkVersion(chunkVersion);
ASSERT(defaultVersion != withChunksVersion);
@@ -124,7 +122,7 @@ TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLessThanWithCh
}
TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLessThanNoChunksVersion) {
- const ChunkVersion chunkVersion(0, 0, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion chunkVersion({OID::gen(), Timestamp(1, 1)}, {0, 0});
const ComparableChunkVersion defaultVersion{};
const auto noChunksVersion = ComparableChunkVersion::makeComparableChunkVersion(chunkVersion);
ASSERT(defaultVersion != noChunksVersion);
@@ -143,8 +141,8 @@ TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLessThanUnshar
TEST(ComparableChunkVersionTest, TwoNoChunksVersionsAreTheSame) {
const auto oid = OID::gen();
- const ChunkVersion v1(0, 0, oid, Timestamp(1, 1));
- const ChunkVersion v2(0, 0, oid, Timestamp(1, 1));
+ const ChunkVersion v1({oid, Timestamp(1, 1)}, {0, 0});
+ const ChunkVersion v2({oid, Timestamp(1, 1)}, {0, 0});
const auto noChunksVersion1 = ComparableChunkVersion::makeComparableChunkVersion(v1);
const auto noChunksVersion2 = ComparableChunkVersion::makeComparableChunkVersion(v2);
ASSERT(noChunksVersion1 == noChunksVersion2);
@@ -155,9 +153,9 @@ TEST(ComparableChunkVersionTest, TwoNoChunksVersionsAreTheSame) {
TEST(ComparableChunkVersionTest, NoChunksComparedBySequenceNum) {
const auto oid = OID::gen();
const Timestamp timestamp(1);
- const ChunkVersion v1(1, 0, oid, timestamp);
- const ChunkVersion v2(0, 0, oid, timestamp);
- const ChunkVersion v3(2, 0, oid, timestamp);
+ const ChunkVersion v1({oid, timestamp}, {1, 0});
+ const ChunkVersion v2({oid, timestamp}, {0, 0});
+ const ChunkVersion v3({oid, timestamp}, {2, 0});
const auto version1 = ComparableChunkVersion::makeComparableChunkVersion(v1);
const auto noChunksVersion2 = ComparableChunkVersion::makeComparableChunkVersion(v2);
const auto version3 = ComparableChunkVersion::makeComparableChunkVersion(v3);
@@ -168,7 +166,7 @@ TEST(ComparableChunkVersionTest, NoChunksComparedBySequenceNum) {
}
TEST(ComparableChunkVersionTest, NoChunksGreaterThanUnshardedBySequenceNum) {
- const ChunkVersion chunkVersion(0, 0, OID::gen(), Timestamp(1));
+ const ChunkVersion chunkVersion({OID::gen(), Timestamp(1)}, {0, 0});
const auto unsharded =
ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED());
const auto noChunkSV = ComparableChunkVersion::makeComparableChunkVersion(chunkVersion);
@@ -177,7 +175,7 @@ TEST(ComparableChunkVersionTest, NoChunksGreaterThanUnshardedBySequenceNum) {
}
TEST(ComparableChunkVersionTest, UnshardedGreaterThanNoChunksBySequenceNum) {
- const ChunkVersion chunkVersion(0, 0, OID::gen(), Timestamp(1));
+ const ChunkVersion chunkVersion({OID::gen(), Timestamp(1)}, {0, 0});
const auto noChunkSV = ComparableChunkVersion::makeComparableChunkVersion(chunkVersion);
const auto unsharded =
ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED());
@@ -186,7 +184,7 @@ TEST(ComparableChunkVersionTest, UnshardedGreaterThanNoChunksBySequenceNum) {
}
TEST(ComparableChunkVersionTest, NoChunksGreaterThanDefault) {
- const ChunkVersion chunkVersion(0, 0, OID::gen(), Timestamp(1));
+ const ChunkVersion chunkVersion({OID::gen(), Timestamp(1)}, {0, 0});
const auto noChunkSV = ComparableChunkVersion::makeComparableChunkVersion(chunkVersion);
const ComparableChunkVersion defaultVersion{};
ASSERT(noChunkSV != defaultVersion);
@@ -194,7 +192,7 @@ TEST(ComparableChunkVersionTest, NoChunksGreaterThanDefault) {
}
TEST(ComparableChunkVersionTest, CompareForcedRefreshVersionVersusValidChunkVersion) {
- const ChunkVersion chunkVersion(100, 0, OID::gen(), Timestamp(1));
+ const ChunkVersion chunkVersion({OID::gen(), Timestamp(1)}, {100, 0});
const ComparableChunkVersion defaultVersionBeforeForce;
const auto versionBeforeForce =
ComparableChunkVersion::makeComparableChunkVersion(chunkVersion);
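
[editor's note] The test changes above all migrate from the removed positional ChunkVersion constructor to the brace-initialized form where the first argument carries {epoch, timestamp} and the second {major, minor}. A minimal sketch of that pattern, assuming the in-tree headers; the helper name below is hypothetical and only illustrates the call shape used throughout this diff.

#include "mongo/bson/oid.h"
#include "mongo/bson/timestamp.h"
#include "mongo/s/chunk_version.h"

namespace mongo {
// Hypothetical helper: equivalent to the removed ChunkVersion(1, 0, epoch, timestamp).
ChunkVersion makeInitialPlacementVersion(const OID& epoch, const Timestamp& timestamp) {
    return ChunkVersion({epoch, timestamp}, {1, 0});
}
}  // namespace mongo
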
diff --git a/src/mongo/db/initialize_snmp.cpp b/src/mongo/s/mongos_core_options_stub.cpp
index c623b8c91f7..2de5990ece1 100644
--- a/src/mongo/db/initialize_snmp.cpp
+++ b/src/mongo/s/mongos_core_options_stub.cpp
@@ -1,5 +1,5 @@
/**
- * Copyright (C) 2018-present MongoDB, Inc.
+ * Copyright (C) 2022-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
@@ -27,22 +27,14 @@
* it in the license file.
*/
-#include "mongo/db/initialize_snmp.h"
-#include "mongo/util/assert_util.h"
+#include "mongo/base/init.h"
+#include "mongo/base/status.h"
namespace mongo {
namespace {
-bool initSet = false;
-std::function<void()> snmpInitializer = [] {};
+MONGO_INITIALIZER_GENERAL(CoreOptions_Store,
+ ("BeginStartupOptionStorage"),
+ ("EndStartupOptionStorage"))
+(InitializerContext* context) {}
} // namespace
} // namespace mongo
-
-void mongo::registerSNMPInitializer(std::function<void()> init) {
- invariant(!initSet);
- snmpInitializer = std::move(init);
- initSet = true;
-}
-
-void mongo::initializeSNMP() {
- return snmpInitializer();
-}
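
[editor's note] The replacement file registers an empty initializer ordered between the startup-option storage phases. A sketch of that registration pattern, mirroring the macro usage shown in the hunk above; the initializer name and body comment here are illustrative only, since the in-tree stub is intentionally empty.

#include "mongo/base/init.h"
#include "mongo/base/status.h"

namespace mongo {
namespace {
MONGO_INITIALIZER_GENERAL(ExampleOptionsStore,
                          ("BeginStartupOptionStorage"),
                          ("EndStartupOptionStorage"))
(InitializerContext* context) {
    // A real initializer in this slot would persist parsed startup options;
    // the stub added by this commit deliberately does nothing.
}
}  // namespace
}  // namespace mongo
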
diff --git a/src/mongo/s/mongos_main.cpp b/src/mongo/s/mongos_main.cpp
index 0a1ffa80f9a..222aabccdf3 100644
--- a/src/mongo/s/mongos_main.cpp
+++ b/src/mongo/s/mongos_main.cpp
@@ -812,6 +812,10 @@ ExitCode runMongosServer(ServiceContext* serviceContext) {
return EXIT_NET_ERROR;
}
+ if (!initialize_server_global_state::writePidFile()) {
+ return EXIT_ABRUPT;
+ }
+
// Startup options are written to the audit log at the end of startup so that cluster server
// parameters are guaranteed to have been initialized from disk at this point.
audit::logStartupOptions(tc.get(), serverGlobalParams.parsedOpts);
@@ -819,7 +823,7 @@ ExitCode runMongosServer(ServiceContext* serviceContext) {
serviceContext->notifyStartupComplete();
#if !defined(_WIN32)
- signalForkSuccess();
+ initialize_server_global_state::signalForkSuccess();
#else
if (ntservice::shouldStartService()) {
ntservice::reportStatus(SERVICE_RUNNING);
@@ -885,7 +889,7 @@ ExitCode main(ServiceContext* serviceContext) {
MONGO_INITIALIZER_GENERAL(ForkServer, ("EndStartupOptionHandling"), ("default"))
(InitializerContext* context) {
- forkServerOrDie();
+ initialize_server_global_state::forkServerOrDie();
}
// Initialize the featureCompatibilityVersion server parameter since mongos does not have a
@@ -969,7 +973,7 @@ ExitCode mongos_main(int argc, char* argv[]) {
logCommonStartupWarnings(serverGlobalParams);
try {
- if (!initializeServerGlobalState(service))
+ if (!initialize_server_global_state::checkSocketPath())
return EXIT_ABRUPT;
startSignalProcessingThread();
diff --git a/src/mongo/s/query/cluster_exchange_test.cpp b/src/mongo/s/query/cluster_exchange_test.cpp
index 8b33c58e843..96815019657 100644
--- a/src/mongo/s/query/cluster_exchange_test.cpp
+++ b/src/mongo/s/query/cluster_exchange_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/client/remote_command_targeter_factory_mock.h"
#include "mongo/client/remote_command_targeter_mock.h"
#include "mongo/db/pipeline/document_source_group.h"
@@ -533,7 +531,7 @@ TEST_F(ClusterExchangeTest, CompoundShardKeyThreeShards) {
const std::vector<std::string> xBoundaries = {"a", "g", "m", "r", "u"};
auto chunks = [&]() {
std::vector<ChunkType> chunks;
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
chunks.emplace_back(uuid,
ChunkRange{BSON("x" << MINKEY << "y" << MINKEY),
BSON("x" << xBoundaries[0] << "y" << MINKEY)},
diff --git a/src/mongo/s/query/sharded_agg_test_fixture.h b/src/mongo/s/query/sharded_agg_test_fixture.h
index d5c02d84b3c..f36ae36eabb 100644
--- a/src/mongo/s/query/sharded_agg_test_fixture.h
+++ b/src/mongo/s/query/sharded_agg_test_fixture.h
@@ -80,7 +80,7 @@ public:
const OID epoch,
const Timestamp timestamp,
std::vector<std::pair<ChunkRange, ShardId>> chunkInfos) {
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
std::vector<ChunkType> chunks;
for (auto&& pair : chunkInfos) {
chunks.emplace_back(uuid, pair.first, version, pair.second);
diff --git a/src/mongo/s/request_types/balance_chunk_request_test.cpp b/src/mongo/s/request_types/balance_chunk_request_test.cpp
index 82a938dc1ae..a1475991bd2 100644
--- a/src/mongo/s/request_types/balance_chunk_request_test.cpp
+++ b/src/mongo/s/request_types/balance_chunk_request_test.cpp
@@ -41,7 +41,7 @@ using unittest::assertGet;
TEST(BalanceChunkRequest, RoundTrip) {
UUID uuid{UUID::gen()};
- ChunkVersion version(30, 1, OID::gen(), Timestamp{2, 0});
+ ChunkVersion version({OID::gen(), Timestamp(2, 0)}, {30, 1});
auto obj = BalanceChunkRequest::serializeToRebalanceCommandForConfig(
NamespaceString("DB.Test"),
ChunkRange(BSON("A" << 100), BSON("A" << 200)),
@@ -49,8 +49,7 @@ TEST(BalanceChunkRequest, RoundTrip) {
ShardId("TestShard"),
version);
- auto request =
- assertGet(BalanceChunkRequest::parseFromConfigCommand(obj, false /* requireUUID */));
+ auto request = assertGet(BalanceChunkRequest::parseFromConfigCommand(obj));
ASSERT_EQ(NamespaceString("DB.Test"), request.getNss());
ASSERT_BSONOBJ_EQ(ChunkRange(BSON("A" << 100), BSON("A" << 200)).toBSON(),
request.getChunk().getRange().toBSON());
@@ -59,16 +58,17 @@ TEST(BalanceChunkRequest, RoundTrip) {
}
TEST(BalanceChunkRequest, ParseFromConfigCommandNoSecondaryThrottle) {
- const ChunkVersion version(1, 0, OID::gen(), Timestamp(1, 1));
+ const auto uuid{UUID::gen()};
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 0});
auto request = assertGet(BalanceChunkRequest::parseFromConfigCommand(
- BSON("_configsvrMoveChunk"
- << 1 << "ns"
- << "TestDB.TestColl"
- << "min" << BSON("a" << -100LL) << "max" << BSON("a" << 100LL) << "shard"
- << "TestShard0000"
- << "lastmod" << Date_t::fromMillisSinceEpoch(version.toLong()) << "lastmodEpoch"
- << version.epoch() << "lastmodTimestamp" << version.getTimestamp()),
- false /* requireUUID */));
+ BSON("_configsvrMoveChunk" << 1 << "ns"
+ << "TestDB.TestColl"
+ << "uuid" << uuid << "min" << BSON("a" << -100LL) << "max"
+ << BSON("a" << 100LL) << "shard"
+ << "TestShard0000"
+ << "lastmod"
+ << BSON("e" << version.epoch() << "t" << version.getTimestamp()
+ << "v" << Timestamp(version.toLong())))));
const auto& chunk = request.getChunk();
ASSERT_EQ("TestDB.TestColl", request.getNss().ns());
ASSERT_BSONOBJ_EQ(BSON("a" << -100LL), chunk.getMin());
@@ -81,21 +81,18 @@ TEST(BalanceChunkRequest, ParseFromConfigCommandNoSecondaryThrottle) {
secondaryThrottle.getSecondaryThrottle());
}
-// TODO (SERVER-60792): Get rid of the collection namespace from BSON once v6.0 branches out, as it
-// will become a no longer mandatory argument. Ideally both variants should be tested.
-TEST(BalanceChunkRequest, ParseFromConfigCommandWithUUID) {
+TEST(BalanceChunkRequest, ParseFromConfigCommandWithUUIDNoSecondaryThrottle) {
const auto uuid = UUID::gen();
- const ChunkVersion version(1, 0, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 0});
auto request = assertGet(BalanceChunkRequest::parseFromConfigCommand(
BSON("_configsvrMoveChunk" << 1 << "ns"
<< "TestDB.TestColl"
<< "uuid" << uuid << "min" << BSON("a" << -100LL) << "max"
<< BSON("a" << 100LL) << "shard"
<< "TestShard0000"
- << "lastmod" << Date_t::fromMillisSinceEpoch(version.toLong())
- << "lastmodEpoch" << version.epoch() << "lastmodTimestamp"
- << version.getTimestamp()),
- true /* requireUUID */));
+ << "lastmod"
+ << BSON("e" << version.epoch() << "t" << version.getTimestamp()
+ << "v" << Timestamp(version.toLong())))));
const auto& chunk = request.getChunk();
ASSERT_EQ(uuid, chunk.getCollectionUUID());
ASSERT_BSONOBJ_EQ(BSON("a" << -100LL), chunk.getMin());
@@ -109,18 +106,20 @@ TEST(BalanceChunkRequest, ParseFromConfigCommandWithUUID) {
}
TEST(BalanceChunkRequest, ParseFromConfigCommandWithSecondaryThrottle) {
- const ChunkVersion version(1, 0, OID::gen(), Timestamp(1, 1));
+ const auto uuid{UUID::gen()};
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 0});
auto request = assertGet(BalanceChunkRequest::parseFromConfigCommand(
BSON("_configsvrMoveChunk"
<< 1 << "ns"
<< "TestDB.TestColl"
- << "min" << BSON("a" << -100LL) << "max" << BSON("a" << 100LL) << "shard"
+ << "uuid" << uuid << "min" << BSON("a" << -100LL) << "max" << BSON("a" << 100LL)
+ << "shard"
<< "TestShard0000"
- << "lastmod" << Date_t::fromMillisSinceEpoch(version.toLong()) << "lastmodEpoch"
- << version.epoch() << "lastmodTimestamp" << version.getTimestamp()
+ << "lastmod"
+ << BSON("e" << version.epoch() << "t" << version.getTimestamp() << "v"
+ << Timestamp(version.toLong()))
<< "secondaryThrottle"
- << BSON("_secondaryThrottle" << true << "writeConcern" << BSON("w" << 2))),
- false /* requireUUID */));
+ << BSON("_secondaryThrottle" << true << "writeConcern" << BSON("w" << 2)))));
const auto& chunk = request.getChunk();
ASSERT_EQ("TestDB.TestColl", request.getNss().ns());
ASSERT_BSONOBJ_EQ(BSON("a" << -100LL), chunk.getMin());
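
[editor's note] The updated BalanceChunkRequest tests encode "lastmod" as a sub-document with "e" (epoch), "t" (timestamp) and "v" (the combined version as a Timestamp) rather than the legacy flat lastmod/lastmodEpoch/lastmodTimestamp fields. A sketch of building that sub-object, assuming the in-tree headers; the field names and accessors are taken directly from the test code above, but the helper itself is hypothetical.

#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/s/chunk_version.h"

namespace mongo {
// Hypothetical helper mirroring the "lastmod" wire format exercised by the tests.
BSONObj makeLastmodField(const ChunkVersion& version) {
    return BSON("lastmod" << BSON("e" << version.epoch() << "t" << version.getTimestamp()
                                      << "v" << Timestamp(version.toLong())));
}
}  // namespace mongo
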
diff --git a/src/mongo/s/request_types/balance_chunk_request_type.cpp b/src/mongo/s/request_types/balance_chunk_request_type.cpp
index c69291300fc..f94b61db12e 100644
--- a/src/mongo/s/request_types/balance_chunk_request_type.cpp
+++ b/src/mongo/s/request_types/balance_chunk_request_type.cpp
@@ -56,8 +56,7 @@ BalanceChunkRequest::BalanceChunkRequest(ChunkType chunk,
MigrationSecondaryThrottleOptions secondaryThrottle)
: _chunk(std::move(chunk)), _secondaryThrottle(std::move(secondaryThrottle)) {}
-StatusWith<BalanceChunkRequest> BalanceChunkRequest::parseFromConfigCommand(const BSONObj& obj,
- bool requireUUID) {
+StatusWith<BalanceChunkRequest> BalanceChunkRequest::parseFromConfigCommand(const BSONObj& obj) {
NamespaceString nss;
{
@@ -69,7 +68,7 @@ StatusWith<BalanceChunkRequest> BalanceChunkRequest::parseFromConfigCommand(cons
nss = NamespaceString(ns);
}
- const auto chunkStatus = ChunkType::parseFromNetworkRequest(obj, requireUUID);
+ const auto chunkStatus = ChunkType::parseFromNetworkRequest(obj);
if (!chunkStatus.isOK()) {
return chunkStatus.getStatus();
}
@@ -154,7 +153,7 @@ BSONObj BalanceChunkRequest::serializeToRebalanceCommandForConfig(
range.append(&cmdBuilder);
cmdBuilder.append(ChunkType::shard(), owningShard);
collectionUUID.appendToBuilder(&cmdBuilder, ChunkType::collectionUUID());
- expectedChunkVersion.appendLegacyWithField(&cmdBuilder, ChunkType::lastmod());
+ expectedChunkVersion.serializeToBSON(ChunkType::lastmod(), &cmdBuilder);
cmdBuilder.append(WriteConcernOptions::kWriteConcernField,
kMajorityWriteConcernNoTimeout.toBSON());
diff --git a/src/mongo/s/request_types/balance_chunk_request_type.h b/src/mongo/s/request_types/balance_chunk_request_type.h
index d7397585d5c..1eff27a0b2d 100644
--- a/src/mongo/s/request_types/balance_chunk_request_type.h
+++ b/src/mongo/s/request_types/balance_chunk_request_type.h
@@ -52,10 +52,7 @@ public:
* Parses the provided BSON content and if it is correct construct a request object with the
* request parameters. If the '_id' field is missing in obj, ignore it.
*/
- // TODO (SERVER-60792): Get rid of "requireUUID" once v6.0 branches out. Starting from v5.1, the
- // collection UUID will always be present in the chunk.
- static StatusWith<BalanceChunkRequest> parseFromConfigCommand(const BSONObj& obj,
- bool requireUUID = true);
+ static StatusWith<BalanceChunkRequest> parseFromConfigCommand(const BSONObj& obj);
/**
* Produces a BSON object for the variant of the command, which requests the balancer to pick a
@@ -71,12 +68,6 @@ public:
return _nss;
}
- // TODO (SERVER-60792): Get rid of setCollectionUUID() once v6.0 branches out. Starting from
- // v5.1, the collection UUID will always be present in the chunk.
- void setCollectionUUID(UUID const& uuid) {
- _chunk.setCollectionUUID(uuid);
- }
-
const ChunkType& getChunk() const {
return _chunk;
}
diff --git a/src/mongo/s/request_types/commit_chunk_migration_request_test.cpp b/src/mongo/s/request_types/commit_chunk_migration_request_test.cpp
deleted file mode 100644
index 38254acf008..00000000000
--- a/src/mongo/s/request_types/commit_chunk_migration_request_test.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/bson/bsonmisc.h"
-#include "mongo/bson/bsonobjbuilder.h"
-#include "mongo/s/request_types/commit_chunk_migration_request_type.h"
-#include "mongo/unittest/unittest.h"
-
-namespace mongo {
-
-using unittest::assertGet;
-
-namespace {
-
-const auto kNamespaceString = NamespaceString("TestDB", "TestColl");
-
-const auto kShardId0 = ShardId("shard0");
-const auto kShardId1 = ShardId("shard1");
-
-const auto kKey0 = BSON("Key" << -100);
-const auto kKey1 = BSON("Key" << 100);
-const auto kKey2 = BSON("Key" << -50);
-const auto kKey3 = BSON("Key" << 50);
-
-const char kConfigSvrCommitChunkMigration[] = "_configsvrCommitChunkMigration";
-
-TEST(CommitChunkMigrationRequest, WithoutControlChunk) {
- BSONObjBuilder builder;
-
- ChunkType migratedChunk;
- migratedChunk.setCollectionUUID(UUID::gen());
- migratedChunk.setMin(kKey0);
- migratedChunk.setMax(kKey1);
- migratedChunk.setVersion({12, 7, OID::gen(), Timestamp(1, 1)});
-
- ChunkVersion fromShardCollectionVersion(1, 2, OID::gen(), Timestamp(1, 1));
-
- Timestamp validAfter{1};
-
- CommitChunkMigrationRequest::appendAsCommand(&builder,
- kNamespaceString,
- kShardId0,
- kShardId1,
- migratedChunk,
- fromShardCollectionVersion,
- validAfter);
-
- BSONObj cmdObj = builder.obj();
-
- auto request = assertGet(CommitChunkMigrationRequest::createFromCommand(
- NamespaceString(cmdObj[kConfigSvrCommitChunkMigration].String()), cmdObj));
-
- ASSERT_EQ(kNamespaceString, request.getNss());
- ASSERT_EQ(kShardId0, request.getFromShard());
- ASSERT_EQ(kShardId1, request.getToShard());
- ASSERT_BSONOBJ_EQ(kKey0, request.getMigratedChunk().getMin());
- ASSERT_BSONOBJ_EQ(kKey1, request.getMigratedChunk().getMax());
- ASSERT_TRUE(request.getMigratedChunk().isVersionSet() &&
- request.getMigratedChunk().getVersion().isSet() &&
- request.getMigratedChunk().getVersion().epoch().isSet());
- ASSERT_EQ(fromShardCollectionVersion.epoch(), request.getCollectionEpoch());
-}
-
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/s/request_types/commit_chunk_migration_request_type.cpp b/src/mongo/s/request_types/commit_chunk_migration_request_type.cpp
deleted file mode 100644
index 00c2f90f65b..00000000000
--- a/src/mongo/s/request_types/commit_chunk_migration_request_type.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/s/request_types/commit_chunk_migration_request_type.h"
-
-#include "mongo/bson/util/bson_extract.h"
-
-namespace mongo {
-namespace {
-
-const char kConfigSvrCommitChunkMigration[] = "_configsvrCommitChunkMigration";
-const char kFromShard[] = "fromShard";
-const char kToShard[] = "toShard";
-const char kMigratedChunk[] = "migratedChunk";
-const char kFromShardCollectionVersion[] = "fromShardCollectionVersion";
-const char kValidAfter[] = "validAfter";
-
-/**
- * Attempts to parse a (range-only!) ChunkType from "field" in "source".
- */
-StatusWith<ChunkType> extractChunk(const BSONObj& source, StringData field) {
- BSONElement fieldElement;
- auto status = bsonExtractTypedField(source, field, BSONType::Object, &fieldElement);
- if (!status.isOK())
- return status;
-
- const auto fieldObj = fieldElement.Obj();
-
- auto rangeWith = ChunkRange::fromBSON(fieldObj);
- if (!rangeWith.isOK())
- return rangeWith.getStatus();
-
- ChunkVersion version;
- try {
- version = ChunkVersion::parse(fieldObj[ChunkType::lastmod()]);
- uassert(644490, "Version must be set", version.isSet());
- } catch (const DBException& ex) {
- return ex.toStatus();
- }
-
- ChunkType chunk;
- chunk.setMin(rangeWith.getValue().getMin());
- chunk.setMax(rangeWith.getValue().getMax());
- chunk.setVersion(version);
- return chunk;
-}
-
-/**
- * Attempts to parse a ShardId from "field" in "source".
- */
-StatusWith<ShardId> extractShardId(const BSONObj& source, StringData field) {
- std::string stringResult;
-
- auto status = bsonExtractStringField(source, field, &stringResult);
- if (!status.isOK()) {
- return status;
- }
-
- if (stringResult.empty()) {
- return Status(ErrorCodes::UnsupportedFormat,
- "The field '" + field.toString() + "' cannot be empty");
- }
-
- return ShardId(stringResult);
-}
-
-} // namespace
-
-StatusWith<CommitChunkMigrationRequest> CommitChunkMigrationRequest::createFromCommand(
- const NamespaceString& nss, const BSONObj& obj) {
-
- auto migratedChunk = extractChunk(obj, kMigratedChunk);
- if (!migratedChunk.isOK()) {
- return migratedChunk.getStatus();
- }
-
- CommitChunkMigrationRequest request(nss, std::move(migratedChunk.getValue()));
-
- {
- auto fromShard = extractShardId(obj, kFromShard);
- if (!fromShard.isOK()) {
- return fromShard.getStatus();
- }
-
- request._fromShard = std::move(fromShard.getValue());
- }
-
- {
- auto toShard = extractShardId(obj, kToShard);
- if (!toShard.isOK()) {
- return toShard.getStatus();
- }
-
- request._toShard = std::move(toShard.getValue());
- }
-
- try {
- auto fromShardVersion =
- ChunkVersion::fromBSONPositionalOrNewerFormat(obj[kFromShardCollectionVersion]);
- request._collectionEpoch = fromShardVersion.epoch();
- request._collectionTimestamp = fromShardVersion.getTimestamp();
- } catch (const DBException& ex) {
- return ex.toStatus();
- }
-
- {
- Timestamp validAfter;
- auto status = bsonExtractTimestampField(obj, kValidAfter, &validAfter);
- if (!status.isOK() && status != ErrorCodes::NoSuchKey) {
- return status;
- }
-
- if (status.isOK()) {
- request._validAfter = validAfter;
- } else {
- request._validAfter = boost::none;
- }
- }
-
- return request;
-}
-
-void CommitChunkMigrationRequest::appendAsCommand(BSONObjBuilder* builder,
- const NamespaceString& nss,
- const ShardId& fromShard,
- const ShardId& toShard,
- const ChunkType& migratedChunk,
- const ChunkVersion& fromShardCollectionVersion,
- const Timestamp& validAfter) {
- invariant(builder->asTempObj().isEmpty());
- invariant(nss.isValid());
-
- builder->append(kConfigSvrCommitChunkMigration, nss.ns());
- builder->append(kFromShard, fromShard.toString());
- builder->append(kToShard, toShard.toString());
- {
- BSONObjBuilder migrateChunk(builder->subobjStart(kMigratedChunk));
- migratedChunk.getRange().append(&migrateChunk);
- migratedChunk.getVersion().appendLegacyWithField(&migrateChunk, ChunkType::lastmod());
- }
- fromShardCollectionVersion.serializeToBSON(kFromShardCollectionVersion, builder);
- builder->append(kValidAfter, validAfter);
-}
-
-} // namespace mongo
diff --git a/src/mongo/s/request_types/commit_chunk_migration_request_type.h b/src/mongo/s/request_types/commit_chunk_migration_request_type.h
deleted file mode 100644
index 16d5f0ef8ce..00000000000
--- a/src/mongo/s/request_types/commit_chunk_migration_request_type.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <string>
-
-#include "mongo/db/namespace_string.h"
-#include "mongo/s/catalog/type_chunk.h"
-
-namespace mongo {
-
-/**
- * Creates and parses commit chunk migration command BSON objects.
- */
-class CommitChunkMigrationRequest {
-public:
- CommitChunkMigrationRequest(const NamespaceString& nss, const ChunkType& chunk)
- : _nss(nss), _migratedChunk(chunk) {}
-
- /**
- * Parses the input command and produces a request corresponding to its arguments.
- */
- static StatusWith<CommitChunkMigrationRequest> createFromCommand(const NamespaceString& nss,
- const BSONObj& obj);
-
- /**
- * Constructs a commitChunkMigration command with the specified parameters and writes it to
- * the builder, without closing the builder. The builder must be empty, but callers are free
- * to append more fields once the command has been constructed.
- */
- static void appendAsCommand(BSONObjBuilder* builder,
- const NamespaceString& nss,
- const ShardId& fromShard,
- const ShardId& toShard,
- const ChunkType& migratedChunkType,
- const ChunkVersion& fromShardChunkVersion,
- const Timestamp& validAfter);
-
- const NamespaceString& getNss() const {
- return _nss;
- }
- const ShardId& getFromShard() const {
- return _fromShard;
- }
- const ShardId& getToShard() const {
- return _toShard;
- }
- const ChunkType& getMigratedChunk() const {
- return _migratedChunk;
- }
- const OID& getCollectionEpoch() {
- return _collectionEpoch;
- }
- const Timestamp& getCollectionTimestamp() {
- return _collectionTimestamp;
- }
- const boost::optional<Timestamp>& getValidAfter() {
- return _validAfter;
- }
-
-private:
- // The collection for which this request applies.
- NamespaceString _nss;
-
- // The source shard name.
- ShardId _fromShard;
-
- // The recipient shard name.
- ShardId _toShard;
-
- // The chunk being moved.
- ChunkType _migratedChunk;
-
- // Epoch/Timestamp of the collection, matches the ones set in `_migratedChunk`.
- OID _collectionEpoch;
- Timestamp _collectionTimestamp;
-
- // The time of the move
- boost::optional<Timestamp> _validAfter;
-};
-
-} // namespace mongo
diff --git a/src/mongo/s/request_types/configure_collection_balancing.idl b/src/mongo/s/request_types/configure_collection_balancing.idl
index fbbed4cea39..7c39e968222 100644
--- a/src/mongo/s/request_types/configure_collection_balancing.idl
+++ b/src/mongo/s/request_types/configure_collection_balancing.idl
@@ -34,13 +34,6 @@ global:
imports:
- "mongo/idl/basic_types.idl"
-feature_flags:
- featureFlagPerCollBalancingSettings:
- description: "Add capability to configure per collection balancing settings."
- cpp_varname: feature_flags::gPerCollBalancingSettings
- default: true
- version: 5.3
-
structs:
configure_coll_balancing_params:
description: "Parameters for configureCollectionBalancing command"
diff --git a/src/mongo/s/request_types/move_chunk_request_test.cpp b/src/mongo/s/request_types/move_chunk_request_test.cpp
index e8020086d35..688d117b5c2 100644
--- a/src/mongo/s/request_types/move_chunk_request_test.cpp
+++ b/src/mongo/s/request_types/move_chunk_request_test.cpp
@@ -49,7 +49,7 @@ const int kMaxChunkSizeBytes = 1024;
const bool kWaitForDelete = true;
TEST(MoveChunkRequest, Roundtrip) {
- const ChunkVersion chunkVersion(3, 1, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion chunkVersion({OID::gen(), Timestamp(1, 1)}, {3, 1});
BSONObjBuilder builder;
MoveChunkRequest::appendAsCommand(
@@ -81,7 +81,7 @@ TEST(MoveChunkRequest, Roundtrip) {
}
TEST(MoveChunkRequest, EqualityOperatorSameValue) {
- const ChunkVersion chunkVersion(3, 1, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion chunkVersion({OID::gen(), Timestamp(1, 1)}, {3, 1});
BSONObjBuilder builder;
MoveChunkRequest::appendAsCommand(
@@ -106,7 +106,7 @@ TEST(MoveChunkRequest, EqualityOperatorSameValue) {
}
TEST(MoveChunkRequest, EqualityOperatorDifferentValues) {
- const ChunkVersion chunkVersion(3, 1, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion chunkVersion({OID::gen(), Timestamp(1, 1)}, {3, 1});
BSONObjBuilder builder1;
MoveChunkRequest::appendAsCommand(
diff --git a/src/mongo/s/request_types/set_shard_version_request.cpp b/src/mongo/s/request_types/set_shard_version_request.cpp
deleted file mode 100644
index f342c5c8da7..00000000000
--- a/src/mongo/s/request_types/set_shard_version_request.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/s/request_types/set_shard_version_request.h"
-
-#include "mongo/base/status_with.h"
-#include "mongo/bson/bsonobj.h"
-#include "mongo/bson/bsonobjbuilder.h"
-#include "mongo/bson/util/bson_extract.h"
-#include "mongo/util/assert_util.h"
-#include "mongo/util/str.h"
-
-namespace mongo {
-namespace {
-
-const char kCmdName[] = "setShardVersion";
-const char kForceRefresh[] = "forceRefresh";
-const char kAuthoritative[] = "authoritative";
-const char kNoConnectionVersioning[] =
- "noConnectionVersioning"; // TODO (SERVER-47956): Remove after 5.0 becomes last-lts.
-
-} // namespace
-
-constexpr StringData SetShardVersionRequest::kVersion;
-
-SetShardVersionRequest::SetShardVersionRequest(NamespaceString nss,
- ChunkVersion version,
- bool isAuthoritative,
- bool forceRefresh)
- : _isAuthoritative(isAuthoritative),
- _forceRefresh(forceRefresh),
- _nss(std::move(nss)),
- _version(std::move(version)) {}
-
-SetShardVersionRequest::SetShardVersionRequest() = default;
-
-StatusWith<SetShardVersionRequest> SetShardVersionRequest::parseFromBSON(const BSONObj& cmdObj) {
- SetShardVersionRequest request;
-
- {
- Status status = bsonExtractBooleanFieldWithDefault(
- cmdObj, kForceRefresh, false, &request._forceRefresh);
- if (!status.isOK())
- return status;
- }
-
- {
- Status status = bsonExtractBooleanFieldWithDefault(
- cmdObj, kAuthoritative, false, &request._isAuthoritative);
- if (!status.isOK())
- return status;
- }
-
- {
- std::string ns;
- Status status = bsonExtractStringField(cmdObj, kCmdName, &ns);
- if (!status.isOK())
- return status;
-
- NamespaceString nss(ns);
-
- if (!nss.isValid()) {
- return {ErrorCodes::InvalidNamespace,
- str::stream() << ns << " is not a valid namespace"};
- }
-
- request._nss = std::move(nss);
- }
-
- {
- try {
- request._version = ChunkVersion::parse(cmdObj[kVersion]);
- } catch (const DBException& ex) {
- return ex.toStatus();
- }
- }
-
- {
- bool noConnectionVersioning;
- Status status = bsonExtractBooleanFieldWithDefault(
- cmdObj, kNoConnectionVersioning, true, &noConnectionVersioning);
- if (!status.isOK())
- return status;
- if (!noConnectionVersioning)
- return {ErrorCodes::Error(47841),
- "This is a request with noConnectionVersioning:false, which means it comes "
- "from an older version of the server and is not supported."};
- }
-
- return request;
-}
-
-BSONObj SetShardVersionRequest::toBSON() const {
- BSONObjBuilder cmdBuilder;
-
- cmdBuilder.append(kCmdName, _nss.get().ns());
- cmdBuilder.append(kForceRefresh, _forceRefresh);
- cmdBuilder.append(kAuthoritative, _isAuthoritative);
- cmdBuilder.append(kNoConnectionVersioning, true);
-
- _version->appendLegacyWithField(&cmdBuilder, kVersion);
-
- return cmdBuilder.obj();
-}
-
-const NamespaceString& SetShardVersionRequest::getNS() const {
- return _nss.get();
-}
-
-ChunkVersion SetShardVersionRequest::getNSVersion() const {
- return _version.get();
-}
-
-} // namespace mongo
diff --git a/src/mongo/s/request_types/set_shard_version_request.h b/src/mongo/s/request_types/set_shard_version_request.h
deleted file mode 100644
index c0d9aa1c0c4..00000000000
--- a/src/mongo/s/request_types/set_shard_version_request.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include <boost/optional.hpp>
-#include <string>
-
-#include "mongo/client/connection_string.h"
-#include "mongo/db/namespace_string.h"
-#include "mongo/s/chunk_version.h"
-#include "mongo/s/shard_id.h"
-
-namespace mongo {
-
-class BSONObj;
-template <typename T>
-class StatusWith;
-
-/**
- * Encapsulates the parsing and construction logic for the SetShardVersion command.
- */
-class SetShardVersionRequest {
-public:
- static constexpr StringData kVersion = "version"_sd;
-
- SetShardVersionRequest(NamespaceString nss,
- ChunkVersion version,
- bool isAuthoritative,
- bool forceRefresh = false);
-
- /**
- * Parses an SSV request from a set shard version command.
- */
- static StatusWith<SetShardVersionRequest> parseFromBSON(const BSONObj& cmdObj);
-
- /**
- * Produces a BSON representation of the request, which can be used for sending as a command.
- */
- BSONObj toBSON() const;
-
- /**
- * Returns whether this request should force the version to be set instead of it being reloaded
- * and recalculated from the metadata.
- */
- bool isAuthoritative() const {
- return _isAuthoritative;
- }
-
- /**
- * Returns whether the set shard version catalog refresh is allowed to join
- * an in-progress refresh triggered by an other thread, or whether it's
- * required to either a) trigger its own refresh or b) wait for a refresh
- * to be started after it has entered the getCollectionRoutingInfoWithRefresh function
- */
- bool shouldForceRefresh() const {
- return _forceRefresh;
- }
- /**
- * Returns the namespace associated with this set shard version request. It is illegal to access
- * this field if isInit() returns true.
- */
- const NamespaceString& getNS() const;
-
- /**
- * Returns the version of the namespace associated with this set shard version request. It is
- * illegal to access this field if isInit() returns true.
- */
- ChunkVersion getNSVersion() const;
-
-private:
- SetShardVersionRequest();
-
- bool _isAuthoritative{false};
- // TODO (SERVER-50812) remove this flag that isn't used anymore
- bool _forceRefresh{false};
-
- boost::optional<NamespaceString> _nss;
- boost::optional<ChunkVersion> _version;
-};
-
-} // namespace mongo
diff --git a/src/mongo/s/request_types/set_shard_version_request_test.cpp b/src/mongo/s/request_types/set_shard_version_request_test.cpp
deleted file mode 100644
index 59ece1174cf..00000000000
--- a/src/mongo/s/request_types/set_shard_version_request_test.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/bson/bsonmisc.h"
-#include "mongo/bson/oid.h"
-#include "mongo/s/request_types/set_shard_version_request.h"
-#include "mongo/unittest/unittest.h"
-
-namespace mongo {
-namespace {
-
-using unittest::assertGet;
-
-TEST(SetShardVersionRequestTest, ParseFull) {
- const ChunkVersion chunkVersion(1, 2, OID::gen(), Timestamp(1, 1));
-
- SetShardVersionRequest request = assertGet(SetShardVersionRequest::parseFromBSON([&] {
- BSONObjBuilder builder(BSON("setShardVersion"
- << "db.coll"));
- chunkVersion.serializeToBSON("version", &builder);
- return builder.obj();
- }()));
-
- ASSERT(!request.shouldForceRefresh());
- ASSERT(!request.isAuthoritative());
- ASSERT_EQ(request.getNS().toString(), "db.coll");
- ASSERT_EQ(request.getNSVersion().majorVersion(), chunkVersion.majorVersion());
- ASSERT_EQ(request.getNSVersion().minorVersion(), chunkVersion.minorVersion());
- ASSERT_EQ(request.getNSVersion().epoch(), chunkVersion.epoch());
-}
-
-TEST(SetShardVersionRequestTest, ParseFullWithAuthoritative) {
- const ChunkVersion chunkVersion(1, 2, OID::gen(), Timestamp(1, 1));
-
- SetShardVersionRequest request = assertGet(SetShardVersionRequest::parseFromBSON([&] {
- BSONObjBuilder builder(BSON("setShardVersion"
- << "db.coll"
- << "authoritative" << true));
- chunkVersion.serializeToBSON("version", &builder);
- return builder.obj();
- }()));
-
- ASSERT(!request.shouldForceRefresh());
- ASSERT(request.isAuthoritative());
- ASSERT_EQ(request.getNS().toString(), "db.coll");
- ASSERT_EQ(request.getNSVersion().majorVersion(), chunkVersion.majorVersion());
- ASSERT_EQ(request.getNSVersion().minorVersion(), chunkVersion.minorVersion());
- ASSERT_EQ(request.getNSVersion().epoch(), chunkVersion.epoch());
-}
-
-TEST(SetShardVersionRequestTest, ParseFullNoNS) {
- const ChunkVersion chunkVersion(1, 2, OID::gen(), Timestamp(1, 1));
-
- auto ssvStatus = SetShardVersionRequest::parseFromBSON([&] {
- BSONObjBuilder builder(BSON("setShardVersion"
- << ""
- << "authoritative" << true));
- chunkVersion.serializeToBSON("version", &builder);
- return builder.obj();
- }());
-
- ASSERT_EQ(ErrorCodes::InvalidNamespace, ssvStatus.getStatus().code());
-}
-
-TEST(SetShardVersionRequestTest, ParseFullNSContainsDBOnly) {
- const ChunkVersion chunkVersion(1, 2, OID::gen(), Timestamp(1, 1));
-
- auto ssvStatus = SetShardVersionRequest::parseFromBSON([&] {
- BSONObjBuilder builder(BSON("setShardVersion"
- << "DBOnly"
- << "authoritative" << true));
- chunkVersion.serializeToBSON("version", &builder);
- return builder.obj();
- }());
-
- ASSERT_EQ(ErrorCodes::InvalidNamespace, ssvStatus.getStatus().code());
-}
-
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/s/request_types/sharded_ddl_commands.idl b/src/mongo/s/request_types/sharded_ddl_commands.idl
index fef00a65b57..b3cc1ed7246 100644
--- a/src/mongo/s/request_types/sharded_ddl_commands.idl
+++ b/src/mongo/s/request_types/sharded_ddl_commands.idl
@@ -117,9 +117,6 @@ structs:
CreateCollectionRequest:
description: "All the parameters sent by the router."
generate_comparison_operators: false
- # TODO SERVER-64720 remove the following comment
- # WARN if you add any new field to this request you must also include it in
- # CreateCollectionCoordinatorDocumentPre60Compatible::kPre60IncompatibleFields
strict: false
fields:
shardKey:
diff --git a/src/mongo/s/routing_table_history_test.cpp b/src/mongo/s/routing_table_history_test.cpp
index 9651911ee64..7c8973a7237 100644
--- a/src/mongo/s/routing_table_history_test.cpp
+++ b/src/mongo/s/routing_table_history_test.cpp
@@ -154,7 +154,7 @@ public:
const UUID uuid = UUID::gen();
const OID epoch = OID::gen();
const Timestamp timestamp(1);
- ChunkVersion version{1, 0, epoch, timestamp};
+ ChunkVersion version({epoch, timestamp}, {1, 0});
auto initChunk =
ChunkType{uuid,
@@ -332,7 +332,7 @@ TEST_F(RoutingTableHistoryTest, TestSplits) {
const UUID uuid = UUID::gen();
const OID epoch = OID::gen();
const Timestamp timestamp(1);
- ChunkVersion version{1, 0, epoch, timestamp};
+ ChunkVersion version({epoch, timestamp}, {1, 0});
auto chunkAll =
ChunkType{uuid,
@@ -356,35 +356,35 @@ TEST_F(RoutingTableHistoryTest, TestSplits) {
std::vector<ChunkType> chunks1 = {
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << 0)},
- ChunkVersion{2, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 1}),
kThisShard},
ChunkType{uuid,
ChunkRange{BSON("a" << 0), getShardKeyPattern().globalMax()},
- ChunkVersion{2, 2, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 2}),
kThisShard}};
auto rt1 =
rt.makeUpdated(boost::none /* timeseriesFields */, boost::none, boost::none, true, chunks1);
- auto v1 = ChunkVersion{2, 2, epoch, timestamp};
+ auto v1 = ChunkVersion({epoch, timestamp}, {2, 2});
ASSERT_EQ(v1, rt1.getVersion(kThisShard));
std::vector<ChunkType> chunks2 = {
ChunkType{uuid,
ChunkRange{BSON("a" << 0), getShardKeyPattern().globalMax()},
- ChunkVersion{2, 2, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 2}),
kThisShard},
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << -1)},
- ChunkVersion{3, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {3, 1}),
kThisShard},
ChunkType{uuid,
ChunkRange{BSON("a" << -1), BSON("a" << 0)},
- ChunkVersion{3, 2, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {3, 2}),
kThisShard}};
auto rt2 = rt1.makeUpdated(
boost::none /* timeseriesFields */, boost::none, boost::none, true, chunks2);
- auto v2 = ChunkVersion{3, 2, epoch, timestamp};
+ auto v2 = ChunkVersion({epoch, timestamp}, {3, 2});
ASSERT_EQ(v2, rt2.getVersion(kThisShard));
}
@@ -396,7 +396,7 @@ TEST_F(RoutingTableHistoryTest, TestReplaceEmptyChunk) {
std::vector<ChunkType> initialChunks = {
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), getShardKeyPattern().globalMax()},
- ChunkVersion{1, 0, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {1, 0}),
kThisShard}};
auto rt = RoutingTableHistory::makeNew(kNss,
@@ -416,16 +416,16 @@ TEST_F(RoutingTableHistoryTest, TestReplaceEmptyChunk) {
std::vector<ChunkType> changedChunks = {
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << 0)},
- ChunkVersion{2, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 1}),
kThisShard},
ChunkType{uuid,
ChunkRange{BSON("a" << 0), getShardKeyPattern().globalMax()},
- ChunkVersion{2, 2, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 2}),
kThisShard}};
auto rt1 = rt.makeUpdated(
boost::none /* timeseriesFields */, boost::none, boost::none, true, changedChunks);
- auto v1 = ChunkVersion{2, 2, epoch, timestamp};
+ auto v1 = ChunkVersion({epoch, timestamp}, {2, 2});
ASSERT_EQ(v1, rt1.getVersion(kThisShard));
ASSERT_EQ(rt1.numChunks(), 2);
@@ -451,7 +451,7 @@ TEST_F(RoutingTableHistoryTest, TestUseLatestVersions) {
std::vector<ChunkType> initialChunks = {
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), getShardKeyPattern().globalMax()},
- ChunkVersion{1, 0, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {1, 0}),
kThisShard}};
auto rt = RoutingTableHistory::makeNew(kNss,
@@ -471,20 +471,20 @@ TEST_F(RoutingTableHistoryTest, TestUseLatestVersions) {
std::vector<ChunkType> changedChunks = {
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), getShardKeyPattern().globalMax()},
- ChunkVersion{1, 0, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {1, 0}),
kThisShard},
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << 0)},
- ChunkVersion{2, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 1}),
kThisShard},
ChunkType{uuid,
ChunkRange{BSON("a" << 0), getShardKeyPattern().globalMax()},
- ChunkVersion{2, 2, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 2}),
kThisShard}};
auto rt1 = rt.makeUpdated(
boost::none /* timeseriesFields */, boost::none, boost::none, true, changedChunks);
- auto v1 = ChunkVersion{2, 2, epoch, timestamp};
+ auto v1 = ChunkVersion({epoch, timestamp}, {2, 2});
ASSERT_EQ(v1, rt1.getVersion(kThisShard));
ASSERT_EQ(rt1.numChunks(), 2);
}
@@ -497,11 +497,11 @@ TEST_F(RoutingTableHistoryTest, TestOutOfOrderVersion) {
std::vector<ChunkType> initialChunks = {
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << 0)},
- ChunkVersion{2, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 1}),
kThisShard},
ChunkType{uuid,
ChunkRange{BSON("a" << 0), getShardKeyPattern().globalMax()},
- ChunkVersion{2, 2, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 2}),
kThisShard}};
auto rt = RoutingTableHistory::makeNew(kNss,
@@ -521,21 +521,21 @@ TEST_F(RoutingTableHistoryTest, TestOutOfOrderVersion) {
std::vector<ChunkType> changedChunks = {
ChunkType{uuid,
ChunkRange{BSON("a" << 0), getShardKeyPattern().globalMax()},
- ChunkVersion{3, 0, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {3, 0}),
kThisShard},
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << 0)},
- ChunkVersion{3, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {3, 1}),
kThisShard}};
auto rt1 = rt.makeUpdated(
boost::none /* timeseriesFields */, boost::none, boost::none, true, changedChunks);
- auto v1 = ChunkVersion{3, 1, epoch, timestamp};
+ auto v1 = ChunkVersion({epoch, timestamp}, {3, 1});
ASSERT_EQ(v1, rt1.getVersion(kThisShard));
ASSERT_EQ(rt1.numChunks(), 2);
auto chunk1 = rt1.findIntersectingChunk(BSON("a" << 0));
- ASSERT_EQ(chunk1->getLastmod(), ChunkVersion(3, 0, epoch, timestamp));
+ ASSERT_EQ(chunk1->getLastmod(), ChunkVersion({epoch, timestamp}, {3, 0}));
ASSERT_EQ(chunk1->getMin().woCompare(BSON("a" << 0)), 0);
ASSERT_EQ(chunk1->getMax().woCompare(getShardKeyPattern().globalMax()), 0);
}
@@ -548,15 +548,15 @@ TEST_F(RoutingTableHistoryTest, TestMergeChunks) {
std::vector<ChunkType> initialChunks = {
ChunkType{uuid,
ChunkRange{BSON("a" << 0), BSON("a" << 10)},
- ChunkVersion{2, 0, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 0}),
kThisShard},
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << 0)},
- ChunkVersion{2, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 1}),
kThisShard},
ChunkType{uuid,
ChunkRange{BSON("a" << 10), getShardKeyPattern().globalMax()},
- ChunkVersion{2, 2, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 2}),
kThisShard}};
auto rt = RoutingTableHistory::makeNew(kNss,
@@ -572,21 +572,21 @@ TEST_F(RoutingTableHistoryTest, TestMergeChunks) {
true,
initialChunks);
ASSERT_EQ(rt.numChunks(), 3);
- ASSERT_EQ(rt.getVersion(), ChunkVersion(2, 2, epoch, timestamp));
+ ASSERT_EQ(rt.getVersion(), ChunkVersion({epoch, timestamp}, {2, 2}));
std::vector<ChunkType> changedChunks = {
ChunkType{uuid,
ChunkRange{BSON("a" << 10), getShardKeyPattern().globalMax()},
- ChunkVersion{3, 0, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {3, 0}),
kThisShard},
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << 10)},
- ChunkVersion{3, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {3, 1}),
kThisShard}};
auto rt1 = rt.makeUpdated(
boost::none /* timeseriesFields */, boost::none, boost::none, true, changedChunks);
- auto v1 = ChunkVersion{3, 1, epoch, timestamp};
+ auto v1 = ChunkVersion({epoch, timestamp}, {3, 1});
ASSERT_EQ(v1, rt1.getVersion(kThisShard));
ASSERT_EQ(rt1.numChunks(), 2);
}
@@ -599,15 +599,15 @@ TEST_F(RoutingTableHistoryTest, TestMergeChunksOrdering) {
std::vector<ChunkType> initialChunks = {
ChunkType{uuid,
ChunkRange{BSON("a" << -10), getShardKeyPattern().globalMax()},
- ChunkVersion{2, 0, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 0}),
kThisShard},
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << -500)},
- ChunkVersion{2, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 1}),
kThisShard},
ChunkType{uuid,
ChunkRange{BSON("a" << -500), BSON("a" << -10)},
- ChunkVersion{2, 2, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 2}),
kThisShard}};
auto rt = RoutingTableHistory::makeNew(kNss,
@@ -623,26 +623,26 @@ TEST_F(RoutingTableHistoryTest, TestMergeChunksOrdering) {
true,
initialChunks);
ASSERT_EQ(rt.numChunks(), 3);
- ASSERT_EQ(rt.getVersion(), ChunkVersion(2, 2, epoch, timestamp));
+ ASSERT_EQ(rt.getVersion(), ChunkVersion({epoch, timestamp}, {2, 2}));
std::vector<ChunkType> changedChunks = {
ChunkType{uuid,
ChunkRange{BSON("a" << -500), BSON("a" << -10)},
- ChunkVersion{2, 2, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 2}),
kThisShard},
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << -10)},
- ChunkVersion{3, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {3, 1}),
kThisShard}};
auto rt1 = rt.makeUpdated(
boost::none /* timeseriesFields */, boost::none, boost::none, true, changedChunks);
- auto v1 = ChunkVersion{3, 1, epoch, timestamp};
+ auto v1 = ChunkVersion({epoch, timestamp}, {3, 1});
ASSERT_EQ(v1, rt1.getVersion(kThisShard));
ASSERT_EQ(rt1.numChunks(), 2);
auto chunk1 = rt1.findIntersectingChunk(BSON("a" << -500));
- ASSERT_EQ(chunk1->getLastmod(), ChunkVersion(3, 1, epoch, timestamp));
+ ASSERT_EQ(chunk1->getLastmod(), ChunkVersion({epoch, timestamp}, {3, 1}));
ASSERT_EQ(chunk1->getMin().woCompare(getShardKeyPattern().globalMin()), 0);
ASSERT_EQ(chunk1->getMax().woCompare(BSON("a" << -10)), 0);
}
@@ -655,27 +655,27 @@ TEST_F(RoutingTableHistoryTest, TestFlatten) {
std::vector<ChunkType> initialChunks = {
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << 10)},
- ChunkVersion{2, 0, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 0}),
kThisShard},
ChunkType{uuid,
ChunkRange{BSON("a" << 10), BSON("a" << 20)},
- ChunkVersion{2, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 1}),
kThisShard},
ChunkType{uuid,
ChunkRange{BSON("a" << 20), getShardKeyPattern().globalMax()},
- ChunkVersion{2, 2, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {2, 2}),
kThisShard},
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), getShardKeyPattern().globalMax()},
- ChunkVersion{3, 0, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {3, 0}),
kThisShard},
ChunkType{uuid,
ChunkRange{getShardKeyPattern().globalMin(), BSON("a" << 10)},
- ChunkVersion{4, 0, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {4, 0}),
kThisShard},
ChunkType{uuid,
ChunkRange{BSON("a" << 10), getShardKeyPattern().globalMax()},
- ChunkVersion{4, 1, epoch, timestamp},
+ ChunkVersion({epoch, timestamp}, {4, 1}),
kThisShard},
};
@@ -692,10 +692,10 @@ TEST_F(RoutingTableHistoryTest, TestFlatten) {
true,
initialChunks);
ASSERT_EQ(rt.numChunks(), 2);
- ASSERT_EQ(rt.getVersion(), ChunkVersion(4, 1, epoch, timestamp));
+ ASSERT_EQ(rt.getVersion(), ChunkVersion({epoch, timestamp}, {4, 1}));
auto chunk1 = rt.findIntersectingChunk(BSON("a" << 0));
- ASSERT_EQ(chunk1->getLastmod(), ChunkVersion(4, 0, epoch, timestamp));
+ ASSERT_EQ(chunk1->getLastmod(), ChunkVersion({epoch, timestamp}, {4, 0}));
ASSERT_EQ(chunk1->getMin().woCompare(getShardKeyPattern().globalMin()), 0);
ASSERT_EQ(chunk1->getMax().woCompare(BSON("a" << 10)), 0);
}
diff --git a/src/mongo/s/s_sharding_server_status.cpp b/src/mongo/s/s_sharding_server_status.cpp
index a1515a609f4..791d40aa0fb 100644
--- a/src/mongo/s/s_sharding_server_status.cpp
+++ b/src/mongo/s/s_sharding_server_status.cpp
@@ -60,14 +60,20 @@ public:
result.append("configsvrConnectionString",
shardRegistry->getConfigServerConnectionString().toString());
+ const auto vcTime = VectorClock::get(opCtx)->getTime();
+
const auto configOpTime = [&]() {
- const auto vcTime = VectorClock::get(opCtx)->getTime();
const auto vcConfigTimeTs = vcTime.configTime().asTimestamp();
return mongo::repl::OpTime(vcConfigTimeTs, mongo::repl::OpTime::kUninitializedTerm);
}();
-
configOpTime.append(&result, "lastSeenConfigServerOpTime");
+ const auto topologyOpTime = [&]() {
+ const auto vcTopologyTimeTs = vcTime.topologyTime().asTimestamp();
+ return mongo::repl::OpTime(vcTopologyTimeTs, mongo::repl::OpTime::kUninitializedTerm);
+ }();
+ topologyOpTime.append(&result, "lastSeenTopologyOpTime");
+
const long long maxChunkSizeInBytes =
grid->getBalancerConfiguration()->getMaxChunkSizeBytes();
result.append("maxChunkSizeInBytes", maxChunkSizeInBytes);
diff --git a/src/mongo/s/service_entry_point_mongos.cpp b/src/mongo/s/service_entry_point_mongos.cpp
index 401a1363202..36a2d0aa13c 100644
--- a/src/mongo/s/service_entry_point_mongos.cpp
+++ b/src/mongo/s/service_entry_point_mongos.cpp
@@ -221,7 +221,7 @@ void ServiceEntryPointMongos::onClientConnect(Client* client) {
}
}
-void ServiceEntryPointMongos::onClientDisconnect(Client* client) {
+void ServiceEntryPointMongos::derivedOnClientDisconnect(Client* client) {
if (load_balancer_support::isFromLoadBalancer(client)) {
_loadBalancedConnections.decrement();
diff --git a/src/mongo/s/service_entry_point_mongos.h b/src/mongo/s/service_entry_point_mongos.h
index c5c6530d2a9..42645ac236f 100644
--- a/src/mongo/s/service_entry_point_mongos.h
+++ b/src/mongo/s/service_entry_point_mongos.h
@@ -54,7 +54,7 @@ public:
void appendStats(BSONObjBuilder* bob) const override;
void onClientConnect(Client* client) override;
- void onClientDisconnect(Client* client) override;
+ void derivedOnClientDisconnect(Client* client) override;
private:
Counter64 _loadBalancedConnections;
diff --git a/src/mongo/s/sessions_collection_sharded_test.cpp b/src/mongo/s/sessions_collection_sharded_test.cpp
index d82a4edfbfa..011821aa131 100644
--- a/src/mongo/s/sessions_collection_sharded_test.cpp
+++ b/src/mongo/s/sessions_collection_sharded_test.cpp
@@ -98,6 +98,29 @@ TEST_F(SessionsCollectionShardedTest, RefreshOneSessionOKTest) {
future.default_timed_get();
}
+TEST_F(SessionsCollectionShardedTest, CheckReadConcern) {
+ // Set up routing table for the logical sessions collection.
+ loadRoutingTableWithTwoChunksAndTwoShardsImpl(NamespaceString::kLogicalSessionsNamespace,
+ BSON("_id" << 1));
+ auto future = launchAsync([&] {
+ auto notInsertedRecord = makeRecord();
+ LogicalSessionIdSet lsids{notInsertedRecord.getId()};
+
+ _collection.findRemovedSessions(operationContext(), lsids);
+ });
+
+ onCommandForPoolExecutor([&](const RemoteCommandRequest& request) {
+ BSONObj obj = request.cmdObj;
+ auto readConcern = obj.getObjectField("readConcern");
+ ASSERT_FALSE(readConcern.isEmpty());
+ auto level = readConcern.getStringField("level");
+ ASSERT_EQ(level, "local");
+
+ return CursorResponse().toBSONAsInitialResponse();
+ });
+
+ future.default_timed_get();
+}
TEST_F(SessionsCollectionShardedTest, RefreshOneSessionStatusErrTest) {
// Set up routing table for the logical sessions collection.
diff --git a/src/mongo/s/sharding_feature_flags.idl b/src/mongo/s/sharding_feature_flags.idl
index 9e210d760d3..da238cba672 100644
--- a/src/mongo/s/sharding_feature_flags.idl
+++ b/src/mongo/s/sharding_feature_flags.idl
@@ -47,3 +47,7 @@ feature_flags:
cpp_varname: feature_flags::gOrphanTracking
default: true
version: 6.0
+ featureFlagGlobalIndexesShardingCatalog:
+ description: "Feature flag for enabling sharding catalog features for global indexes"
+ cpp_varname: feature_flags::gGlobalIndexesShardingCatalog
+ default: false
diff --git a/src/mongo/s/stale_shard_version_helpers_test.cpp b/src/mongo/s/stale_shard_version_helpers_test.cpp
index 89a7c0d9d11..0acedd12eae 100644
--- a/src/mongo/s/stale_shard_version_helpers_test.cpp
+++ b/src/mongo/s/stale_shard_version_helpers_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/logv2/log.h"
#include "mongo/s/sharding_router_test_fixture.h"
#include "mongo/s/stale_shard_version_helpers.h"
@@ -38,7 +35,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -98,8 +94,8 @@ TEST_F(AsyncShardVersionRetry, LimitedStaleErrorsShouldReturnCorrectValue) {
service(), nss(), catalogCache, desc(), getExecutor(), token, [&](OperationContext*) {
if (++tries < 5) {
uassert(StaleConfigInfo(nss(),
- ChunkVersion(5, 23, OID::gen(), {}),
- ChunkVersion(6, 99, OID::gen(), {}),
+ ChunkVersion({OID::gen(), Timestamp(1, 0)}, {5, 23}),
+ ChunkVersion({OID::gen(), Timestamp(1, 0)}, {6, 99}),
ShardId("sB")),
"testX",
false);
diff --git a/src/mongo/s/transaction_router_test.cpp b/src/mongo/s/transaction_router_test.cpp
index ef933e911c1..ff4bd664283 100644
--- a/src/mongo/s/transaction_router_test.cpp
+++ b/src/mongo/s/transaction_router_test.cpp
@@ -67,6 +67,8 @@ const BSONObj kOkReadOnlyFalseResponse = BSON("ok" << 1 << "readOnly" << false);
const BSONObj kOkReadOnlyTrueResponse = BSON("ok" << 1 << "readOnly" << true);
const BSONObj kNoSuchTransactionResponse =
BSON("ok" << 0 << "code" << ErrorCodes::NoSuchTransaction);
+const BSONObj kDummyFindCmd = BSON("find"
+ << "dummy");
class TransactionRouterTest : public ShardingTestFixture {
protected:
@@ -468,7 +470,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
txnRouter.beginOrContinueTxn(
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(),
@@ -491,7 +493,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, FirstParticipantIsCoordinator) {
ASSERT_FALSE(txnRouter.getCoordinatorId());
{
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
auto& participant = *txnRouter.getParticipant(shard1);
ASSERT(participant.isCoordinator);
ASSERT(txnRouter.getCoordinatorId());
@@ -499,7 +501,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, FirstParticipantIsCoordinator) {
}
{
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
auto& participant = *txnRouter.getParticipant(shard2);
ASSERT(!participant.isCoordinator);
ASSERT(txnRouter.getCoordinatorId());
@@ -515,7 +517,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, FirstParticipantIsCoordinator) {
ASSERT_FALSE(txnRouter.getCoordinatorId());
{
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
auto& participant = *txnRouter.getParticipant(shard2);
ASSERT(participant.isCoordinator);
ASSERT(txnRouter.getCoordinatorId());
@@ -536,7 +538,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, RecoveryShardDoesNotGetSetForRea
ASSERT_FALSE(txnRouter.getRecoveryShardId());
// The recovery shard is not set on scheduling requests.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_FALSE(txnRouter.getRecoveryShardId());
// The recovery shard is not set if a participant responds with ok but says it is read-only.
@@ -544,7 +546,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, RecoveryShardDoesNotGetSetForRea
ASSERT_FALSE(txnRouter.getRecoveryShardId());
// The recovery shard is not set even if more read-only participants respond.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard2, kOkReadOnlyTrueResponse);
ASSERT_FALSE(txnRouter.getRecoveryShardId());
@@ -575,7 +577,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
ASSERT(txnRouter.getRecoveryShardId());
ASSERT_EQ(*txnRouter.getRecoveryShardId(), shard1);
@@ -591,7 +593,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
// Response to first statement says read-only.
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyTrueResponse);
@@ -614,12 +616,12 @@ TEST_F(TransactionRouterTestWithDefaultSession,
txnRouter.setDefaultAtClusterTime(operationContext());
// Shard1's response says read-only.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyTrueResponse);
ASSERT_FALSE(txnRouter.getRecoveryShardId());
// Shard2's response says not read-only.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard2, kOkReadOnlyFalseResponse);
ASSERT(txnRouter.getRecoveryShardId());
ASSERT_EQ(*txnRouter.getRecoveryShardId(), shard2);
@@ -636,7 +638,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
txnRouter.setDefaultAtClusterTime(operationContext());
// Shard1's response says not read-only.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
ASSERT(txnRouter.getRecoveryShardId());
ASSERT_EQ(*txnRouter.getRecoveryShardId(), shard1);
@@ -661,7 +663,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
txnRouter.setDefaultAtClusterTime(operationContext());
// Shard1's response says not read-only.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
ASSERT(txnRouter.getRecoveryShardId());
ASSERT_EQ(*txnRouter.getRecoveryShardId(), shard1);
@@ -673,7 +675,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
// Shard2 responds; it doesn't matter whether it's read-only, just that it's a pending
// participant.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard2, kOkReadOnlyFalseResponse);
ASSERT(txnRouter.getRecoveryShardId());
ASSERT_EQ(*txnRouter.getRecoveryShardId(), shard1);
@@ -698,7 +700,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, RecoveryShardIsResetOnStartingNe
txnRouter.setDefaultAtClusterTime(operationContext());
// Shard1's response says not read-only.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
ASSERT(txnRouter.getRecoveryShardId());
ASSERT_EQ(*txnRouter.getRecoveryShardId(), shard1);
@@ -1078,7 +1080,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyTrueResponse);
TxnRecoveryToken recoveryToken;
@@ -1115,7 +1117,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
TxnRecoveryToken recoveryToken;
@@ -1152,8 +1154,8 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyTrueResponse);
txnRouter.processParticipantResponse(operationContext(), shard2, kOkReadOnlyTrueResponse);
@@ -1203,8 +1205,8 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyTrueResponse);
txnRouter.processParticipantResponse(operationContext(), shard2, kOkReadOnlyFalseResponse);
@@ -1252,8 +1254,8 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
txnRouter.processParticipantResponse(operationContext(), shard2, kOkReadOnlyFalseResponse);
@@ -1409,8 +1411,8 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
txnRouter.processParticipantResponse(operationContext(), shard2, kOkReadOnlyFalseResponse);
@@ -1469,8 +1471,8 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
txnRouter.processParticipantResponse(operationContext(), shard2, kOkReadOnlyFalseResponse);
@@ -1717,8 +1719,8 @@ TEST_F(TransactionRouterTestWithDefaultSession, SnapshotErrorsClearsAllParticipa
// Successfully start a transaction on two shards, selecting one as the coordinator.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
ASSERT(txnRouter.getCoordinatorId());
ASSERT_EQ(*txnRouter.getCoordinatorId(), shard1);
@@ -1736,10 +1738,10 @@ TEST_F(TransactionRouterTestWithDefaultSession, SnapshotErrorsClearsAllParticipa
ASSERT_FALSE(txnRouter.getCoordinatorId());
{
- auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
ASSERT_TRUE(newCmd["startTransaction"].trueValue());
- newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
ASSERT_FALSE(newCmd["startTransaction"].trueValue());
}
@@ -1749,10 +1751,10 @@ TEST_F(TransactionRouterTestWithDefaultSession, SnapshotErrorsClearsAllParticipa
{
// Shard1 should also attach startTransaction field again.
- auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_TRUE(newCmd["startTransaction"].trueValue());
- newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_FALSE(newCmd["startTransaction"].trueValue());
}
}
@@ -1794,8 +1796,8 @@ TEST_F(TransactionRouterTestWithDefaultSession, ParticipantsRememberStmtIdCreate
// command.
int initialStmtId = 0;
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
ASSERT_EQ(txnRouter.getParticipant(shard1)->stmtIdCreatedAt, initialStmtId);
ASSERT_EQ(txnRouter.getParticipant(shard2)->stmtIdCreatedAt, initialStmtId);
@@ -1805,7 +1807,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, ParticipantsRememberStmtIdCreate
operationContext(), txnNum, TransactionRouter::TransactionActions::kContinue);
ShardId shard3("shard3");
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard3, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard3, kDummyFindCmd);
ASSERT_EQ(txnRouter.getParticipant(shard3)->stmtIdCreatedAt, initialStmtId + 1);
ASSERT_EQ(txnRouter.getParticipant(shard1)->stmtIdCreatedAt, initialStmtId);
@@ -1822,8 +1824,8 @@ TEST_F(TransactionRouterTestWithDefaultSession, ParticipantsRememberStmtIdCreate
operationContext(), txnNum2, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard3, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard3, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
ASSERT_EQ(txnRouter.getParticipant(shard3)->stmtIdCreatedAt, initialStmtId);
ASSERT_EQ(txnRouter.getParticipant(shard2)->stmtIdCreatedAt, initialStmtId);
@@ -1832,7 +1834,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, ParticipantsRememberStmtIdCreate
txnRouter.beginOrContinueTxn(
operationContext(), txnNum2, TransactionRouter::TransactionActions::kContinue);
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_EQ(txnRouter.getParticipant(shard1)->stmtIdCreatedAt, initialStmtId + 1);
}
@@ -1849,8 +1851,8 @@ TEST_F(TransactionRouterTestWithDefaultSession,
// Start a transaction on two shards, selecting one as the coordinator, but simulate a
// re-targeting error from at least one of them.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
ASSERT(txnRouter.getCoordinatorId());
ASSERT_EQ(*txnRouter.getCoordinatorId(), shard1);
@@ -1867,7 +1869,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
{
ASSERT_FALSE(txnRouter.getParticipant(shard2));
- auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
ASSERT_TRUE(newCmd["startTransaction"].trueValue());
}
@@ -1878,7 +1880,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
{
// Shard1 has not started a transaction.
ASSERT_FALSE(txnRouter.getParticipant(shard1));
- auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_TRUE(newCmd["startTransaction"].trueValue());
}
}
@@ -1894,7 +1896,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, OnlyNewlyCreatedParticipantsClea
// First statement successfully targets one shard, selecting it as the coordinator.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT(txnRouter.getCoordinatorId());
ASSERT_EQ(*txnRouter.getCoordinatorId(), shard1);
@@ -1906,8 +1908,8 @@ TEST_F(TransactionRouterTestWithDefaultSession, OnlyNewlyCreatedParticipantsClea
txnRouter.beginOrContinueTxn(
operationContext(), txnNum, TransactionRouter::TransactionActions::kContinue);
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard3, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard3, kDummyFindCmd);
ASSERT(txnRouter.canContinueOnStaleShardOrDbError("find", kDummyStatus));
auto future = launchAsync(
@@ -1917,13 +1919,16 @@ TEST_F(TransactionRouterTestWithDefaultSession, OnlyNewlyCreatedParticipantsClea
// Shards 2 and 3 must start a transaction, but shard 1 must not.
ASSERT_FALSE(
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {})["startTransaction"]
+ txnRouter
+ .attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd)["startTransaction"]
.trueValue());
ASSERT_TRUE(
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {})["startTransaction"]
+ txnRouter
+ .attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd)["startTransaction"]
.trueValue());
ASSERT_TRUE(
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard3, {})["startTransaction"]
+ txnRouter
+ .attachTxnFieldsIfNeeded(operationContext(), shard3, kDummyFindCmd)["startTransaction"]
.trueValue());
}
@@ -2044,7 +2049,7 @@ TEST_F(TransactionRouterTest, AbortForSingleParticipant) {
txnRouter.beginOrContinueTxn(opCtx, txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
auto future = launchAsync([&] { return txnRouter.abortTransaction(operationContext()); });
@@ -2077,8 +2082,8 @@ TEST_F(TransactionRouterTest, AbortForMultipleParticipantsAllReturnSuccess) {
txnRouter.beginOrContinueTxn(opCtx, txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
txnRouter.processParticipantResponse(operationContext(), shard2, kOkReadOnlyFalseResponse);
@@ -2120,9 +2125,9 @@ TEST_F(TransactionRouterTest, AbortForMultipleParticipantsSomeReturnNoSuchTransa
txnRouter.beginOrContinueTxn(opCtx, txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard3, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard3, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
txnRouter.processParticipantResponse(operationContext(), shard2, kOkReadOnlyFalseResponse);
txnRouter.processParticipantResponse(operationContext(), shard3, kOkReadOnlyFalseResponse);
@@ -2169,9 +2174,9 @@ TEST_F(TransactionRouterTest, AbortForMultipleParticipantsSomeReturnNetworkError
txnRouter.beginOrContinueTxn(opCtx, txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard3, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard3, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
txnRouter.processParticipantResponse(operationContext(), shard2, kOkReadOnlyFalseResponse);
txnRouter.processParticipantResponse(operationContext(), shard3, kOkReadOnlyFalseResponse);
@@ -2219,7 +2224,8 @@ TEST_F(TransactionRouterTestWithDefaultSession, OnViewResolutionErrorClearsAllNe
txnRouter.setDefaultAtClusterTime(operationContext());
// One shard is targeted by the first statement.
- auto firstShardCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ auto firstShardCmd =
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_TRUE(firstShardCmd["startTransaction"].trueValue());
ASSERT(txnRouter.getCoordinatorId());
@@ -2237,7 +2243,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, OnViewResolutionErrorClearsAllNe
ASSERT_FALSE(txnRouter.getCoordinatorId());
// The first shard is targeted by the retry and should have to start a transaction again.
- firstShardCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ firstShardCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_TRUE(firstShardCmd["startTransaction"].trueValue());
// Advance to a later client statement that targets a new shard.
@@ -2246,7 +2252,8 @@ TEST_F(TransactionRouterTestWithDefaultSession, OnViewResolutionErrorClearsAllNe
txnRouter.beginOrContinueTxn(
operationContext(), txnNum, TransactionRouter::TransactionActions::kContinue);
- auto secondShardCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ auto secondShardCmd =
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
ASSERT_TRUE(secondShardCmd["startTransaction"].trueValue());
// Simulate a view resolution error.
@@ -2256,9 +2263,9 @@ TEST_F(TransactionRouterTestWithDefaultSession, OnViewResolutionErrorClearsAllNe
future.default_timed_get();
// Only the new participant shard was reset.
- firstShardCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ firstShardCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_FALSE(firstShardCmd["startTransaction"].trueValue());
- secondShardCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ secondShardCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
ASSERT_TRUE(secondShardCmd["startTransaction"].trueValue());
}
@@ -2292,7 +2299,7 @@ TEST_F(TransactionRouterTest, ImplicitAbortForSingleParticipant) {
txnRouter.beginOrContinueTxn(opCtx, txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
auto future = launchAsync(
[&] { return txnRouter.implicitlyAbortTransaction(operationContext(), kDummyStatus); });
@@ -2325,8 +2332,8 @@ TEST_F(TransactionRouterTest, ImplicitAbortForMultipleParticipants) {
txnRouter.beginOrContinueTxn(opCtx, txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
auto future = launchAsync(
[&] { return txnRouter.implicitlyAbortTransaction(operationContext(), kDummyStatus); });
@@ -2366,7 +2373,7 @@ TEST_F(TransactionRouterTest, ImplicitAbortIgnoresErrors) {
txnRouter.beginOrContinueTxn(opCtx, txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
auto future = launchAsync(
[&] { return txnRouter.implicitlyAbortTransaction(operationContext(), kDummyStatus); });
@@ -2400,7 +2407,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, AbortPropagatesWriteConcern) {
txnRouter.beginOrContinueTxn(opCtx, txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(opCtx);
- txnRouter.attachTxnFieldsIfNeeded(opCtx, shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(opCtx, shard1, kDummyFindCmd);
auto future = launchAsync([&] { return txnRouter.abortTransaction(operationContext()); });
@@ -2424,7 +2431,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, ContinueOnlyOnStaleVersionOnFirs
txnRouter.beginOrContinueTxn(
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
disableRouterRetriesFailPoint();
@@ -2439,10 +2446,10 @@ TEST_F(TransactionRouterTestWithDefaultSession, ContinueOnlyOnStaleVersionOnFirs
txnRouter.onStaleShardOrDbError(operationContext(), "find", kStaleConfigStatus);
// Re-add the initial participant removed by onStaleShardOrDbError
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
// Add another participant
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
// Check that the transaction cannot continue on stale config with more than one participant
ASSERT_FALSE(txnRouter.canContinueOnStaleShardOrDbError("update", kStaleConfigStatus));
@@ -2464,7 +2471,7 @@ TEST_F(TransactionRouterTestWithDefaultSession, ContinueOnlyOnStaleVersionOnFirs
operationContext(), txnNum, TransactionRouter::TransactionActions::kContinue);
// Cannot retry on a stale config error with one participant after the first statement.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_FALSE(txnRouter.canContinueOnStaleShardOrDbError("update", kStaleConfigStatus));
}
@@ -2647,7 +2654,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
//
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT(txnRouter.canContinueOnSnapshotError());
auto future = launchAsync([&] { txnRouter.onSnapshotError(operationContext(), kDummyStatus); });
@@ -2678,7 +2685,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
//
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT(txnRouter.canContinueOnSnapshotError());
auto future = launchAsync([&] { txnRouter.onSnapshotError(operationContext(), kDummyStatus); });
@@ -2711,7 +2718,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
//
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT(txnRouter.canContinueOnSnapshotError());
auto future = launchAsync([&] {
@@ -2741,8 +2748,8 @@ DEATH_TEST_F(TransactionRouterTestWithDefaultSession,
txnRouter.setDefaultAtClusterTime(operationContext());
// Add some participants to the list.
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
// Simulate response from some participant not in the list.
txnRouter.processParticipantResponse(operationContext(), shard3, kOkReadOnlyTrueResponse);
@@ -2758,7 +2765,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, BSON("ok" << 0));
ASSERT(TransactionRouter::Participant::ReadOnly::kUnset ==
txnRouter.getParticipant(shard1)->readOnly);
@@ -2774,7 +2781,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyTrueResponse);
const auto participant = txnRouter.getParticipant(shard1);
@@ -2800,7 +2807,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
const auto participant = txnRouter.getParticipant(shard1);
@@ -2826,7 +2833,7 @@ TEST_F(
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
// First response says readOnly: true.
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyTrueResponse);
@@ -2853,7 +2860,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
// First response says readOnly: false.
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
@@ -2879,7 +2886,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(operationContext());
- txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
// First response is an error.
txnRouter.processParticipantResponse(operationContext(), shard1, BSON("ok" << 0));
@@ -2917,7 +2924,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
txnRouter.beginOrContinueTxn(opCtx, txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(opCtx);
- txnRouter.attachTxnFieldsIfNeeded(opCtx, shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(opCtx, shard1, kDummyFindCmd);
// Continue causes the _latestStmtId to be bumped.
repl::ReadConcernArgs::get(opCtx) = repl::ReadConcernArgs();
@@ -2944,7 +2951,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
txnRouter.beginOrContinueTxn(opCtx, txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(opCtx);
- txnRouter.attachTxnFieldsIfNeeded(opCtx, shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(opCtx, shard1, kDummyFindCmd);
// Aborting will set the termination initiation state.
auto future = launchAsync([&] { txnRouter.implicitlyAbortTransaction(opCtx, kDummyStatus); });
@@ -2967,7 +2974,7 @@ TEST_F(TransactionRouterTestWithDefaultSession,
txnRouter.beginOrContinueTxn(opCtx, txnNum, TransactionRouter::TransactionActions::kStart);
txnRouter.setDefaultAtClusterTime(opCtx);
- txnRouter.attachTxnFieldsIfNeeded(opCtx, shard1, {});
+ txnRouter.attachTxnFieldsIfNeeded(opCtx, shard1, kDummyFindCmd);
// Process !readonly response to set participant state.
txnRouter.processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
@@ -3236,7 +3243,7 @@ protected:
//
void explicitAbortInProgress() {
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter().processParticipantResponse(
operationContext(), shard1, kOkReadOnlyFalseResponse);
@@ -3248,7 +3255,7 @@ protected:
}
void implicitAbortInProgress() {
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter().processParticipantResponse(
operationContext(), shard1, kOkReadOnlyFalseResponse);
@@ -3261,7 +3268,7 @@ protected:
}
void runCommit(StatusWith<BSONObj> swRes, bool expectRetries = false) {
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter().processParticipantResponse(
operationContext(), shard1, kOkReadOnlyFalseResponse);
@@ -3317,7 +3324,7 @@ protected:
}
void runSingleShardCommit() {
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter().processParticipantResponse(operationContext(), shard1, kOkReadOnlyTrueResponse);
startCapturingLogMessages();
@@ -3329,9 +3336,9 @@ protected:
}
void runReadOnlyCommit() {
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter().processParticipantResponse(operationContext(), shard1, kOkReadOnlyTrueResponse);
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter().processParticipantResponse(operationContext(), shard2, kOkReadOnlyTrueResponse);
startCapturingLogMessages();
@@ -3344,9 +3351,9 @@ protected:
}
void runSingleWriteShardCommit() {
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter().processParticipantResponse(operationContext(), shard1, kOkReadOnlyTrueResponse);
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter().processParticipantResponse(
operationContext(), shard2, kOkReadOnlyFalseResponse);
@@ -3359,10 +3366,10 @@ protected:
}
void runTwoPhaseCommit() {
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter().processParticipantResponse(
operationContext(), shard1, kOkReadOnlyFalseResponse);
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter().processParticipantResponse(
operationContext(), shard2, kOkReadOnlyFalseResponse);
@@ -3401,7 +3408,7 @@ protected:
auto beginAndPauseCommit() {
// Commit after targeting one shard so the commit has to do work and can be paused.
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter().processParticipantResponse(
operationContext(), shard1, kOkReadOnlyFalseResponse);
auto future = launchAsync(
@@ -4941,14 +4948,14 @@ TEST_F(TransactionRouterMetricsTest, RouterMetricsTotalContactedParticipants) {
beginTxnWithDefaultTxnNumber();
ASSERT_EQUALS(0L, routerTxnMetrics()->getTotalContactedParticipants());
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_EQUALS(1L, routerTxnMetrics()->getTotalContactedParticipants());
// Only increases for new participants.
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_EQUALS(1L, routerTxnMetrics()->getTotalContactedParticipants());
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
ASSERT_EQUALS(2L, routerTxnMetrics()->getTotalContactedParticipants());
// Is cumulative across transactions.
@@ -4956,7 +4963,7 @@ TEST_F(TransactionRouterMetricsTest, RouterMetricsTotalContactedParticipants) {
operationContext(), kTxnNumber + 1, TransactionRouter::TransactionActions::kStart);
ASSERT_EQUALS(2L, routerTxnMetrics()->getTotalContactedParticipants());
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
ASSERT_EQUALS(3L, routerTxnMetrics()->getTotalContactedParticipants());
}
@@ -4969,15 +4976,15 @@ TEST_F(TransactionRouterMetricsTest, RouterMetricsTotalRequestsTargeted) {
ASSERT_EQUALS(0L, routerTxnMetrics()->getTotalRequestsTargeted());
// Increases each time transaction fields are attached.
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter().processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
ASSERT_EQUALS(1L, routerTxnMetrics()->getTotalRequestsTargeted());
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter().processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
ASSERT_EQUALS(2L, routerTxnMetrics()->getTotalRequestsTargeted());
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter().processParticipantResponse(operationContext(), shard2, kOkReadOnlyFalseResponse);
ASSERT_EQUALS(3L, routerTxnMetrics()->getTotalRequestsTargeted());
}
@@ -5017,11 +5024,11 @@ TEST_F(TransactionRouterMetricsTest, RouterMetricsTotalParticipantsAtCommit) {
beginTxnWithDefaultTxnNumber();
ASSERT_EQUALS(0L, routerTxnMetrics()->getTotalParticipantsAtCommit());
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
txnRouter().processParticipantResponse(operationContext(), shard1, kOkReadOnlyFalseResponse);
ASSERT_EQUALS(0L, routerTxnMetrics()->getTotalParticipantsAtCommit());
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
txnRouter().processParticipantResponse(operationContext(), shard2, kOkReadOnlyFalseResponse);
ASSERT_EQUALS(0L, routerTxnMetrics()->getTotalParticipantsAtCommit());
@@ -5038,7 +5045,7 @@ TEST_F(TransactionRouterMetricsTest, RouterMetricsTotalParticipantsAtCommit) {
operationContext()->setTxnNumber(kTxnNumber + 1);
txnRouter().beginOrContinueTxn(
operationContext(), kTxnNumber + 1, TransactionRouter::TransactionActions::kStart);
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
runCommit(kDummyOkRes);
ASSERT_EQUALS(3L, routerTxnMetrics()->getTotalParticipantsAtCommit());
}
@@ -5202,8 +5209,8 @@ TEST_F(TransactionRouterMetricsTest, ReportResourcesWithParticipantList) {
clockSource->reset(startTime);
beginTxnWithDefaultTxnNumber();
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, {});
- txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, {});
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard1, kDummyFindCmd);
+ txnRouter().attachTxnFieldsIfNeeded(operationContext(), shard2, kDummyFindCmd);
auto state = txnRouter().reportState(operationContext(), true /* sessionIsActive */);
auto transactionDocument = state.getObjectField("transaction");
diff --git a/src/mongo/s/write_ops/batch_write_exec_test.cpp b/src/mongo/s/write_ops/batch_write_exec_test.cpp
index a0ec8867628..aba9c8367c2 100644
--- a/src/mongo/s/write_ops/batch_write_exec_test.cpp
+++ b/src/mongo/s/write_ops/batch_write_exec_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/client/remote_command_targeter_factory_mock.h"
#include "mongo/client/remote_command_targeter_mock.h"
@@ -94,8 +92,8 @@ BSONObj expectInsertsReturnStaleVersionErrorsBase(const NamespaceString& nss,
staleResponse.addToErrDetails(
write_ops::WriteError(i,
Status(StaleConfigInfo(nss,
- ChunkVersion(1, 0, epoch, timestamp),
- ChunkVersion(2, 0, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 0}),
+ ChunkVersion({epoch, timestamp}, {2, 0}),
ShardId(kShardName1)),
"Stale error")));
++i;
@@ -335,7 +333,7 @@ public:
MockNSTargeter singleShardNSTargeter{
nss,
{MockRange(ShardEndpoint(kShardName1,
- ChunkVersion(100, 200, OID::gen(), Timestamp(1, 1)),
+ ChunkVersion({OID::gen(), Timestamp(1, 1)}, {100, 200}),
boost::none),
BSON("x" << MINKEY),
BSON("x" << MAXKEY))}};
@@ -406,19 +404,19 @@ TEST_F(BatchWriteExecTest, SingleUpdateTargetsShardWithLet) {
std::vector<ShardEndpoint> targetUpdate(OperationContext* opCtx,
const BatchItemRef& itemRef) const override {
- return std::vector{
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none)};
+ return std::vector{ShardEndpoint(
+ kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none)};
}
};
MultiShardTargeter multiShardNSTargeter(
nss,
{MockRange(
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
BSON("x" << MINKEY),
BSON("x" << 0)),
MockRange(
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none),
BSON("x" << 0),
BSON("x" << MAXKEY))});
@@ -493,18 +491,20 @@ TEST_F(BatchWriteExecTest, SingleDeleteTargetsShardWithLet) {
std::vector<ShardEndpoint> targetDelete(OperationContext* opCtx,
const BatchItemRef& itemRef) const override {
return std::vector{ShardEndpoint(
- kShardName2, ChunkVersion(101, 200, epoch, Timestamp(1, 1)), boost::none)};
+ kShardName2, ChunkVersion({epoch, Timestamp(1, 1)}, {101, 200}), boost::none)};
}
};
MultiShardTargeter multiShardNSTargeter(
nss,
- {MockRange(ShardEndpoint(
- kShardName1, ChunkVersion(100, 200, epoch, Timestamp(1, 1)), boost::none),
+ {MockRange(ShardEndpoint(kShardName1,
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 200}),
+ boost::none),
BSON("x" << MINKEY),
BSON("x" << 0)),
- MockRange(ShardEndpoint(
- kShardName2, ChunkVersion(101, 200, epoch, Timestamp(1, 1)), boost::none),
+ MockRange(ShardEndpoint(kShardName2,
+ ChunkVersion({epoch, Timestamp(1, 1)}, {101, 200}),
+ boost::none),
BSON("x" << 0),
BSON("x" << MAXKEY))});
@@ -685,19 +685,21 @@ TEST_F(BatchWriteExecTest, StaleShardVersionReturnedFromBatchWithSingleMultiWrit
std::vector<ShardEndpoint> targetUpdate(OperationContext* opCtx,
const BatchItemRef& itemRef) const override {
return std::vector{
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none)};
+ ShardEndpoint(
+ kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
+ ShardEndpoint(
+ kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none)};
}
};
MultiShardTargeter multiShardNSTargeter(
nss,
{MockRange(
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
BSON("x" << MINKEY),
BSON("x" << 0)),
MockRange(
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none),
BSON("x" << 0),
BSON("x" << MAXKEY))});
@@ -726,13 +728,13 @@ TEST_F(BatchWriteExecTest, StaleShardVersionReturnedFromBatchWithSingleMultiWrit
BatchedCommandResponse response;
response.setStatus(Status::OK());
response.setNModified(0);
- response.addToErrDetails(
- write_ops::WriteError(0,
- Status(StaleConfigInfo(nss,
- ChunkVersion(101, 200, epoch, timestamp),
- ChunkVersion(105, 200, epoch, timestamp),
- ShardId(kShardName2)),
- "Stale error")));
+ response.addToErrDetails(write_ops::WriteError(
+ 0,
+ Status(StaleConfigInfo(nss,
+ ChunkVersion({epoch, timestamp}, {101, 200}),
+ ChunkVersion({epoch, timestamp}, {105, 200}),
+ ShardId(kShardName2)),
+ "Stale error")));
return response.toBSON();
});
@@ -783,19 +785,21 @@ TEST_F(BatchWriteExecTest,
std::vector<ShardEndpoint> targetUpdate(OperationContext* opCtx,
const BatchItemRef& itemRef) const override {
return std::vector{
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none)};
+ ShardEndpoint(
+ kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
+ ShardEndpoint(
+ kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none)};
}
};
MultiShardTargeter multiShardNSTargeter(
nss,
{MockRange(
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
BSON("sk" << MINKEY),
BSON("sk" << 10)),
MockRange(
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none),
BSON("sk" << 10),
BSON("sk" << MAXKEY))});
@@ -824,20 +828,20 @@ TEST_F(BatchWriteExecTest,
BatchedCommandResponse response;
response.setStatus(Status::OK());
response.setNModified(0);
- response.addToErrDetails(
- write_ops::WriteError(0,
- Status(StaleConfigInfo(nss,
- ChunkVersion(101, 200, epoch, timestamp),
- ChunkVersion(105, 200, epoch, timestamp),
- ShardId(kShardName2)),
- "Stale error")));
- response.addToErrDetails(
- write_ops::WriteError(1,
- Status(StaleConfigInfo(nss,
- ChunkVersion(101, 200, epoch, timestamp),
- ChunkVersion(105, 200, epoch, timestamp),
- ShardId(kShardName2)),
- "Stale error")));
+ response.addToErrDetails(write_ops::WriteError(
+ 0,
+ Status(StaleConfigInfo(nss,
+ ChunkVersion({epoch, timestamp}, {101, 200}),
+ ChunkVersion({epoch, timestamp}, {105, 200}),
+ ShardId(kShardName2)),
+ "Stale error")));
+ response.addToErrDetails(write_ops::WriteError(
+ 1,
+ Status(StaleConfigInfo(nss,
+ ChunkVersion({epoch, timestamp}, {101, 200}),
+ ChunkVersion({epoch, timestamp}, {105, 200}),
+ ShardId(kShardName2)),
+ "Stale error")));
return response.toBSON();
});
@@ -887,19 +891,21 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromMultiWriteWithShard1Firs) {
std::vector<ShardEndpoint> targetUpdate(OperationContext* opCtx,
const BatchItemRef& itemRef) const override {
return std::vector{
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none)};
+ ShardEndpoint(
+ kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
+ ShardEndpoint(
+ kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none)};
}
};
MultiShardTargeter multiShardNSTargeter(
nss,
{MockRange(
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
BSON("sk" << MINKEY),
BSON("sk" << 10)),
MockRange(
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none),
BSON("sk" << 10),
BSON("sk" << MAXKEY))});
@@ -918,13 +924,13 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromMultiWriteWithShard1Firs) {
BatchedCommandResponse response;
response.setStatus(Status::OK());
response.setNModified(0);
- response.addToErrDetails(
- write_ops::WriteError(1,
- Status(StaleConfigInfo(nss,
- ChunkVersion(101, 200, epoch, timestamp),
- ChunkVersion(105, 200, epoch, timestamp),
- ShardId(kShardName2)),
- "Stale error")));
+ response.addToErrDetails(write_ops::WriteError(
+ 1,
+ Status(StaleConfigInfo(nss,
+ ChunkVersion({epoch, timestamp}, {101, 200}),
+ ChunkVersion({epoch, timestamp}, {105, 200}),
+ ShardId(kShardName2)),
+ "Stale error")));
return response.toBSON();
});
@@ -934,13 +940,13 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromMultiWriteWithShard1Firs) {
BatchedCommandResponse response;
response.setStatus(Status::OK());
response.setNModified(0);
- response.addToErrDetails(
- write_ops::WriteError(0,
- Status(StaleConfigInfo(nss,
- ChunkVersion(101, 200, epoch, timestamp),
- ChunkVersion(105, 200, epoch, timestamp),
- ShardId(kShardName2)),
- "Stale error")));
+ response.addToErrDetails(write_ops::WriteError(
+ 0,
+ Status(StaleConfigInfo(nss,
+ ChunkVersion({epoch, timestamp}, {101, 200}),
+ ChunkVersion({epoch, timestamp}, {105, 200}),
+ ShardId(kShardName2)),
+ "Stale error")));
return response.toBSON();
});
@@ -1001,19 +1007,21 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromMultiWriteWithShard1FirstOK
std::vector<ShardEndpoint> targetUpdate(OperationContext* opCtx,
const BatchItemRef& itemRef) const override {
return std::vector{
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none)};
+ ShardEndpoint(
+ kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
+ ShardEndpoint(
+ kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none)};
}
};
MultiShardTargeter multiShardNSTargeter(
nss,
{MockRange(
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
BSON("sk" << MINKEY),
BSON("sk" << 10)),
MockRange(
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none),
BSON("sk" << 10),
BSON("sk" << MAXKEY))});
@@ -1032,13 +1040,13 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromMultiWriteWithShard1FirstOK
BatchedCommandResponse response;
response.setStatus(Status::OK());
response.setNModified(0);
- response.addToErrDetails(
- write_ops::WriteError(1,
- Status(StaleConfigInfo(nss,
- ChunkVersion(101, 200, epoch, timestamp),
- ChunkVersion(105, 200, epoch, timestamp),
- ShardId(kShardName2)),
- "Stale error")));
+ response.addToErrDetails(write_ops::WriteError(
+ 1,
+ Status(StaleConfigInfo(nss,
+ ChunkVersion({epoch, timestamp}, {101, 200}),
+ ChunkVersion({epoch, timestamp}, {105, 200}),
+ ShardId(kShardName2)),
+ "Stale error")));
return response.toBSON();
});
@@ -1048,13 +1056,13 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromMultiWriteWithShard1FirstOK
BatchedCommandResponse response;
response.setStatus(Status::OK());
response.setNModified(0);
- response.addToErrDetails(
- write_ops::WriteError(1,
- Status(StaleConfigInfo(nss,
- ChunkVersion(101, 200, epoch, timestamp),
- ChunkVersion(105, 200, epoch, timestamp),
- ShardId(kShardName2)),
- "Stale error")));
+ response.addToErrDetails(write_ops::WriteError(
+ 1,
+ Status(StaleConfigInfo(nss,
+ ChunkVersion({epoch, timestamp}, {101, 200}),
+ ChunkVersion({epoch, timestamp}, {105, 200}),
+ ShardId(kShardName2)),
+ "Stale error")));
return response.toBSON();
});
@@ -1112,12 +1120,12 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromWriteWithShard1SSVShard2OK)
if (targetAll) {
return std::vector{
ShardEndpoint(
- kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
+ kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
ShardEndpoint(
- kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none)};
+ kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none)};
} else {
return std::vector{ShardEndpoint(
- kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none)};
+ kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none)};
}
}
@@ -1127,11 +1135,11 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromWriteWithShard1SSVShard2OK)
MultiShardTargeter multiShardNSTargeter(
nss,
{MockRange(
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
BSON("sk" << MINKEY),
BSON("sk" << 10)),
MockRange(
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none),
BSON("sk" << 10),
BSON("sk" << MAXKEY))});
@@ -1151,13 +1159,13 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromWriteWithShard1SSVShard2OK)
response.setStatus(Status::OK());
response.setNModified(0);
response.setN(0);
- response.addToErrDetails(
- write_ops::WriteError(0,
- Status(StaleConfigInfo(nss,
- ChunkVersion(101, 200, epoch, timestamp),
- ChunkVersion(105, 200, epoch, timestamp),
- ShardId(kShardName2)),
- "Stale error")));
+ response.addToErrDetails(write_ops::WriteError(
+ 0,
+ Status(StaleConfigInfo(nss,
+ ChunkVersion({epoch, timestamp}, {101, 200}),
+ ChunkVersion({epoch, timestamp}, {105, 200}),
+ ShardId(kShardName2)),
+ "Stale error")));
// This simulates a migration of the last chunk on shard 1 to shard 2, which means that
// future rounds on the batchExecutor should only target shard 2
@@ -1874,19 +1882,21 @@ TEST_F(BatchWriteExecTargeterErrorTest, TargetedFailedAndErrorResponse) {
std::vector<ShardEndpoint> targetUpdate(OperationContext* opCtx,
const BatchItemRef& itemRef) const override {
return std::vector{
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none)};
+ ShardEndpoint(
+ kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
+ ShardEndpoint(
+ kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none)};
}
};
MultiShardTargeter multiShardNSTargeter(
nss,
{MockRange(
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
BSON("x" << MINKEY),
BSON("x" << 0)),
MockRange(
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none),
BSON("x" << 0),
BSON("x" << MAXKEY))});
@@ -2010,19 +2020,21 @@ TEST_F(BatchWriteExecTransactionTargeterErrorTest, TargetedFailedAndErrorRespons
std::vector<ShardEndpoint> targetUpdate(OperationContext* opCtx,
const BatchItemRef& itemRef) const override {
return std::vector{
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none)};
+ ShardEndpoint(
+ kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
+ ShardEndpoint(
+ kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none)};
}
};
MultiShardTargeter multiShardNSTargeter(
nss,
{MockRange(
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
BSON("x" << MINKEY),
BSON("x" << 0)),
MockRange(
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none),
BSON("x" << 0),
BSON("x" << MAXKEY))});
@@ -2154,19 +2166,21 @@ TEST_F(BatchWriteExecTransactionMultiShardTest, TargetedSucceededAndErrorRespons
std::vector<ShardEndpoint> targetUpdate(OperationContext* opCtx,
const BatchItemRef& itemRef) const override {
return std::vector{
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none)};
+ ShardEndpoint(
+ kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
+ ShardEndpoint(
+ kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none)};
}
};
MultiShardTargeter multiShardNSTargeter(
nss,
{MockRange(
- ShardEndpoint(kShardName1, ChunkVersion(100, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName1, ChunkVersion({epoch, timestamp}, {100, 200}), boost::none),
BSON("x" << MINKEY),
BSON("x" << 0)),
MockRange(
- ShardEndpoint(kShardName2, ChunkVersion(101, 200, epoch, timestamp), boost::none),
+ ShardEndpoint(kShardName2, ChunkVersion({epoch, timestamp}, {101, 200}), boost::none),
BSON("x" << 0),
BSON("x" << MAXKEY))});
diff --git a/src/mongo/s/write_ops/batch_write_op.cpp b/src/mongo/s/write_ops/batch_write_op.cpp
index b034bcd0ee4..a61ee3dd4bf 100644
--- a/src/mongo/s/write_ops/batch_write_op.cpp
+++ b/src/mongo/s/write_ops/batch_write_op.cpp
@@ -289,6 +289,13 @@ void populateCollectionUUIDMismatch(OperationContext* opCtx,
}
}
+int getEncryptionInformationSize(const BatchedCommandRequest& req) {
+ if (!req.getWriteCommandRequestBase().getEncryptionInformation()) {
+ return 0;
+ }
+ return req.getWriteCommandRequestBase().getEncryptionInformation().get().toBSON().objsize();
+}
+
} // namespace
BatchWriteOp::BatchWriteOp(OperationContext* opCtx, const BatchedCommandRequest& clientRequest)
@@ -421,6 +428,7 @@ Status BatchWriteOp::targetBatch(
//
// The constant 4 is chosen as the size of the BSON representation of the stmtId.
const int writeSizeBytes = getWriteSizeBytes(writeOp) +
+ getEncryptionInformationSize(_clientRequest) +
write_ops::kWriteCommandBSONArrayPerElementOverheadBytes +
(_batchTxnNum ? write_ops::kWriteCommandBSONArrayPerElementOverheadBytes + 4 : 0);
@@ -583,6 +591,9 @@ BatchedCommandRequest BatchWriteOp::buildBatchRequest(const TargetedWriteBatch&
wcb.setOrdered(_clientRequest.getWriteCommandRequestBase().getOrdered());
wcb.setCollectionUUID(_clientRequest.getWriteCommandRequestBase().getCollectionUUID());
+ wcb.setEncryptionInformation(
+ _clientRequest.getWriteCommandRequestBase().getEncryptionInformation());
+
if (targeter.isShardedTimeSeriesBucketsNamespace() &&
!_clientRequest.getNS().isTimeseriesBucketsCollection()) {
wcb.setIsTimeseriesNamespace(true);
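Note: the batch_write_op.cpp change above folds the serialized encryptionInformation size into the per-write size estimate used when targeting batches. A minimal sketch of that accounting, with hypothetical stand-in types (the real code reads the object from the request's WriteCommandRequestBase):

    #include <cstddef>
    #include <optional>
    #include <string>

    // Hypothetical stand-in for a serialized BSON object.
    struct BsonSketch {
        std::string bytes;
        std::size_t objsize() const { return bytes.size(); }
    };

    // Mirrors getEncryptionInformationSize(): contributes 0 when the request carries no
    // encryptionInformation, otherwise the serialized object's size.
    std::size_t encryptionInfoSize(const std::optional<BsonSketch>& encryptionInfo) {
        return encryptionInfo ? encryptionInfo->objsize() : 0;
    }

    // Mirrors the writeSizeBytes computation: base write size, plus encryption info, plus
    // per-element overhead, plus (inside a transaction) overhead and ~4 bytes for the stmtId.
    std::size_t estimateWriteSizeBytes(std::size_t baseWriteSize,
                                       std::size_t perElementOverhead,
                                       bool inTransaction,
                                       const std::optional<BsonSketch>& encryptionInfo) {
        return baseWriteSize + encryptionInfoSize(encryptionInfo) + perElementOverhead +
            (inTransaction ? perElementOverhead + 4 : 0);
    }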
diff --git a/src/mongo/s/write_ops/batch_write_op_test.cpp b/src/mongo/s/write_ops/batch_write_op_test.cpp
index bfda09f0814..eb07226621f 100644
--- a/src/mongo/s/write_ops/batch_write_op_test.cpp
+++ b/src/mongo/s/write_ops/batch_write_op_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/s/concurrency/locker_mongos_client_observer.h"
#include "mongo/s/mock_ns_targeter.h"
#include "mongo/s/session_catalog_router.h"
@@ -289,8 +287,8 @@ TEST_F(BatchWriteOpTest, SingleStaleError) {
response.addToErrDetails(
write_ops::WriteError(0,
Status{StaleConfigInfo(nss,
- ChunkVersion(101, 200, epoch, timestamp),
- ChunkVersion(105, 200, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {101, 200}),
+ ChunkVersion({epoch, timestamp}, {105, 200}),
ShardId("shard")),
"mock stale error"}));
diff --git a/src/mongo/s/write_ops/batched_command_request.cpp b/src/mongo/s/write_ops/batched_command_request.cpp
index 107f1a49204..c33280790d2 100644
--- a/src/mongo/s/write_ops/batched_command_request.cpp
+++ b/src/mongo/s/write_ops/batched_command_request.cpp
@@ -46,7 +46,7 @@ BatchedCommandRequest constructBatchedCommandRequest(const OpMsgRequest& request
auto shardVersionField = request.body[ChunkVersion::kShardVersionField];
if (!shardVersionField.eoo()) {
- auto shardVersion = ChunkVersion::fromBSONPositionalOrNewerFormat(shardVersionField);
+ auto shardVersion = ChunkVersion::parse(shardVersionField);
if (shardVersion == ChunkVersion::UNSHARDED()) {
batchRequest.setDbVersion(DatabaseVersion(request.body));
}
diff --git a/src/mongo/s/write_ops/batched_command_request_test.cpp b/src/mongo/s/write_ops/batched_command_request_test.cpp
index 9a5e968f10d..0ba795e44a5 100644
--- a/src/mongo/s/write_ops/batched_command_request_test.cpp
+++ b/src/mongo/s/write_ops/batched_command_request_test.cpp
@@ -27,10 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
-#include <memory>
-
#include "mongo/bson/json.h"
#include "mongo/db/ops/write_ops_parsers_test_helpers.h"
#include "mongo/s/write_ops/batched_command_request.h"
@@ -60,14 +56,15 @@ TEST(BatchedCommandRequest, InsertWithShardVersion) {
BSONArray insertArray = BSON_ARRAY(BSON("a" << 1) << BSON("b" << 1));
const OID epoch = OID::gen();
- const Timestamp majorAndMinor(1, 2);
const Timestamp timestamp(2, 2);
+ const Timestamp majorAndMinor(1, 2);
BSONObj origInsertRequestObj = BSON("insert"
<< "test"
<< "documents" << insertArray << "writeConcern"
<< BSON("w" << 1) << "ordered" << true << "shardVersion"
- << BSON_ARRAY(majorAndMinor << epoch << timestamp));
+ << BSON("e" << epoch << "t" << timestamp << "v"
+ << majorAndMinor));
for (auto docSeq : {false, true}) {
const auto opMsgRequest(toOpMsg("TestDB", origInsertRequestObj, docSeq));
@@ -75,7 +72,7 @@ TEST(BatchedCommandRequest, InsertWithShardVersion) {
ASSERT_EQ("TestDB.test", insertRequest.getInsertRequest().getNamespace().ns());
ASSERT(insertRequest.hasShardVersion());
- ASSERT_EQ(ChunkVersion(1, 2, epoch, timestamp).toString(),
+ ASSERT_EQ(ChunkVersion({epoch, timestamp}, {1, 2}).toString(),
insertRequest.getShardVersion().toString());
}
}
diff --git a/src/mongo/s/write_ops/batched_command_response_test.cpp b/src/mongo/s/write_ops/batched_command_response_test.cpp
index f17637ade04..4d7acf32c22 100644
--- a/src/mongo/s/write_ops/batched_command_response_test.cpp
+++ b/src/mongo/s/write_ops/batched_command_response_test.cpp
@@ -67,45 +67,12 @@ TEST(BatchedCommandResponseTest, Basic) {
ASSERT_BSONOBJ_EQ(origResponseObj, genResponseObj);
}
-// TODO (SERVER-64449): Get rid of this entire test case
-TEST(BatchedCommandResponseTest, StaleErrorAsStaleShardVersionCompatibility) {
+TEST(BatchedCommandResponseTest, StaleConfigInfo) {
OID epoch = OID::gen();
StaleConfigInfo staleInfo(NamespaceString("TestDB.TestColl"),
- ChunkVersion(1, 0, epoch, Timestamp(100, 0)),
- ChunkVersion(2, 0, epoch, Timestamp(100, 0)),
- ShardId("TestShard"));
- BSONObjBuilder builder;
- staleInfo.serialize(&builder);
-
- BSONArray writeErrorsArray(
- BSON_ARRAY(BSON("index" << 0 << "code" << ErrorCodes::OBSOLETE_StaleShardVersion << "errmsg"
- << "OBSOLETE_StaleShardVersion error"
- << "errInfo" << builder.obj())
- << BSON("index" << 1 << "code" << ErrorCodes::InvalidNamespace << "errmsg"
- << "index 1 failed too")));
-
- BSONObj origResponseObj =
- BSON("n" << 0 << "opTime" << mongo::Timestamp(1ULL) << "writeErrors" << writeErrorsArray
- << "retriedStmtIds" << BSON_ARRAY(1 << 3) << "ok" << 1.0);
-
- std::string errMsg;
- BatchedCommandResponse response;
- ASSERT_TRUE(response.parseBSON(origResponseObj, &errMsg));
- ASSERT_EQ(0, response.getErrDetailsAt(0).getIndex());
- ASSERT_EQ(ErrorCodes::StaleConfig, response.getErrDetailsAt(0).getStatus().code());
- auto extraInfo = response.getErrDetailsAt(0).getStatus().extraInfo<StaleConfigInfo>();
- ASSERT_EQ(staleInfo.getVersionReceived(), extraInfo->getVersionReceived());
- ASSERT_EQ(*staleInfo.getVersionWanted(), *extraInfo->getVersionWanted());
- ASSERT_EQ(staleInfo.getShardId(), extraInfo->getShardId());
-}
-
-TEST(BatchedCommandResponseTest, StaleErrorAsStaleConfigCompatibility) {
- OID epoch = OID::gen();
-
- StaleConfigInfo staleInfo(NamespaceString("TestDB.TestColl"),
- ChunkVersion(1, 0, epoch, Timestamp(100, 0)),
- ChunkVersion(2, 0, epoch, Timestamp(100, 0)),
+ ChunkVersion({epoch, Timestamp(100, 0)}, {1, 0}),
+ ChunkVersion({epoch, Timestamp(100, 0)}, {2, 0}),
ShardId("TestShard"));
BSONObjBuilder builder(BSON("index" << 0 << "code" << ErrorCodes::StaleConfig << "errmsg"
<< "StaleConfig error"));
@@ -189,7 +156,7 @@ TEST(BatchedCommandResponseTest, TooManyBigErrors) {
}
TEST(BatchedCommandResponseTest, CompatibilityFromWriteErrorToBatchCommandResponse) {
- ChunkVersion versionReceived(1, 0, OID::gen(), Timestamp(2, 0));
+ ChunkVersion versionReceived({OID::gen(), Timestamp(2, 0)}, {1, 0});
write_ops::UpdateCommandReply reply;
reply.getWriteCommandReplyBase().setN(1);
diff --git a/src/mongo/s/write_ops/write_op_test.cpp b/src/mongo/s/write_ops/write_op_test.cpp
index 884ffc906c3..2d179b6593f 100644
--- a/src/mongo/s/write_ops/write_op_test.cpp
+++ b/src/mongo/s/write_ops/write_op_test.cpp
@@ -119,11 +119,11 @@ TEST_F(WriteOpTest, TargetSingle) {
// Multi-write targeting test where our query goes to one shard
TEST_F(WriteOpTest, TargetMultiOneShard) {
ShardEndpoint endpointA(
- ShardId("shardA"), ChunkVersion(10, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardA"), ChunkVersion({OID(), Timestamp(1, 1)}, {10, 0}), boost::none);
ShardEndpoint endpointB(
- ShardId("shardB"), ChunkVersion(20, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardB"), ChunkVersion({OID(), Timestamp(1, 1)}, {20, 0}), boost::none);
ShardEndpoint endpointC(
- ShardId("shardB"), ChunkVersion(20, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardB"), ChunkVersion({OID(), Timestamp(1, 1)}, {20, 0}), boost::none);
BatchedCommandRequest request([&] {
write_ops::DeleteCommandRequest deleteOp(kNss);
@@ -154,11 +154,11 @@ TEST_F(WriteOpTest, TargetMultiOneShard) {
// Multi-write targeting test where our write goes to more than one shard
TEST_F(WriteOpTest, TargetMultiAllShards) {
ShardEndpoint endpointA(
- ShardId("shardA"), ChunkVersion(10, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardA"), ChunkVersion({OID(), Timestamp(1, 1)}, {10, 0}), boost::none);
ShardEndpoint endpointB(
- ShardId("shardB"), ChunkVersion(20, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardB"), ChunkVersion({OID(), Timestamp(1, 1)}, {20, 0}), boost::none);
ShardEndpoint endpointC(
- ShardId("shardB"), ChunkVersion(20, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardB"), ChunkVersion({OID(), Timestamp(1, 1)}, {20, 0}), boost::none);
BatchedCommandRequest request([&] {
write_ops::DeleteCommandRequest deleteOp(kNss);
@@ -196,9 +196,9 @@ TEST_F(WriteOpTest, TargetMultiAllShards) {
TEST_F(WriteOpTest, TargetMultiAllShardsAndErrorSingleChildOp) {
ShardEndpoint endpointA(
- ShardId("shardA"), ChunkVersion(10, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardA"), ChunkVersion({OID(), Timestamp(1, 1)}, {10, 0}), boost::none);
ShardEndpoint endpointB(
- ShardId("shardB"), ChunkVersion(20, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardB"), ChunkVersion({OID(), Timestamp(1, 1)}, {20, 0}), boost::none);
BatchedCommandRequest request([&] {
write_ops::DeleteCommandRequest deleteOp(kNss);
@@ -228,8 +228,8 @@ TEST_F(WriteOpTest, TargetMultiAllShardsAndErrorSingleChildOp) {
write_ops::WriteError retryableError(
0,
{StaleConfigInfo(kNss,
- ChunkVersion(10, 0, OID(), Timestamp(1, 1)),
- ChunkVersion(11, 0, OID(), Timestamp(1, 1)),
+ ChunkVersion({OID(), Timestamp(1, 1)}, {10, 0}),
+ ChunkVersion({OID(), Timestamp(1, 1)}, {11, 0}),
ShardId("shardA")),
"simulate ssv error for test"});
writeOp.noteWriteError(*targeted[0], retryableError);
@@ -346,11 +346,11 @@ private:
TEST_F(WriteOpTransactionTest, TargetMultiDoesNotTargetAllShards) {
ShardEndpoint endpointA(
- ShardId("shardA"), ChunkVersion(10, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardA"), ChunkVersion({OID(), Timestamp(1, 1)}, {10, 0}), boost::none);
ShardEndpoint endpointB(
- ShardId("shardB"), ChunkVersion(20, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardB"), ChunkVersion({OID(), Timestamp(1, 1)}, {20, 0}), boost::none);
ShardEndpoint endpointC(
- ShardId("shardB"), ChunkVersion(20, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardB"), ChunkVersion({OID(), Timestamp(1, 1)}, {20, 0}), boost::none);
BatchedCommandRequest request([&] {
write_ops::DeleteCommandRequest deleteOp(kNss);
@@ -386,9 +386,9 @@ TEST_F(WriteOpTransactionTest, TargetMultiDoesNotTargetAllShards) {
TEST_F(WriteOpTransactionTest, TargetMultiAllShardsAndErrorSingleChildOp) {
ShardEndpoint endpointA(
- ShardId("shardA"), ChunkVersion(10, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardA"), ChunkVersion({OID(), Timestamp(1, 1)}, {10, 0}), boost::none);
ShardEndpoint endpointB(
- ShardId("shardB"), ChunkVersion(20, 0, OID(), Timestamp(1, 1)), boost::none);
+ ShardId("shardB"), ChunkVersion({OID(), Timestamp(1, 1)}, {20, 0}), boost::none);
BatchedCommandRequest request([&] {
write_ops::DeleteCommandRequest deleteOp(kNss);
@@ -422,8 +422,8 @@ TEST_F(WriteOpTransactionTest, TargetMultiAllShardsAndErrorSingleChildOp) {
write_ops::WriteError retryableError(
0,
{StaleConfigInfo(kNss,
- ChunkVersion(10, 0, OID(), Timestamp(1, 1)),
- ChunkVersion(11, 0, OID(), Timestamp(1, 1)),
+ ChunkVersion({OID(), Timestamp(1, 1)}, {10, 0}),
+ ChunkVersion({OID(), Timestamp(1, 1)}, {11, 0}),
ShardId("shardA")),
"simulate ssv error for test"});
writeOp.noteWriteError(*targeted[0], retryableError);
diff --git a/src/mongo/scripting/SConscript b/src/mongo/scripting/SConscript
index 9ff0309cc22..d062b2b667e 100644
--- a/src/mongo/scripting/SConscript
+++ b/src/mongo/scripting/SConscript
@@ -122,13 +122,13 @@ if jsEngine:
LIBDEPS=[
'$BUILD_DIR/mongo/db/service_context',
'$BUILD_DIR/mongo/shell/mongojs',
- '$BUILD_DIR/third_party/mozjs/mozjs',
'bson_template_evaluator',
'scripting_common',
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/client/clientdriver_network',
'$BUILD_DIR/mongo/idl/server_parameter',
+ '$BUILD_DIR/third_party/mozjs/mozjs',
],
)
else:
diff --git a/src/mongo/scripting/mozjs/mongo.cpp b/src/mongo/scripting/mozjs/mongo.cpp
index 00818afb115..849d74aca36 100644
--- a/src/mongo/scripting/mozjs/mongo.cpp
+++ b/src/mongo/scripting/mozjs/mongo.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/scripting/mozjs/mongo.h"
#include <js/Object.h>
@@ -36,6 +34,7 @@
#include "mongo/bson/simple_bsonelement_comparator.h"
#include "mongo/client/client_api_version_parameters_gen.h"
+#include "mongo/client/client_deprecated.h"
#include "mongo/client/dbclient_base.h"
#include "mongo/client/dbclient_rs.h"
#include "mongo/client/global_conn_pool.h"
@@ -259,13 +258,16 @@ void doRunCommand(JSContext* cx, JS::CallArgs args, MakeRequest makeRequest) {
auto arg = ValueWriter(cx, args.get(1)).toBSON();
auto request = makeRequest(database, arg);
- if (auto token = args.get(3); token.isObject()) {
- request.securityToken = ValueWriter(cx, token).toBSON();
+ if (auto tokenArg = args.get(3); tokenArg.isObject()) {
+ using VTS = auth::ValidatedTenancyScope;
+ if (auto token = ValueWriter(cx, tokenArg).toBSON(); token.nFields() > 0) {
+ request.validatedTenancyScope = VTS(token, VTS::InitTag::kInitForShell);
+ }
} else {
uassert(ErrorCodes::BadValue,
str::stream() << "The token parameter to " << Params::kCommandName
<< " must be an object",
- token.isUndefined());
+ tokenArg.isUndefined());
}
const auto& conn = getConnectionRef(args);
@@ -316,6 +318,51 @@ void MongoBase::Functions::_runCommandImpl::call(JSContext* cx, JS::CallArgs arg
});
}
+namespace {
+/**
+ * WARNING: Do not add new callers! This is a special-purpose function that exists only to
+ * accommodate the shell.
+ *
+ * Although OP_QUERY find is no longer supported by either the shell or the server, the shell's
+ * exhaust path still internally constructs a request that resembles an OP_QUERY find. This function
+ * converts this query to a 'FindCommandRequest'.
+ */
+FindCommandRequest upconvertLegacyOpQueryToFindCommandRequest(NamespaceString nss,
+ const BSONObj& opQueryFormattedBson,
+ const BSONObj& projection,
+ int limit,
+ int skip,
+ int batchSize,
+ int queryOptions) {
+ FindCommandRequest findCommand{std::move(nss)};
+
+ client_deprecated::initFindFromLegacyOptions(opQueryFormattedBson, queryOptions, &findCommand);
+
+ if (!projection.isEmpty()) {
+ findCommand.setProjection(projection.getOwned());
+ }
+
+ if (limit) {
+ // To avoid changing the behavior of the shell API, we allow the caller of the JS code to
+ // use a negative limit to request at most a single batch.
+ if (limit < 0) {
+ findCommand.setLimit(-static_cast<int64_t>(limit));
+ findCommand.setSingleBatch(true);
+ } else {
+ findCommand.setLimit(limit);
+ }
+ }
+ if (skip) {
+ findCommand.setSkip(skip);
+ }
+ if (batchSize) {
+ findCommand.setBatchSize(batchSize);
+ }
+
+ return findCommand;
+}
+} // namespace
+
void MongoBase::Functions::find::call(JSContext* cx, JS::CallArgs args) {
auto scope = getScope(cx);
@@ -351,15 +398,12 @@ void MongoBase::Functions::find::call(JSContext* cx, JS::CallArgs args) {
int batchSize = ValueWriter(cx, args.get(5)).toInt32();
int options = ValueWriter(cx, args.get(6)).toInt32();
- const Query query = Query::fromBSONDeprecated(q);
- std::unique_ptr<DBClientCursor> cursor(conn->query_DEPRECATED(NamespaceString(ns),
- query.getFilter(),
- query,
- limit,
- nToSkip,
- haveFields ? &fields : nullptr,
- options,
- batchSize));
+ auto findCmd = upconvertLegacyOpQueryToFindCommandRequest(
+ NamespaceString{ns}, q, fields, limit, nToSkip, batchSize, options);
+ auto readPref = uassertStatusOK(ReadPreferenceSetting::fromContainingBSON(q));
+ ExhaustMode exhaustMode =
+ ((options & QueryOption_Exhaust) != 0) ? ExhaustMode::kOn : ExhaustMode::kOff;
+ std::unique_ptr<DBClientCursor> cursor = conn->find(std::move(findCmd), readPref, exhaustMode);
if (!cursor.get()) {
uasserted(ErrorCodes::InternalError, "error doing query: failed");
}
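Note: the mongo.cpp change above replaces the shell's query_DEPRECATED() path with DBClientBase::find(), upconverting the legacy OP_QUERY-style arguments into a FindCommandRequest. The negative-limit convention is the subtle part; below is a standalone sketch of just that rule (FindOptionsSketch is an illustrative stand-in, not the real FindCommandRequest):

    #include <cstdint>
    #include <iostream>

    struct FindOptionsSketch {
        std::int64_t limit = 0;
        bool singleBatch = false;
    };

    // A negative legacy limit means "return at most a single batch" in the shell API, so it
    // becomes a positive limit plus singleBatch=true; a positive limit passes through unchanged.
    FindOptionsSketch applyLegacyLimit(int legacyLimit) {
        FindOptionsSketch opts;
        if (legacyLimit < 0) {
            opts.limit = -static_cast<std::int64_t>(legacyLimit);
            opts.singleBatch = true;
        } else if (legacyLimit > 0) {
            opts.limit = legacyLimit;
        }
        return opts;
    }

    int main() {
        auto opts = applyLegacyLimit(-5);
        std::cout << opts.limit << " singleBatch=" << opts.singleBatch << "\n";  // 5 singleBatch=1
    }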
diff --git a/src/mongo/shell/SConscript b/src/mongo/shell/SConscript
index 6607afc5792..650f8f72f1f 100644
--- a/src/mongo/shell/SConscript
+++ b/src/mongo/shell/SConscript
@@ -212,7 +212,7 @@ if get_option('ssl') == 'on':
'$BUILD_DIR/mongo/db/matcher/expressions',
'$BUILD_DIR/mongo/scripting/scripting',
'$BUILD_DIR/mongo/shell/shell_options_register',
- '$BUILD_DIR/third_party/shim_mozjs',
+ '$BUILD_DIR/third_party/mozjs/mozjs',
'kms',
],
)
diff --git a/src/mongo/shell/db.js b/src/mongo/shell/db.js
index d3800871af1..0d55b6baa45 100644
--- a/src/mongo/shell/db.js
+++ b/src/mongo/shell/db.js
@@ -800,12 +800,6 @@ DB.prototype.hello = function() {
return this.runCommand("hello");
};
-var commandUnsupported = function(res) {
- return (!res.ok &&
- (res.errmsg.startsWith("no such cmd") || res.errmsg.startsWith("no such command") ||
- res.code === 59 /* CommandNotFound */));
-};
-
DB.prototype.currentOp = function(arg) {
// TODO CLOUDP-89361: The shell is connected to the Atlas Proxy, which currently does not
// support the $currentOp aggregation stage. Remove the legacy server command path once the
@@ -841,19 +835,7 @@ DB.prototype.currentOpLegacy = function(arg) {
var commandObj = {"currentOp": 1};
Object.extend(commandObj, q);
- var res = this.adminCommand(commandObj);
- if (commandUnsupported(res)) {
- // always send legacy currentOp with default (null) read preference (SERVER-17951)
- const session = this.getSession();
- const readPreference = session.getOptions().getReadPreference();
- try {
- session.getOptions().setReadPreference(null);
- res = this.getSiblingDB("admin").$cmd.sys.inprog.findOne(q);
- } finally {
- session.getOptions().setReadPreference(readPreference);
- }
- }
- return res;
+ return this.adminCommand(commandObj);
};
DB.prototype.currentOpCursor = function(arg) {
@@ -898,19 +880,7 @@ DB.prototype.currentOpCursor = function(arg) {
DB.prototype.killOp = function(op) {
if (!op)
throw Error("no opNum to kill specified");
- var res = this.adminCommand({'killOp': 1, 'op': op});
- if (commandUnsupported(res)) {
- // fall back for old servers
- const session = this.getSession();
- const readPreference = session.getOptions().getReadPreference();
- try {
- session.getOptions().setReadPreference(null);
- res = this.getSiblingDB("admin").$cmd.sys.killop.findOne({'op': op});
- } finally {
- session.getOptions().setReadPreference(readPreference);
- }
- }
- return res;
+ return this.adminCommand({'killOp': 1, 'op': op});
};
DB.prototype.killOP = DB.prototype.killOp;
@@ -1200,18 +1170,7 @@ DB.prototype.fsyncLock = function() {
};
DB.prototype.fsyncUnlock = function() {
- var res = this.adminCommand({fsyncUnlock: 1});
- if (commandUnsupported(res)) {
- const session = this.getSession();
- const readPreference = session.getOptions().getReadPreference();
- try {
- session.getOptions().setReadPreference(null);
- res = this.getSiblingDB("admin").$cmd.sys.unlock.findOne();
- } finally {
- session.getOptions().setReadPreference(readPreference);
- }
- }
- return res;
+ return this.adminCommand({fsyncUnlock: 1});
};
DB.autocomplete = function(obj) {
diff --git a/src/mongo/shell/encrypted_dbclient_base.cpp b/src/mongo/shell/encrypted_dbclient_base.cpp
index fe301dcceb1..ba83ab07471 100644
--- a/src/mongo/shell/encrypted_dbclient_base.cpp
+++ b/src/mongo/shell/encrypted_dbclient_base.cpp
@@ -566,29 +566,9 @@ JS::Value EncryptedDBClientBase::getCollection() const {
}
std::unique_ptr<DBClientCursor> EncryptedDBClientBase::find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) {
- return _conn->find(std::move(findRequest), readPref);
-}
-
-std::unique_ptr<DBClientCursor> EncryptedDBClientBase::query_DEPRECATED(
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- int limit,
- int nToSkip,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int batchSize,
- boost::optional<BSONObj> readConcernObj) {
- return _conn->query_DEPRECATED(nsOrUuid,
- filter,
- querySettings,
- limit,
- nToSkip,
- fieldsToReturn,
- queryOptions,
- batchSize,
- readConcernObj);
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode) {
+ return _conn->find(std::move(findRequest), readPref, exhaustMode);
}
bool EncryptedDBClientBase::isFailed() const {
diff --git a/src/mongo/shell/encrypted_dbclient_base.h b/src/mongo/shell/encrypted_dbclient_base.h
index 4c0a75434fb..ddb0c18e235 100644
--- a/src/mongo/shell/encrypted_dbclient_base.h
+++ b/src/mongo/shell/encrypted_dbclient_base.h
@@ -87,7 +87,6 @@ class EncryptedDBClientBase : public DBClientBase,
public FLEKeyVault {
public:
using DBClientBase::find;
- using DBClientBase::query_DEPRECATED;
EncryptedDBClientBase(std::unique_ptr<DBClientBase> conn,
ClientSideFLEOptions encryptionOptions,
@@ -128,18 +127,8 @@ public:
void trace(JSTracer* trc) final;
std::unique_ptr<DBClientCursor> find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) final;
-
- std::unique_ptr<DBClientCursor> query_DEPRECATED(
- const NamespaceStringOrUUID& nsOrUuid,
- const BSONObj& filter,
- const Query& querySettings,
- int limit,
- int nToSkip,
- const BSONObj* fieldsToReturn,
- int queryOptions,
- int batchSize,
- boost::optional<BSONObj> readConcernObj = boost::none) final;
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode) final;
bool isFailed() const final;
diff --git a/src/mongo/shell/query.js b/src/mongo/shell/query.js
index a5d920cb109..fc8dd6e8731 100644
--- a/src/mongo/shell/query.js
+++ b/src/mongo/shell/query.js
@@ -91,11 +91,8 @@ DBQuery.prototype._checkModify = function() {
throw Error("query already executed");
};
-DBQuery.prototype._canUseCommandCursor = function() {
- // We also forbid queries with the exhaust option from running as DBCommandCursor, because the
- // DBCommandCursor does not support exhaust.
- return (this._collection.getName().indexOf("$cmd") !== 0) &&
- (this._options & DBQuery.Option.exhaust) === 0;
+DBQuery.prototype._isExhaustCursor = function() {
+ return (this._options & DBQuery.Option.exhaust) !== 0;
};
/**
@@ -120,7 +117,11 @@ DBQuery.prototype._exec = function() {
assert.eq(0, this._numReturned);
this._cursorSeen = 0;
- if (this._canUseCommandCursor()) {
+ // We forbid queries with the exhaust option from running as 'DBCommandCursor', because
+ // 'DBCommandCursor' does not currently support exhaust.
+ //
+ // In the future, we could unify the shell's exhaust and non-exhaust code paths.
+ if (!this._isExhaustCursor()) {
var canAttachReadPref = true;
var findCmd = this._convertToCommand(canAttachReadPref);
var cmdRes = this._db.runReadCommand(findCmd, null, this._options);
diff --git a/src/mongo/shell/shardingtest.js b/src/mongo/shell/shardingtest.js
index 3ca1623c413..08ffea17f3c 100644
--- a/src/mongo/shell/shardingtest.js
+++ b/src/mongo/shell/shardingtest.js
@@ -725,8 +725,13 @@ var ShardingTest = function(params) {
var result;
for (var i = 0; i < 5; i++) {
var otherShard = this.getOther(this.getPrimaryShard(dbName)).name;
- result = this.s.adminCommand(
- {movechunk: c, find: move, to: otherShard, _waitForDelete: waitForDelete});
+ let cmd = {movechunk: c, find: move, to: otherShard};
+
+ if (waitForDelete != null) {
+ cmd._waitForDelete = waitForDelete;
+ }
+
+ result = this.s.adminCommand(cmd);
if (result.ok)
break;
@@ -1414,7 +1419,6 @@ var ShardingTest = function(params) {
// Do replication.
rst.awaitNodesAgreeOnPrimary();
- rst.getPrimary().getDB("admin").foo.save({x: 1});
if (rst.keyFile) {
authutil.asCluster(rst.nodes, rst.keyFile, function() {
rst.awaitReplication();
diff --git a/src/mongo/shell/shell_utils.cpp b/src/mongo/shell/shell_utils.cpp
index 3c1f4a843f5..748d5a1b80c 100644
--- a/src/mongo/shell/shell_utils.cpp
+++ b/src/mongo/shell/shell_utils.cpp
@@ -48,7 +48,8 @@
#include "mongo/base/shim.h"
#include "mongo/client/dbclient_base.h"
#include "mongo/client/replica_set_monitor.h"
-#include "mongo/db/auth/security_token.h"
+#include "mongo/db/auth/security_token_gen.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/hasher.h"
#include "mongo/logv2/log.h"
#include "mongo/platform/decimal128.h"
@@ -436,7 +437,9 @@ BSONObj _createSecurityToken(const BSONObj& args, void* data) {
constexpr auto authUserFieldName = auth::SecurityToken::kAuthenticatedUserFieldName;
auto authUser = args.firstElement().Obj();
- return BSON("" << auth::signSecurityToken(BSON(authUserFieldName << authUser)));
+ using VTS = auth::ValidatedTenancyScope;
+ auto token = VTS(BSON(authUserFieldName << authUser), VTS::TokenForTestingTag{});
+ return BSON("" << token.getOriginalToken());
}
BSONObj replMonitorStats(const BSONObj& a, void* data) {
diff --git a/src/mongo/shell/shell_utils_extended.cpp b/src/mongo/shell/shell_utils_extended.cpp
index 89ecb4ab095..141389f007d 100644
--- a/src/mongo/shell/shell_utils_extended.cpp
+++ b/src/mongo/shell/shell_utils_extended.cpp
@@ -502,7 +502,8 @@ BSONObj readDumpFile(const BSONObj& a, void*) {
uassertStatusOK(swObj);
const auto obj = swObj.getValue();
- uassertStatusOK(validateBSON(obj.objdata(), valid));
+ uassertStatusOKWithContext(validateBSON(obj.objdata(), valid),
+ str::stream() << " at offset " << cursor.debug_offset());
array.append(obj);
}
diff --git a/src/mongo/tools/bridge.cpp b/src/mongo/tools/bridge.cpp
index f1a7b33a0df..a95b98f1be7 100644
--- a/src/mongo/tools/bridge.cpp
+++ b/src/mongo/tools/bridge.cpp
@@ -324,7 +324,7 @@ Future<DbResponse> ServiceEntryPointBridge::handleRequest(OperationContext* opCt
if ((request.operation() == dbQuery &&
NamespaceString(DbMessage(request).getns()).isCommand()) ||
request.operation() == dbMsg) {
- cmdRequest = rpc::opMsgRequestFromAnyProtocol(request);
+ cmdRequest = rpc::opMsgRequestFromAnyProtocol(request, opCtx->getClient());
dest.extractHostInfo(*cmdRequest);
diff --git a/src/mongo/transport/service_entry_point_impl.cpp b/src/mongo/transport/service_entry_point_impl.cpp
index 1be3ba5618b..524dc94a964 100644
--- a/src/mongo/transport/service_entry_point_impl.cpp
+++ b/src/mongo/transport/service_entry_point_impl.cpp
@@ -30,20 +30,37 @@
#include "mongo/transport/service_entry_point_impl.h"
+#include <boost/optional.hpp>
+#include <cstdint>
#include <fmt/format.h>
+#include <memory>
+#include <string>
#include <vector>
+#include "mongo/base/status.h"
+#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/auth/restriction_environment.h"
+#include "mongo/db/server_options.h"
#include "mongo/db/service_context.h"
#include "mongo/logv2/log.h"
+#include "mongo/platform/atomic_word.h"
+#include "mongo/platform/mutex.h"
+#include "mongo/stdx/condition_variable.h"
+#include "mongo/stdx/unordered_map.h"
+#include "mongo/stdx/variant.h"
#include "mongo/transport/hello_metrics.h"
+#include "mongo/transport/service_entry_point.h"
#include "mongo/transport/service_entry_point_impl_gen.h"
#include "mongo/transport/service_executor.h"
+#include "mongo/transport/service_executor_fixed.h"
#include "mongo/transport/service_executor_gen.h"
+#include "mongo/transport/service_executor_reserved.h"
+#include "mongo/transport/service_executor_synchronous.h"
#include "mongo/transport/service_state_machine.h"
#include "mongo/transport/session.h"
-#include "mongo/util/processinfo.h"
-#include "mongo/util/scopeguard.h"
+#include "mongo/util/duration.h"
+#include "mongo/util/hierarchical_acquisition.h"
+#include "mongo/util/net/cidr.h"
#if !defined(_WIN32)
#include <sys/resource.h>
@@ -60,40 +77,59 @@ namespace mongo {
using namespace fmt::literals;
+namespace {
+bool quiet() {
+ return serverGlobalParams.quiet.load();
+}
+
+/** Some diagnostic data that we will want to log about a Client after its death. */
+struct ClientSummary {
+ explicit ClientSummary(const Client* c)
+ : uuid{c->getUUID()}, remote{c->session()->remote()}, id{c->session()->id()} {}
+
+ friend auto logAttrs(const ClientSummary& m) {
+ return logv2::multipleAttrs(
+ "remote"_attr = m.remote, "uuid"_attr = m.uuid, "connectionId"_attr = m.id);
+ }
+
+ UUID uuid;
+ HostAndPort remote;
+ transport::SessionId id;
+};
+} // namespace
+
bool shouldOverrideMaxConns(const transport::SessionHandle& session,
const std::vector<stdx::variant<CIDR, std::string>>& exemptions) {
- if (exemptions.empty()) {
+ if (exemptions.empty())
return false;
- }
-
- const auto& remoteAddr = session->remoteAddr();
- const auto& localAddr = session->localAddr();
boost::optional<CIDR> remoteCIDR;
+ if (const auto& ra = session->remoteAddr(); ra.isValid() && ra.isIP())
+ remoteCIDR = uassertStatusOK(CIDR::parse(ra.getAddr()));
- if (remoteAddr.isValid() && remoteAddr.isIP()) {
- remoteCIDR = uassertStatusOK(CIDR::parse(remoteAddr.getAddr()));
- }
- for (const auto& exemption : exemptions) {
- // If this exemption is a CIDR range, then we check that the remote IP is in the
- // CIDR range
- if ((stdx::holds_alternative<CIDR>(exemption)) && (remoteCIDR)) {
- if (stdx::get<CIDR>(exemption).contains(*remoteCIDR)) {
- return true;
- }
-// Otherwise the exemption is a UNIX path and we should check the local path
-// (the remoteAddr == "anonymous unix socket") against the exemption string
-//
-// On Windows we don't check this at all and only CIDR ranges are supported
#ifndef _WIN32
- } else if ((stdx::holds_alternative<std::string>(exemption)) && localAddr.isValid() &&
- (localAddr.getAddr() == stdx::get<std::string>(exemption))) {
- return true;
+ boost::optional<std::string> localPath;
+ if (const auto& la = session->localAddr(); la.isValid())
+ localPath = la.getAddr();
#endif
- }
- }
- return false;
+ return std::any_of(exemptions.begin(), exemptions.end(), [&](const auto& exemption) {
+ return stdx::visit(
+ [&](auto&& ex) {
+ using Alt = std::decay_t<decltype(ex)>;
+ if constexpr (std::is_same_v<Alt, CIDR>)
+ return remoteCIDR && ex.contains(*remoteCIDR);
+#ifndef _WIN32
+ // Otherwise the exemption is a UNIX path and we should check the local path
+ // (the remoteAddr == "anonymous unix socket") against the exemption string.
+ // On Windows we don't check this at all and only CIDR ranges are supported.
+ if constexpr (std::is_same_v<Alt, std::string>)
+ return localPath && *localPath == ex;
+#endif
+ return false;
+ },
+ exemption);
+ });
}
size_t getSupportedMax() {
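Note: the shouldOverrideMaxConns() rewrite above switches from an explicit loop over the exemption list to std::any_of plus stdx::visit with an if-constexpr dispatch over the variant alternatives. A standard-library sketch of the same idiom (CidrSketch is a hypothetical stand-in for the real CIDR class):

    #include <algorithm>
    #include <string>
    #include <type_traits>
    #include <variant>
    #include <vector>

    // Hypothetical stand-in for CIDR; "contains" here is just a prefix check for illustration.
    struct CidrSketch {
        std::string prefix;
        bool contains(const std::string& addr) const { return addr.rfind(prefix, 0) == 0; }
    };

    using Exemption = std::variant<CidrSketch, std::string>;

    // CIDR alternatives are matched against the remote address, string alternatives against
    // the local UNIX-socket path; the non-matching alternative simply yields false.
    bool matchesAnyExemption(const std::vector<Exemption>& exemptions,
                             const std::string& remoteAddr,
                             const std::string& localPath) {
        return std::any_of(exemptions.begin(), exemptions.end(), [&](const Exemption& e) {
            return std::visit(
                [&](const auto& ex) {
                    using Alt = std::decay_t<decltype(ex)>;
                    if constexpr (std::is_same_v<Alt, CidrSketch>)
                        return ex.contains(remoteAddr);
                    else
                        return ex == localPath;
                },
                e);
        });
    }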
@@ -130,8 +166,96 @@ size_t getSupportedMax() {
return supportedMax;
}
+class ServiceEntryPointImpl::Sessions {
+public:
+ struct Entry {
+ explicit Entry(std::shared_ptr<transport::ServiceStateMachine> ssm) : ssm{std::move(ssm)} {}
+ std::shared_ptr<transport::ServiceStateMachine> ssm;
+ ClientSummary summary{ssm->client()};
+ };
+ using ByClientMap = stdx::unordered_map<Client*, Entry>;
+ using iterator = ByClientMap::iterator;
+
+ /** A proxy object providing properly synchronized Sessions accessors. */
+ class SyncToken {
+ public:
+ explicit SyncToken(Sessions* src) : _src{src}, _lk{_src->_mutex} {}
+
+ /** Run `f(ssm)` for each `ServiceStateMachine& ssm`, in an unspecified order. */
+ template <typename F>
+ void forEach(F&& f) {
+ for (auto& e : _src->_byClient)
+ f(*e.second.ssm);
+ }
+
+ /**
+ * Waits for Sessions to drain, possibly unlocking and relocking its
+ * Mutex. SyncToken holds exclusive access to a Sessions object before
+ * and after this function call, but not during.
+ */
+ bool waitForEmpty(Date_t deadline) {
+ return _src->_cv.wait_until(
+ _lk, deadline.toSystemTimePoint(), [&] { return _src->_byClient.empty(); });
+ }
+
+ iterator insert(std::shared_ptr<transport::ServiceStateMachine> ssm) {
+ Client* cli = ssm->client();
+ auto [it, ok] = _src->_byClient.insert({cli, Entry(std::move(ssm))});
+ invariant(ok);
+ _src->_created.fetchAndAdd(1);
+ _onSizeChange();
+ return it;
+ }
+
+ void erase(iterator it) {
+ _src->_byClient.erase(it);
+ _onSizeChange();
+ }
+
+ iterator find(Client* client) {
+ auto iter = _src->_byClient.find(client);
+ invariant(iter != _src->_byClient.end());
+ return iter;
+ }
+
+ size_t size() const {
+ return _src->_byClient.size();
+ }
+
+ private:
+ void _onSizeChange() {
+ _src->_size.store(_src->_byClient.size());
+ _src->_cv.notify_all();
+ }
+
+ Sessions* _src;
+ stdx::unique_lock<Mutex> _lk;
+ };
+
+ /** Returns a proxy object providing synchronized mutable access to the Sessions object. */
+ SyncToken sync() {
+ return SyncToken(this);
+ }
+
+ size_t size() const {
+ return _size.load();
+ }
+
+ size_t created() const {
+ return _created.load();
+ }
+
+ mutable Mutex _mutex = MONGO_MAKE_LATCH("ServiceEntryPointImpl::Sessions::_mutex");
+ stdx::condition_variable _cv; ///< notified on `_byClient` changes.
+ AtomicWord<size_t> _size{0}; ///< Kept in sync with `_byClient.size()`
+ AtomicWord<size_t> _created{0}; ///< Increases with each `insert` call.
+ ByClientMap _byClient; ///< guarded by `_mutex`
+};
+
ServiceEntryPointImpl::ServiceEntryPointImpl(ServiceContext* svcCtx)
- : _svcCtx(svcCtx), _maxNumConnections(getSupportedMax()) {}
+ : _svcCtx(svcCtx), _maxSessions(getSupportedMax()), _sessions{std::make_unique<Sessions>()} {}
+
+ServiceEntryPointImpl::~ServiceEntryPointImpl() = default;
Status ServiceEntryPointImpl::start() {
if (auto status = transport::ServiceExecutorSynchronous::get(_svcCtx)->start();
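Note: the new ServiceEntryPointImpl::Sessions type above keeps the session map behind a SyncToken proxy that holds the mutex for its lifetime, so every mutation and the drain wait happen under the same lock. A condensed standalone sketch of that idiom, under simplified assumptions (plain std::mutex, an int-keyed map):

    #include <chrono>
    #include <condition_variable>
    #include <mutex>
    #include <unordered_map>

    class Registry {
    public:
        // Proxy that owns the lock for its lifetime and exposes the only mutating accessors.
        class SyncToken {
        public:
            explicit SyncToken(Registry* src) : _src{src}, _lk{src->_mutex} {}

            void insert(int id) {
                _src->_byId[id] = true;
                _src->_cv.notify_all();
            }
            void erase(int id) {
                _src->_byId.erase(id);
                _src->_cv.notify_all();
            }
            // Blocks (releasing the lock while waiting) until the registry drains or the deadline.
            bool waitForEmpty(std::chrono::steady_clock::time_point deadline) {
                return _src->_cv.wait_until(_lk, deadline, [&] { return _src->_byId.empty(); });
            }

        private:
            Registry* _src;
            std::unique_lock<std::mutex> _lk;
        };

        SyncToken sync() { return SyncToken(this); }

    private:
        std::mutex _mutex;
        std::condition_variable _cv;
        std::unordered_map<int, bool> _byId;
    };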
@@ -153,6 +277,7 @@ Status ServiceEntryPointImpl::start() {
}
void ServiceEntryPointImpl::startSession(transport::SessionHandle session) {
+ invariant(session);
// Setup the restriction environment on the Session, if the Session has local/remote Sockaddrs
const auto& remoteAddr = session->remoteAddr();
const auto& localAddr = session->localAddr();
@@ -160,90 +285,66 @@ void ServiceEntryPointImpl::startSession(transport::SessionHandle session) {
auto restrictionEnvironment = std::make_unique<RestrictionEnvironment>(remoteAddr, localAddr);
RestrictionEnvironment::set(session, std::move(restrictionEnvironment));
- bool canOverrideMaxConns = shouldOverrideMaxConns(session, serverGlobalParams.maxConnsOverride);
-
- auto clientName = "conn{}"_format(session->id());
- auto client = _svcCtx->makeClient(clientName, session);
- auto uuid = client->getUUID();
-
- const bool quiet = serverGlobalParams.quiet.load();
-
- size_t connectionCount;
- auto maybeSsmIt = [&]() -> boost::optional<SSMListIterator> {
- stdx::lock_guard lk(_sessionsMutex);
- connectionCount = _currentConnections.load();
- if (connectionCount > _maxNumConnections && !canOverrideMaxConns) {
- return boost::none;
- }
-
- auto clientPtr = client.get();
- auto it = _sessions.emplace(_sessions.begin(), std::move(client));
+ bool isPrivilegedSession = shouldOverrideMaxConns(session, serverGlobalParams.maxConnsOverride);
- connectionCount = _sessions.size();
- _currentConnections.store(connectionCount);
- _createdConnections.addAndFetch(1);
- onClientConnect(clientPtr);
+ auto client = _svcCtx->makeClient("conn{}"_format(session->id()), session);
+ auto clientPtr = client.get();
- return it;
- }();
-
- if (!maybeSsmIt) {
- if (!quiet) {
- LOGV2(22942,
- "Connection refused because there are too many open connections",
- "remote"_attr = session->remote(),
- "connectionCount"_attr = connectionCount);
+ Sessions::iterator iter;
+ {
+ auto sync = _sessions->sync();
+ if (sync.size() >= _maxSessions && !isPrivilegedSession) {
+ if (!quiet()) {
+ LOGV2(22942,
+ "Connection refused because there are too many open connections",
+ "remote"_attr = session->remote(),
+ "connectionCount"_attr = sync.size());
+ }
+ return;
}
- return;
- } else if (!quiet) {
- LOGV2(22943,
- "Connection accepted",
- "remote"_attr = session->remote(),
- "uuid"_attr = uuid.toString(),
- "connectionId"_attr = session->id(),
- "connectionCount"_attr = connectionCount);
- }
- auto ssmIt = *maybeSsmIt;
- ssmIt->setCleanupHook([this, ssmIt, quiet, session = std::move(session), uuid] {
- size_t connectionCount;
- auto remote = session->remote();
+ // Imbue the new Client with a ServiceExecutorContext.
{
- stdx::lock_guard<decltype(_sessionsMutex)> lk(_sessionsMutex);
- _sessions.erase(ssmIt);
- connectionCount = _sessions.size();
- _currentConnections.store(connectionCount);
+ auto seCtx = std::make_unique<transport::ServiceExecutorContext>();
+ seCtx->setThreadingModel(transport::ServiceExecutor::getInitialThreadingModel());
+ seCtx->setCanUseReserved(isPrivilegedSession);
+ stdx::lock_guard lk(*client);
+ transport::ServiceExecutorContext::set(&*client, std::move(seCtx));
}
- _sessionsCV.notify_one();
-
- if (!quiet) {
- LOGV2(22944,
- "Connection ended",
- "remote"_attr = remote,
- "uuid"_attr = uuid.toString(),
- "connectionId"_attr = session->id(),
- "connectionCount"_attr = connectionCount);
+ auto ssm = transport::ServiceStateMachine::make(std::move(client));
+ iter = sync.insert(std::move(ssm));
+ if (!quiet()) {
+ LOGV2(22943,
+ "Connection accepted",
+ logAttrs(iter->second.summary),
+ "connectionCount"_attr = sync.size());
}
- });
+ }
- auto seCtx = transport::ServiceExecutorContext{};
- seCtx.setThreadingModel(transport::ServiceExecutor::getInitialThreadingModel());
- seCtx.setCanUseReserved(canOverrideMaxConns);
- ssmIt->start(std::move(seCtx));
+ onClientConnect(clientPtr);
+ iter->second.ssm->start();
}
-void ServiceEntryPointImpl::endAllSessions(transport::Session::TagMask tags) {
- // While holding the _sesionsMutex, loop over all the current connections, and if their tags
- // do not match the requested tags to skip, terminate the session.
+void ServiceEntryPointImpl::onClientDisconnect(Client* client) {
+ derivedOnClientDisconnect(client);
{
- stdx::unique_lock<decltype(_sessionsMutex)> lk(_sessionsMutex);
- for (auto& ssm : _sessions) {
- ssm.terminateIfTagsDontMatch(tags);
- }
+ stdx::lock_guard lk(*client);
+ transport::ServiceExecutorContext::reset(client);
+ }
+ auto sync = _sessions->sync();
+ auto iter = sync.find(client);
+ auto summary = iter->second.summary;
+ sync.erase(iter);
+ if (!quiet()) {
+ LOGV2(22944, "Connection ended", logAttrs(summary), "connectionCount"_attr = sync.size());
}
}
+void ServiceEntryPointImpl::endAllSessions(transport::Session::TagMask tags) {
+ _sessions->sync().forEach([&](auto&& ssm) { ssm.terminateIfTagsDontMatch(tags); });
+}
+
bool ServiceEntryPointImpl::shutdown(Milliseconds timeout) {
#if __has_feature(address_sanitizer) || __has_feature(thread_sanitizer)
static constexpr bool kSanitizerBuild = true;
@@ -266,88 +367,73 @@ bool ServiceEntryPointImpl::shutdown(Milliseconds timeout) {
return true;
}
-bool ServiceEntryPointImpl::shutdownAndWait(Milliseconds timeout) {
- auto deadline = _svcCtx->getPreciseClockSource()->now() + timeout;
+size_t ServiceEntryPointImpl::numOpenSessions() const {
+ return _sessions->size();
+}
- stdx::unique_lock<decltype(_sessionsMutex)> lk(_sessionsMutex);
+size_t ServiceEntryPointImpl::maxOpenSessions() const {
+ return _maxSessions;
+}
- // Request that all sessions end, while holding the _sesionsMutex, loop over all the current
- // connections and terminate them. Then wait for the number of active connections to reach zero
- // with a condition_variable that notifies in the session cleanup hook. If we haven't closed
- // drained all active operations within the deadline, just keep going with shutdown: the OS will
- // do it for us when the process terminates.
- _terminateAll(lk);
+bool ServiceEntryPointImpl::shutdownAndWait(Milliseconds timeout) {
+ auto deadline = _svcCtx->getPreciseClockSource()->now() + timeout;
+ // Issue a terminate to all sessions, then wait for them to drain.
+ // If there are undrained sessions after the deadline, shutdown continues.
LOGV2(6367401, "Shutting down service entry point and waiting for sessions to join");
- auto result = _waitForNoSessions(lk, deadline);
- lk.unlock();
-
- if (result) {
- LOGV2(22946, "shutdown: no running workers found...");
- } else {
- LOGV2(
- 22947,
- "shutdown: exhausted grace period for {workers} active workers to "
- "drain; continuing with shutdown...",
- "shutdown: exhausted grace period active workers to drain; continuing with shutdown...",
- "workers"_attr = numOpenSessions());
+ bool drainedAll;
+ {
+ auto sync = _sessions->sync();
+ sync.forEach([&](auto&& ssm) { ssm.terminate(); });
+ drainedAll = sync.waitForEmpty(deadline);
+ if (!drainedAll) {
+ LOGV2(22947,
+ "Shutdown: some sessions not drained after deadline. Continuing shutdown",
+ "sessions"_attr = sync.size());
+ } else {
+ LOGV2(22946, "Shutdown: all sessions drained");
+ }
}
transport::ServiceExecutor::shutdownAll(_svcCtx, deadline);
- return result;
+ return drainedAll;
}
void ServiceEntryPointImpl::endAllSessionsNoTagMask() {
- auto lk = stdx::unique_lock<decltype(_sessionsMutex)>(_sessionsMutex);
- _terminateAll(lk);
-}
-
-void ServiceEntryPointImpl::_terminateAll(WithLock) {
- for (auto& ssm : _sessions) {
- ssm.terminate();
- }
+ _sessions->sync().forEach([&](auto&& ssm) { ssm.terminate(); });
}
bool ServiceEntryPointImpl::waitForNoSessions(Milliseconds timeout) {
auto deadline = _svcCtx->getPreciseClockSource()->now() + timeout;
- LOGV2(5342100, "Waiting until for all sessions to conclude", "deadline"_attr = deadline);
+ LOGV2(5342100, "Waiting for all sessions to conclude", "deadline"_attr = deadline);
- auto lk = stdx::unique_lock<decltype(_sessionsMutex)>(_sessionsMutex);
- return _waitForNoSessions(lk, deadline);
-}
-
-bool ServiceEntryPointImpl::_waitForNoSessions(stdx::unique_lock<decltype(_sessionsMutex)>& lk,
- Date_t deadline) {
- auto noWorkersLeft = [this] { return numOpenSessions() == 0; };
- _sessionsCV.wait_until(lk, deadline.toSystemTimePoint(), noWorkersLeft);
-
- return noWorkersLeft();
+ return _sessions->sync().waitForEmpty(deadline);
}
void ServiceEntryPointImpl::appendStats(BSONObjBuilder* bob) const {
+ size_t sessionCount = _sessions->size();
+ size_t sessionsCreated = _sessions->created();
- size_t sessionCount = _currentConnections.load();
+ auto appendInt = [&](StringData n, auto v) { bob->append(n, static_cast<int>(v)); };
- bob->append("current", static_cast<int>(sessionCount));
- bob->append("available", static_cast<int>(_maxNumConnections - sessionCount));
- bob->append("totalCreated", static_cast<int>(_createdConnections.load()));
+ appendInt("current", sessionCount);
+ appendInt("available", _maxSessions - sessionCount);
+ appendInt("totalCreated", sessionsCreated);
invariant(_svcCtx);
- bob->append("active", static_cast<int>(_svcCtx->getActiveClientOperations()));
+ appendInt("active", _svcCtx->getActiveClientOperations());
const auto seStats = transport::ServiceExecutorStats::get(_svcCtx);
- bob->append("threaded", static_cast<int>(seStats.usesDedicated));
- if (serverGlobalParams.maxConnsOverride.size()) {
- bob->append("limitExempt", static_cast<int>(seStats.limitExempt));
- }
-
- bob->append("exhaustIsMaster",
- static_cast<int>(HelloMetrics::get(_svcCtx)->getNumExhaustIsMaster()));
- bob->append("exhaustHello", static_cast<int>(HelloMetrics::get(_svcCtx)->getNumExhaustHello()));
- bob->append("awaitingTopologyChanges",
- static_cast<int>(HelloMetrics::get(_svcCtx)->getNumAwaitingTopologyChanges()));
+ appendInt("threaded", seStats.usesDedicated);
+ if (!serverGlobalParams.maxConnsOverride.empty())
+ appendInt("limitExempt", seStats.limitExempt);
+
+ auto&& hm = HelloMetrics::get(_svcCtx);
+ appendInt("exhaustIsMaster", hm->getNumExhaustIsMaster());
+ appendInt("exhaustHello", hm->getNumExhaustHello());
+ appendInt("awaitingTopologyChanges", hm->getNumAwaitingTopologyChanges());
if (auto adminExec = transport::ServiceExecutorReserved::get(_svcCtx)) {
BSONObjBuilder section(bob->subobjStart("adminConnections"));
diff --git a/src/mongo/transport/service_entry_point_impl.h b/src/mongo/transport/service_entry_point_impl.h
index 72f798908d5..99fbcd8b11f 100644
--- a/src/mongo/transport/service_entry_point_impl.h
+++ b/src/mongo/transport/service_entry_point_impl.h
@@ -29,27 +29,22 @@
#pragma once
-#include <list>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
-#include "mongo/platform/atomic_word.h"
-#include "mongo/platform/mutex.h"
-#include "mongo/stdx/condition_variable.h"
+#include "mongo/base/status.h"
+#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/stdx/variant.h"
#include "mongo/transport/service_entry_point.h"
-#include "mongo/transport/service_executor_fixed.h"
-#include "mongo/transport/service_executor_reserved.h"
-#include "mongo/transport/service_executor_synchronous.h"
-#include "mongo/transport/service_state_machine.h"
-#include "mongo/util/hierarchical_acquisition.h"
+#include "mongo/transport/session.h"
+#include "mongo/util/duration.h"
#include "mongo/util/net/cidr.h"
namespace mongo {
class ServiceContext;
-namespace transport {
-class Session;
-} // namespace transport
-
/**
* A basic entry point from the TransportLayer into a server.
*
@@ -58,11 +53,12 @@ class Session;
* (transport::Session).
*/
class ServiceEntryPointImpl : public ServiceEntryPoint {
- ServiceEntryPointImpl(const ServiceEntryPointImpl&) = delete;
- ServiceEntryPointImpl& operator=(const ServiceEntryPointImpl&) = delete;
-
public:
explicit ServiceEntryPointImpl(ServiceContext* svcCtx);
+ ~ServiceEntryPointImpl();
+
+ ServiceEntryPointImpl(const ServiceEntryPointImpl&) = delete;
+ ServiceEntryPointImpl& operator=(const ServiceEntryPointImpl&) = delete;
void startSession(transport::SessionHandle session) override;
@@ -76,32 +72,23 @@ public:
void appendStats(BSONObjBuilder* bob) const override;
- size_t numOpenSessions() const final {
- return _currentConnections.load();
- }
+ size_t numOpenSessions() const final;
- size_t maxOpenSessions() const final {
- return _maxNumConnections;
- }
+ size_t maxOpenSessions() const final;
-private:
- void _terminateAll(WithLock);
- bool _waitForNoSessions(stdx::unique_lock<Mutex>& lk, Date_t deadline);
+ void onClientDisconnect(Client* client) final;
- using SSMList = std::list<transport::ServiceStateMachine>;
- using SSMListIterator = SSMList::iterator;
+ /** `onClientDisconnect` calls this before doing anything else. */
+ virtual void derivedOnClientDisconnect(Client* client) {}
+
+private:
+ class Sessions;
ServiceContext* const _svcCtx;
- AtomicWord<std::size_t> _nWorkers;
- mutable Mutex _sessionsMutex =
- MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "ServiceEntryPointImpl::_sessionsMutex");
- stdx::condition_variable _sessionsCV;
- SSMList _sessions;
+ const size_t _maxSessions;
- const size_t _maxNumConnections{DEFAULT_MAX_CONN};
- AtomicWord<size_t> _currentConnections{0};
- AtomicWord<size_t> _createdConnections{0};
+ std::unique_ptr<Sessions> _sessions;
};
/*
diff --git a/src/mongo/transport/service_executor.cpp b/src/mongo/transport/service_executor.cpp
index 9814bfe1487..b61c55ea250 100644
--- a/src/mongo/transport/service_executor.cpp
+++ b/src/mongo/transport/service_executor.cpp
@@ -58,7 +58,7 @@ auto gInitialThreadingModel = ServiceExecutor::ThreadingModel::kDedicated;
auto getServiceExecutorStats =
ServiceContext::declareDecoration<synchronized_value<ServiceExecutorStats>>();
auto getServiceExecutorContext =
- Client::declareDecoration<boost::optional<ServiceExecutorContext>>();
+ Client::declareDecoration<std::unique_ptr<ServiceExecutorContext>>();
} // namespace
StringData toString(ServiceExecutor::ThreadingModel threadingModel) {
@@ -97,17 +97,13 @@ ServiceExecutorStats ServiceExecutorStats::get(ServiceContext* ctx) noexcept {
}
ServiceExecutorContext* ServiceExecutorContext::get(Client* client) noexcept {
- auto& serviceExecutorContext = getServiceExecutorContext(client);
-
- if (!serviceExecutorContext) {
- // Service worker Clients will never have a ServiceExecutorContext.
- return nullptr;
- }
-
- return &serviceExecutorContext.get();
+ // Service worker Clients will never have a ServiceExecutorContext.
+ return getServiceExecutorContext(client).get();
}
-void ServiceExecutorContext::set(Client* client, ServiceExecutorContext seCtx) noexcept {
+void ServiceExecutorContext::set(Client* client,
+ std::unique_ptr<ServiceExecutorContext> seCtxPtr) noexcept {
+ auto& seCtx = *seCtxPtr;
auto& serviceExecutorContext = getServiceExecutorContext(client);
invariant(!serviceExecutorContext);
@@ -138,7 +134,7 @@ void ServiceExecutorContext::set(Client* client, ServiceExecutorContext seCtx) n
"client"_attr = client->desc(),
"threadingModel"_attr = seCtx._threadingModel,
"canUseReserved"_attr = seCtx._canUseReserved);
- serviceExecutorContext = std::move(seCtx);
+ serviceExecutorContext = std::move(seCtxPtr);
}
void ServiceExecutorContext::reset(Client* client) noexcept {
diff --git a/src/mongo/transport/service_executor.h b/src/mongo/transport/service_executor.h
index 89a4efa8cc5..56e43b4c404 100644
--- a/src/mongo/transport/service_executor.h
+++ b/src/mongo/transport/service_executor.h
@@ -156,7 +156,7 @@ public:
*
* This function may only be invoked once and only while under the Client lock.
*/
- static void set(Client* client, ServiceExecutorContext seCtx) noexcept;
+ static void set(Client* client, std::unique_ptr<ServiceExecutorContext> seCtx) noexcept;
/**
@@ -169,18 +169,8 @@ public:
ServiceExecutorContext() = default;
ServiceExecutorContext(const ServiceExecutorContext&) = delete;
ServiceExecutorContext& operator=(const ServiceExecutorContext&) = delete;
- ServiceExecutorContext(ServiceExecutorContext&& seCtx)
- : _client{std::exchange(seCtx._client, nullptr)},
- _sep{std::exchange(seCtx._sep, nullptr)},
- _threadingModel{seCtx._threadingModel},
- _canUseReserved{seCtx._canUseReserved} {}
- ServiceExecutorContext& operator=(ServiceExecutorContext&& seCtx) {
- _client = std::exchange(seCtx._client, nullptr);
- _sep = std::exchange(seCtx._sep, nullptr);
- _threadingModel = seCtx._threadingModel;
- _canUseReserved = seCtx._canUseReserved;
- return *this;
- }
+ ServiceExecutorContext(ServiceExecutorContext&&) = delete;
+ ServiceExecutorContext& operator=(ServiceExecutorContext&&) = delete;
/**
* Set the ThreadingModel for the associated Client's service execution.
diff --git a/src/mongo/transport/service_state_machine.cpp b/src/mongo/transport/service_state_machine.cpp
index a66ed2f83b1..e71d987c4b4 100644
--- a/src/mongo/transport/service_state_machine.cpp
+++ b/src/mongo/transport/service_state_machine.cpp
@@ -121,29 +121,10 @@ Message makeExhaustMessage(Message requestMsg, DbResponse* dbresponse) {
}
} // namespace
-class ServiceStateMachine::Impl final
- : public std::enable_shared_from_this<ServiceStateMachine::Impl> {
+class ServiceStateMachine::Impl {
public:
- /*
- * Any state may transition to EndSession in case of an error, otherwise the valid state
- * transitions are:
- * Source -> SourceWait -> Process -> SinkWait -> Source (standard RPC)
- * Source -> SourceWait -> Process -> SinkWait -> Process -> SinkWait ... (exhaust)
- * Source -> SourceWait -> Process -> Source (fire-and-forget)
- */
- enum class State {
- Created, // The session has been created, but no operations have been performed yet
- Source, // Request a new Message from the network to handle
- SourceWait, // Wait for the new Message to arrive from the network
- Process, // Run the Message through the database
- SinkWait, // Wait for the database result to be sent by the network
- EndSession, // End the session - the ServiceStateMachine will be invalid after this
- Ended // The session has ended. It is illegal to call any method besides
- // state() if this is the current state.
- };
-
- Impl(ServiceContext::UniqueClient client)
- : _state{State::Created},
+ Impl(ServiceStateMachine* ssm, ServiceContext::UniqueClient client)
+ : _ssm{ssm},
_serviceContext{client->getServiceContext()},
_sep{_serviceContext->getServiceEntryPoint()},
_clientStrand{ClientStrand::make(std::move(client))} {}
@@ -152,9 +133,11 @@ public:
_sep->onEndSession(session());
}
- void start(ServiceExecutorContext seCtx);
+ Client* client() const {
+ return _clientStrand->getClientPointer();
+ }
- void setCleanupHook(std::function<void()> hook);
+ void start();
/*
* Terminates the associated transport Session, regardless of tags.
@@ -206,34 +189,31 @@ public:
void cleanupExhaustResources() noexcept;
/*
- * Gets the current state of connection for testing/diagnostic purposes.
- */
- State state() const {
- return _state.load();
- }
-
- /*
* Gets the transport::Session associated with this connection
*/
const transport::SessionHandle& session() {
- return _clientStrand->getClientPointer()->session();
+ return client()->session();
}
/*
* Gets the transport::ServiceExecutor associated with this connection.
*/
ServiceExecutor* executor() {
- return ServiceExecutorContext::get(_clientStrand->getClientPointer())->getServiceExecutor();
+ return ServiceExecutorContext::get(client())->getServiceExecutor();
}
private:
- AtomicWord<State> _state{State::Created};
+ /** Alias: refers to this Impl, but holds a ref to the enclosing SSM. */
+ std::shared_ptr<Impl> shared_from_this() {
+ return {_ssm->shared_from_this(), this};
+ }
+ ServiceStateMachine* const _ssm;
ServiceContext* const _serviceContext;
ServiceEntryPoint* const _sep;
+ AtomicWord<bool> _isTerminated{false};
ClientStrandPtr _clientStrand;
- std::function<void()> _cleanupHook;
bool _inExhaust = false;
boost::optional<MessageCompressorId> _compressorId;
@@ -245,8 +225,6 @@ private:
void ServiceStateMachine::Impl::sourceMessage() {
invariant(_inMessage.empty());
- invariant(_state.load() == State::Source);
- _state.store(State::SourceWait);
// Reset the compressor only before sourcing a new message. This ensures the same compressor,
// if any, is used for sinking exhaust messages. For moreToCome messages, this allows resetting
@@ -268,8 +246,6 @@ void ServiceStateMachine::Impl::sourceMessage() {
const auto status = msg.getStatus();
if (status.isOK()) {
- _state.store(State::Process);
-
// If the sourceMessage succeeded then we can move to on to process the message. We simply
// return from here and the future chain in startNewLoop() will continue to the next state
// normally.
@@ -296,16 +272,12 @@ void ServiceStateMachine::Impl::sourceMessage() {
"remote"_attr = remote,
"connectionId"_attr = session()->id());
}
-
- _state.store(State::EndSession);
uassertStatusOK(status);
}
void ServiceStateMachine::Impl::sinkMessage() {
// Sink our response to the client
- invariant(_state.load() == State::Process);
- _state.store(State::SinkWait);
-
+ //
// If there was an error sinking the message to the client, then we should print an error and
// end the session.
//
@@ -317,12 +289,7 @@ void ServiceStateMachine::Impl::sinkMessage() {
"error"_attr = status,
"remote"_attr = session()->remote(),
"connectionId"_attr = session()->id());
- _state.store(State::EndSession);
uassertStatusOK(status);
- } else if (_inExhaust) {
- _state.store(State::Process);
- } else {
- _state.store(State::Source);
}
// Performance testing showed a significant benefit from yielding here.
@@ -418,7 +385,6 @@ Future<void> ServiceStateMachine::Impl::processMessage() {
_outMessage = std::move(toSink);
} else {
- _state.store(State::Source);
_inMessage.reset();
_outMessage.reset();
_inExhaust = false;
@@ -426,14 +392,7 @@ Future<void> ServiceStateMachine::Impl::processMessage() {
});
}
-void ServiceStateMachine::Impl::start(ServiceExecutorContext seCtx) {
- {
- auto client = _clientStrand->getClientPointer();
- stdx::lock_guard lk(*client);
- ServiceExecutorContext::set(client, std::move(seCtx));
- }
-
- invariant(_state.swap(State::Source) == State::Created);
+void ServiceStateMachine::Impl::start() {
invariant(!_inExhaust, "Cannot start the state machine in exhaust mode");
scheduleNewLoop(Status::OK());
@@ -466,7 +425,6 @@ void ServiceStateMachine::Impl::scheduleNewLoop(Status status) try {
}
} catch (const DBException& ex) {
LOGV2_DEBUG(5763901, 2, "Terminating session due to error", "error"_attr = ex.toStatus());
- _state.store(State::EndSession);
terminate();
cleanupSession(ex.toStatus());
}
@@ -495,14 +453,14 @@ void ServiceStateMachine::Impl::startNewLoop(const Status& executorStatus) {
}
void ServiceStateMachine::Impl::terminate() {
- if (state() == State::Ended)
+ if (_isTerminated.swap(true))
return;
session()->end();
}
void ServiceStateMachine::Impl::terminateIfTagsDontMatch(transport::Session::TagMask tags) {
- if (state() == State::Ended)
+ if (_isTerminated.load())
return;
auto sessionTags = session()->getTags();
@@ -522,7 +480,7 @@ void ServiceStateMachine::Impl::cleanupExhaustResources() noexcept try {
if (!_inExhaust) {
return;
}
- auto request = OpMsgRequest::parse(_inMessage);
+ auto request = OpMsgRequest::parse(_inMessage, Client::getCurrent());
// Clean up cursor for exhaust getMore request.
if (request.getCommandName() == "getMore"_sd) {
auto cursorId = request.body["getMore"].Long();
@@ -542,44 +500,23 @@ void ServiceStateMachine::Impl::cleanupExhaustResources() noexcept try {
"error"_attr = e.toStatus());
}
-void ServiceStateMachine::Impl::setCleanupHook(std::function<void()> hook) {
- invariant(state() == State::Created);
- _cleanupHook = std::move(hook);
-}
-
void ServiceStateMachine::Impl::cleanupSession(const Status& status) {
LOGV2_DEBUG(5127900, 2, "Ending session", "error"_attr = status);
-
cleanupExhaustResources();
- auto client = _clientStrand->getClientPointer();
- _sep->onClientDisconnect(client);
-
- {
- stdx::lock_guard lk(*client);
- transport::ServiceExecutorContext::reset(client);
- }
-
- auto previousState = _state.swap(State::Ended);
- invariant(previousState != State::Ended);
-
- _inMessage.reset();
-
- _outMessage.reset();
-
- if (auto cleanupHook = std::exchange(_cleanupHook, {})) {
- cleanupHook();
- }
+ _sep->onClientDisconnect(client());
}
-ServiceStateMachine::ServiceStateMachine(ServiceContext::UniqueClient client)
- : _impl{std::make_shared<Impl>(std::move(client))} {}
+ServiceStateMachine::ServiceStateMachine(PassKeyTag, ServiceContext::UniqueClient client)
+ : _impl{std::make_unique<Impl>(this, std::move(client))} {}
+
+ServiceStateMachine::~ServiceStateMachine() = default;
-void ServiceStateMachine::start(ServiceExecutorContext seCtx) {
- _impl->start(std::move(seCtx));
+Client* ServiceStateMachine::client() const {
+ return _impl->client();
}
-void ServiceStateMachine::setCleanupHook(std::function<void()> hook) {
- _impl->setCleanupHook(std::move(hook));
+void ServiceStateMachine::start() {
+ _impl->start();
}
void ServiceStateMachine::terminate() {
diff --git a/src/mongo/transport/service_state_machine.h b/src/mongo/transport/service_state_machine.h
index 67e16df2fb0..71ed2be3546 100644
--- a/src/mongo/transport/service_state_machine.h
+++ b/src/mongo/transport/service_state_machine.h
@@ -44,8 +44,15 @@ namespace transport {
* lifecycle of each user request as a state machine. It is the glue between the stateless
* ServiceEntryPoint and TransportLayer that ties network and database logic together for a
* user.
+ *
+ * A `ServiceStateMachine` must be managed by a `shared_ptr`, so we force all instances
+ * to be created by the static `make` function.
*/
-class ServiceStateMachine {
+class ServiceStateMachine : public std::enable_shared_from_this<ServiceStateMachine> {
+ struct PassKeyTag {
+ explicit PassKeyTag() = default;
+ };
+ class Impl;
ServiceStateMachine(ServiceStateMachine&) = delete;
ServiceStateMachine& operator=(ServiceStateMachine&) = delete;
@@ -53,17 +60,23 @@ class ServiceStateMachine {
ServiceStateMachine& operator=(ServiceStateMachine&&) = delete;
public:
- class Impl;
+ /** Factory function: The only public way to create instances. */
+ static std::shared_ptr<ServiceStateMachine> make(ServiceContext::UniqueClient client) {
+ return std::make_shared<ServiceStateMachine>(PassKeyTag{}, std::move(client));
+ }
- /*
- * Construct a ServiceStateMachine for a given Client.
- */
- ServiceStateMachine(ServiceContext::UniqueClient client);
+    /** Public code must use `make` to create instances. */
+ ServiceStateMachine(PassKeyTag, ServiceContext::UniqueClient client);
+
+ ~ServiceStateMachine();
+
+ /** Returns the Client given in the constructor. */
+ Client* client() const;
/*
* start() schedules a call to _runOnce() in the future.
*/
- void start(ServiceExecutorContext seCtx);
+ void start();
/*
* Terminates the associated transport Session, regardless of tags.
@@ -81,13 +94,8 @@ public:
*/
void terminateIfTagsDontMatch(transport::Session::TagMask tags);
- /*
- * Sets a function to be called after the session is ended
- */
- void setCleanupHook(std::function<void()> hook);
-
private:
- std::shared_ptr<Impl> _impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace transport
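
The header above combines two idioms: a private passkey tag that forces construction through the static make() (guaranteeing every ServiceStateMachine is owned by a shared_ptr), and, in the .cpp hunk earlier, an aliasing shared_ptr that lets the pimpl Impl piggyback on the enclosing object's lifetime. The following is a standalone sketch of both patterns on a hypothetical Widget class, not the server code itself.

    #include <memory>

    class Widget : public std::enable_shared_from_this<Widget> {
        struct PassKeyTag {  // only Widget can name this tag
            explicit PassKeyTag() = default;
        };
        class Impl;

    public:
        static std::shared_ptr<Widget> make() {
            // std::make_shared needs a public constructor, so the constructor is
            // public but gated on the private PassKeyTag.
            return std::make_shared<Widget>(PassKeyTag{});
        }
        explicit Widget(PassKeyTag) : _impl{std::make_unique<Impl>(this)} {}

    private:
        class Impl {
        public:
            explicit Impl(Widget* owner) : _owner{owner} {}
            // Aliasing constructor: the returned shared_ptr points at this Impl
            // but shares ownership with (keeps alive) the enclosing Widget.
            std::shared_ptr<Impl> shared_from_this() {
                return {_owner->shared_from_this(), this};
            }

        private:
            Widget* const _owner;
        };
        std::unique_ptr<Impl> _impl;
    };

Callers can only write `auto w = Widget::make();`; a stack-allocated Widget will not compile because PassKeyTag cannot be named outside the class.
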
diff --git a/src/mongo/transport/service_state_machine_test.cpp b/src/mongo/transport/service_state_machine_test.cpp
index b5f67d5fc6c..5d742ca81da 100644
--- a/src/mongo/transport/service_state_machine_test.cpp
+++ b/src/mongo/transport/service_state_machine_test.cpp
@@ -54,6 +54,7 @@
#include "mongo/unittest/unittest.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/clock_source_mock.h"
+#include "mongo/util/concurrency/thread_pool.h"
#include "mongo/util/producer_consumer_queue.h"
#include "mongo/util/tick_source_mock.h"
@@ -564,7 +565,7 @@ public:
_fixture->_cleanup(session);
}
- void onClientDisconnect(Client* client) override {
+ void derivedOnClientDisconnect(Client* client) override {
invariant(client);
_fixture->_onClientDisconnect();
}
diff --git a/src/mongo/unittest/SConscript b/src/mongo/unittest/SConscript
index 30b4d352ac6..7abf76854de 100644
--- a/src/mongo/unittest/SConscript
+++ b/src/mongo/unittest/SConscript
@@ -17,7 +17,6 @@ utEnv.Library(
'matcher_core.cpp',
'temp_dir.cpp',
'unittest.cpp',
- 'unittest.idl',
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
diff --git a/src/mongo/unittest/death_test.cpp b/src/mongo/unittest/death_test.cpp
index 44c7b3abe4a..e7a778c97ea 100644
--- a/src/mongo/unittest/death_test.cpp
+++ b/src/mongo/unittest/death_test.cpp
@@ -35,6 +35,7 @@
#include "mongo/bson/json.h"
#include "mongo/unittest/death_test.h"
+#include "mongo/unittest/temp_dir.h"
#include "mongo/util/exit_code.h"
#ifndef _WIN32
@@ -120,7 +121,7 @@ void initDeathTest() {
#ifdef DEATH_TEST_ENABLED
struct DeathTestBase::Subprocess {
void run();
- void execChild();
+ void execChild(std::string tempPath);
void monitorChild(FILE* fromChild);
void prepareChild(int (&pipes)[2]);
void invokeTest();
@@ -199,6 +200,8 @@ void DeathTestBase::Subprocess::run() {
}
LOGV2(6186001, "Child", "exec"_attr = doExec);
+ TempDir childTempPath{"DeathTestChildTempPath"};
+
int pipes[2];
THROWY_LIBC(pipe(pipes));
if ((child = THROWY_LIBC(fork())) != 0) {
@@ -210,14 +213,15 @@ void DeathTestBase::Subprocess::run() {
prepareChild(pipes);
if (doExec) {
// Go further: fully reboot the child with `execve`.
- execChild();
+ execChild(childTempPath.release());
} else {
+ TempDir::setTempPath(childTempPath.release());
invokeTest();
}
}
}
-void DeathTestBase::Subprocess::execChild() {
+void DeathTestBase::Subprocess::execChild(std::string tempPath) {
auto& spawnInfo = getSpawnInfo();
std::vector<std::string> av = spawnInfo.argVec;
// Arrange for the subprocess to execute only this test, exactly once.
@@ -226,9 +230,11 @@ void DeathTestBase::Subprocess::execChild() {
stripOption(av, "suite");
stripOption(av, "filter");
stripOption(av, "filterFileName");
+ stripOption(av, "tempPath");
const TestInfo* info = UnitTest::getInstance()->currentTestInfo();
av.push_back("--suite={}"_format(info->suiteName()));
av.push_back("--filter=^{}$"_format(pcrecpp::RE::QuoteMeta(std::string{info->testName()})));
+ av.push_back("--tempPath={}"_format(tempPath));
// The presence of this flag is how the test body in the child process knows it's in the
// child process, and therefore to not exec again. Its value is ignored.
av.push_back("--internalRunDeathTest=1");
diff --git a/src/mongo/unittest/temp_dir.cpp b/src/mongo/unittest/temp_dir.cpp
index 151250e8d65..22cdad347d6 100644
--- a/src/mongo/unittest/temp_dir.cpp
+++ b/src/mongo/unittest/temp_dir.cpp
@@ -52,26 +52,28 @@ namespace unittest {
namespace moe = mongo::optionenvironment;
namespace {
-boost::filesystem::path defaultRoot;
+boost::filesystem::path tempPathRoot;
-MONGO_INITIALIZER(SetTempDirDefaultRoot)(InitializerContext* context) {
- if (moe::startupOptionsParsed.count("tempPath")) {
- defaultRoot = moe::startupOptionsParsed["tempPath"].as<string>();
- } else {
- defaultRoot = boost::filesystem::temp_directory_path();
- }
-
- if (!boost::filesystem::exists(defaultRoot)) {
+void setTempPathRoot(boost::filesystem::path root) {
+ if (!boost::filesystem::exists(root)) {
uasserted(ErrorCodes::BadValue,
- str::stream() << "Attempted to use a tempPath (" << defaultRoot.string()
+ str::stream() << "Attempted to use a tempPath (" << root.string()
<< ") that doesn't exist");
}
- if (!boost::filesystem::is_directory(defaultRoot)) {
+ if (!boost::filesystem::is_directory(root)) {
uasserted(ErrorCodes::BadValue,
- str::stream() << "Attempted to use a tempPath (" << defaultRoot.string()
+ str::stream() << "Attempted to use a tempPath (" << root.string()
<< ") that exists, but isn't a directory");
}
+ tempPathRoot = std::move(root);
+}
+
+
+MONGO_INITIALIZER(SetTempDirDefaultRoot)(InitializerContext*) {
+ if (tempPathRoot.empty()) {
+ setTempPathRoot(boost::filesystem::temp_directory_path());
+ }
}
} // namespace
@@ -82,7 +84,7 @@ TempDir::TempDir(const std::string& namePrefix) {
const boost::filesystem::path dirName =
boost::filesystem::unique_path(namePrefix + "-%%%%-%%%%-%%%%-%%%%");
- _path = (defaultRoot / dirName).string();
+ _path = (tempPathRoot / dirName).string();
bool createdNewDirectory = boost::filesystem::create_directory(_path);
if (!createdNewDirectory) {
@@ -94,6 +96,9 @@ TempDir::TempDir(const std::string& namePrefix) {
}
TempDir::~TempDir() {
+ if (_path.empty())
+ return;
+
try {
boost::filesystem::remove_all(_path);
} catch (const std::exception& e) {
@@ -106,8 +111,7 @@ TempDir::~TempDir() {
}
void TempDir::setTempPath(string tempPath) {
- invariant(defaultRoot.empty());
- defaultRoot = std::move(tempPath);
+ setTempPathRoot(std::move(tempPath));
}
} // namespace unittest
diff --git a/src/mongo/unittest/temp_dir.h b/src/mongo/unittest/temp_dir.h
index 561c7266195..711ac9c7f4c 100644
--- a/src/mongo/unittest/temp_dir.h
+++ b/src/mongo/unittest/temp_dir.h
@@ -30,6 +30,7 @@
#pragma once
#include <string>
+#include <utility>
namespace mongo {
@@ -63,14 +64,21 @@ public:
*/
~TempDir();
+ /**
+ * Release the path encapsulated by this TempDir to be cleaned up by the caller as necessary.
+ *
+ * A released TempDir is left with an empty path, and its destructor will perform no cleanup.
+ */
+ std::string release() noexcept {
+ return std::exchange(_path, {});
+ }
+
const std::string& path() const {
return _path;
}
/**
- * Set the path where TempDir() will create temporary directories. This is a workaround
- * for situations where you might want to log, but you've not yet run the MONGO_INITIALIZERs,
- * and should be removed if ever command line parsing is seperated from MONGO_INITIALIZERs.
+ * Set the path where TempDir() will create temporary directories.
*/
static void setTempPath(std::string tempPath);
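
The new release() above is what lets death_test.cpp (earlier in this patch) create the temp directory in the parent process and hand its path to the child, leaving the parent-side TempDir destructor a no-op. A hedged sketch of that hand-off, using only the unittest API shown in this diff:

    #include <string>
    #include "mongo/unittest/temp_dir.h"

    void handOffTempDir() {
        using mongo::unittest::TempDir;
        TempDir childTempPath{"DeathTestChildTempPath"};
        // release(): this TempDir's destructor now does nothing; the returned
        // path becomes the new owner's responsibility to use and clean up.
        std::string path = childTempPath.release();
        // It could be forwarded on a child's command line (as --tempPath above),
        // or installed directly in-process:
        TempDir::setTempPath(std::move(path));
    }
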
diff --git a/src/mongo/unittest/unittest.idl b/src/mongo/unittest/unittest.idl
deleted file mode 100644
index 88a9017969c..00000000000
--- a/src/mongo/unittest/unittest.idl
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright (C) 2019-present MongoDB, Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the Server Side Public License, version 1,
-# as published by MongoDB, Inc.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# Server Side Public License for more details.
-#
-# You should have received a copy of the Server Side Public License
-# along with this program. If not, see
-# <http://www.mongodb.com/licensing/server-side-public-license>.
-#
-# As a special exception, the copyright holders give permission to link the
-# code of portions of this program with the OpenSSL library under certain
-# conditions as described in each individual source file and distribute
-# linked combinations including the program with the OpenSSL library. You
-# must comply with the Server Side Public License in all respects for
-# all of the code used other than as permitted herein. If you modify file(s)
-# with this exception, you may extend this exception to your version of the
-# file(s), but you are not obligated to do so. If you do not wish to do so,
-# delete this exception statement from your version. If you delete this
-# exception statement from all source files in the program, then also delete
-# it in the license file.
-#
-
-global:
- cpp_namespace: "mongo::unittest"
-
-configs:
- tempPath:
- description: 'Directory to place mongo::TempDir subdirectories'
- arg_vartype: String
- source: cli
diff --git a/src/mongo/unittest/unittest_main.cpp b/src/mongo/unittest/unittest_main.cpp
index abdade8a556..063a086a047 100644
--- a/src/mongo/unittest/unittest_main.cpp
+++ b/src/mongo/unittest/unittest_main.cpp
@@ -39,6 +39,7 @@
#include "mongo/logv2/log_domain_global.h"
#include "mongo/logv2/log_manager.h"
#include "mongo/unittest/log_test.h"
+#include "mongo/unittest/temp_dir.h"
#include "mongo/unittest/unittest.h"
#include "mongo/unittest/unittest_options_gen.h"
#include "mongo/util/options_parser/environment.h"
@@ -105,6 +106,10 @@ int main(int argc, char** argv) {
environment.get("fileNameFilter", &fileNameFilter).ignore();
environment.get("internalRunDeathTest", &internalRunDeathTest).ignore();
+ if (environment.count("tempPath")) {
+ ::mongo::unittest::TempDir::setTempPath(environment["tempPath"].as<std::string>());
+ }
+
mongo::unittest::getSpawnInfo() = {argVec, internalRunDeathTest, true};
if (std::any_of(verbose.cbegin(), verbose.cend(), [](char ch) { return ch != 'v'; })) {
diff --git a/src/mongo/unittest/unittest_options.idl b/src/mongo/unittest/unittest_options.idl
index 6b928ba5856..6f572a3e961 100644
--- a/src/mongo/unittest/unittest_options.idl
+++ b/src/mongo/unittest/unittest_options.idl
@@ -58,3 +58,6 @@ configs:
internalRunDeathTest:
description: "Used internally to resume a death test in the child process."
arg_vartype: String
+ tempPath:
+ description: 'Directory to place mongo::TempDir subdirectories'
+ arg_vartype: String
diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript
index 69e4cf54149..308e3777a44 100644
--- a/src/mongo/util/SConscript
+++ b/src/mongo/util/SConscript
@@ -571,6 +571,34 @@ env.Library(
],
)
+pcre_env = env.Clone()
+pcre_env.InjectThirdParty(libraries=['pcre2'])
+pcre_env.Library(
+ target=[
+ 'pcre_wrapper',
+ ],
+ source=[
+ 'pcre.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ ],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/third_party/shim_pcre2',
+ ],
+)
+
+env.Library(
+ target='pcre_util',
+ source=[
+ 'pcre_util.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ 'pcre_wrapper',
+ ],
+)
+
env.Benchmark(
target='hash_table_bm',
source='hash_table_bm.cpp',
@@ -707,6 +735,8 @@ icuEnv.CppUnitTest(
'md5main.cpp',
'out_of_line_executor_test.cpp',
'packaged_task_test.cpp',
+ 'pcre_test.cpp',
+ 'pcre_util_test.cpp',
'periodic_runner_impl_test.cpp',
'processinfo_test.cpp',
'procparser_test.cpp' if env.TargetOSIs('linux') else [],
@@ -743,6 +773,8 @@ icuEnv.CppUnitTest(
'icu',
'latch_analyzer' if get_option('use-diagnostic-latches') == 'on' else [],
'md5',
+ 'pcre_util',
+ 'pcre_wrapper',
'periodic_runner_impl',
'processinfo',
'procparser' if env.TargetOSIs('linux') else [],
diff --git a/src/mongo/util/assert_util.h b/src/mongo/util/assert_util.h
index e3ca9855b4c..71ed502aecc 100644
--- a/src/mongo/util/assert_util.h
+++ b/src/mongo/util/assert_util.h
@@ -481,12 +481,13 @@ inline void massertStatusOKWithLocation(const Status& status, const char* file,
}
}
-#define MONGO_BASE_ASSERT_VA_FAILED(fail_func, ...) \
- do { \
- [&]() MONGO_COMPILER_COLD_FUNCTION { \
- fail_func(::mongo::error_details::makeStatus(__VA_ARGS__), MONGO_SOURCE_LOCATION()); \
- }(); \
- MONGO_COMPILER_UNREACHABLE; \
+#define MONGO_BASE_ASSERT_VA_FAILED(fail_func, ...) \
+ do { \
+ auto mongoSourceLocation = MONGO_SOURCE_LOCATION(); \
+ [&]() MONGO_COMPILER_COLD_FUNCTION { \
+ fail_func(::mongo::error_details::makeStatus(__VA_ARGS__), mongoSourceLocation); \
+ }(); \
+ MONGO_COMPILER_UNREACHABLE; \
} while (false)
#define MONGO_BASE_ASSERT_VA_4(fail_func, code, msg, cond) \
@@ -709,3 +710,21 @@ Status exceptionToStatus() noexcept;
* Like `MONGO_UNREACHABLE`, but triggers a `tassert` instead of an `invariant`
*/
#define MONGO_UNREACHABLE_TASSERT(msgid) tasserted(msgid, "Hit a MONGO_UNREACHABLE_TASSERT!")
+
+/**
+ * Produces an invariant failure if executed. Subset of MONGO_UNREACHABLE, but specifically
+ * to indicate that the program has reached a function that is unimplemented and should be
+ * unreachable from production.
+ * Example:
+ *
+ * void myFuncToDo() {
+ * MONGO_UNIMPLEMENTED;
+ * }
+ */
+#define MONGO_UNIMPLEMENTED \
+ ::mongo::invariantFailed("Hit a MONGO_UNIMPLEMENTED!", __FILE__, __LINE__);
+
+/**
+ * Like `MONGO_UNIMPLEMENTED`, but triggers a `tassert` instead of an `invariant`
+ */
+#define MONGO_UNIMPLEMENTED_TASSERT(msgid) tasserted(msgid, "Hit a MONGO_UNIMPLEMENTED_TASSERT!")
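
A short hedged sketch of how the two new macros are meant to be used, mirroring the tests in the next hunk: MONGO_UNIMPLEMENTED terminates via an invariant failure, while the _TASSERT form throws a catchable DBException (and, per the test below, still forces an eventual abort in test builds).

    #include "mongo/util/assert_util.h"

    void featureNotYetWritten() {
        MONGO_UNIMPLEMENTED;  // fatal: invariant failure, the process terminates here
    }

    void featureNotYetWrittenNonFatal() {
        // 6634500 is simply a unique assertion id, as used in the test below.
        MONGO_UNIMPLEMENTED_TASSERT(6634500);  // throws a DBException
    }
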
diff --git a/src/mongo/util/assert_util_test.cpp b/src/mongo/util/assert_util_test.cpp
index 8d2688eaacc..ff2f0243aef 100644
--- a/src/mongo/util/assert_util_test.cpp
+++ b/src/mongo/util/assert_util_test.cpp
@@ -336,6 +336,16 @@ DEATH_TEST(TassertTerminationTest, mongoUnreachableNonFatal, "Hit a MONGO_UNREAC
}
}
+DEATH_TEST_REGEX(TassertTerminationTest,
+ mongoUnimplementedNonFatal,
+ "6634500.*Hit a MONGO_UNIMPLEMENTED_TASSERT!") {
+ try {
+ MONGO_UNIMPLEMENTED_TASSERT(6634500);
+ } catch (const DBException&) {
+ // Catch the DBException, to ensure that we eventually abort during clean exit.
+ }
+}
+
// fassert and its friends
DEATH_TEST(FassertionTerminationTest, fassert, "40206") {
fassert(40206, false);
@@ -392,6 +402,10 @@ DEATH_TEST(InvariantTerminationTest, invariantOverload, "Terminating with invari
invariant(Status(ErrorCodes::InternalError, "Terminating with invariant"));
}
+DEATH_TEST(InvariantTerminationTest, mongoUnimplementedFatal, "Hit a MONGO_UNIMPLEMENTED!") {
+ MONGO_UNIMPLEMENTED;
+}
+
DEATH_TEST(InvariantTerminationTest, invariantStatusWithOverload, "Terminating with invariant") {
invariant(StatusWith<std::string>(ErrorCodes::InternalError,
"Terminating with invariantStatusWithOverload"));
diff --git a/src/mongo/util/concurrency/ticketholder.h b/src/mongo/util/concurrency/ticketholder.h
index f521b7989f8..fd51cbd7832 100644
--- a/src/mongo/util/concurrency/ticketholder.h
+++ b/src/mongo/util/concurrency/ticketholder.h
@@ -231,6 +231,9 @@ public:
}
Ticket& operator=(Ticket&& t) {
+ if (&t == this) {
+ return *this;
+ }
invariant(!valid(), "Attempting to overwrite a valid ticket with another one");
_ticketholder = t._ticketholder;
_admissionContext = t._admissionContext;
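
The guard added above makes Ticket's move assignment safe under self-move, which would otherwise trip the `!valid()` invariant even though nothing needed to change. A generic sketch of the same pattern on a hypothetical Handle type:

    #include <cassert>

    class Handle {
    public:
        Handle& operator=(Handle&& other) {
            if (&other == this) {
                return *this;  // self-move: leave the object untouched
            }
            assert(!_valid && "Attempting to overwrite a valid handle");
            _valid = other._valid;
            other._valid = false;
            return *this;
        }

    private:
        bool _valid = false;
    };
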
diff --git a/src/mongo/util/dns_query_posix-impl.h b/src/mongo/util/dns_query_posix-impl.h
index 8c39084deaf..93431114f7f 100644
--- a/src/mongo/util/dns_query_posix-impl.h
+++ b/src/mongo/util/dns_query_posix-impl.h
@@ -188,7 +188,7 @@ public:
uasserted(ErrorCodes::DNSProtocolError, "DNS CNAME record could not be decompressed");
}
- return std::string(&buf[0], length);
+ return std::string(&buf[0]);
}
DNSQueryType getType() const {
diff --git a/src/mongo/util/pcre.cpp b/src/mongo/util/pcre.cpp
new file mode 100644
index 00000000000..fe0f71fa640
--- /dev/null
+++ b/src/mongo/util/pcre.cpp
@@ -0,0 +1,508 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/util/pcre.h"
+
+#include <fmt/format.h>
+
+#define PCRE2_CODE_UNIT_WIDTH 8 // Select 8-bit PCRE2 library.
+#include <pcre2.h>
+
+#include "mongo/base/error_codes.h"
+#include "mongo/logv2/log.h"
+#include "mongo/util/assert_util.h"
+#include "mongo/util/errno_util.h"
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
+
+namespace mongo::pcre {
+namespace {
+
+using namespace fmt::literals;
+using namespace std::string_literals;
+
+std::string pcre2ErrorMessage(int e) {
+ char buf[120];
+ int len = pcre2_get_error_message(e, reinterpret_cast<PCRE2_UCHAR*>(buf), sizeof(buf));
+ if (len < 0) {
+ return "Failed to get PCRE2 error message for code {}: {}"_format(e, [metaError = len] {
+ switch (metaError) {
+ case PCRE2_ERROR_NOMEMORY:
+ return "NOMEMORY"s;
+ case PCRE2_ERROR_BADDATA:
+ return "BADDATA"s;
+ default:
+ return "code={}"_format(metaError);
+ }
+ }());
+ }
+ return std::string(buf, len);
+}
+
+#define X_(name) std::pair{Errc::name, PCRE2_##name},
+constexpr std::array errTable{MONGO_PCRE_ERROR_EXPAND_TABLE_(X_)};
+#undef X_
+
+Errc toErrc(int e) {
+ if (e == 0)
+ return Errc::OK;
+ auto it =
+ std::find_if(errTable.begin(), errTable.end(), [&](auto&& p) { return e == p.second; });
+ iassert(ErrorCodes::BadValue, "Unknown pcre2 error {}"_format(e), it != errTable.end());
+ return it->first;
+}
+
+int fromErrc(Errc e) {
+ if (e == Errc::OK)
+ return 0;
+ auto it =
+ std::find_if(errTable.begin(), errTable.end(), [&](auto&& p) { return e == p.first; });
+ iassert(ErrorCodes::BadValue,
+ "Unknown pcre::Errc {}"_format(static_cast<int>(e)),
+ it != errTable.end());
+ return it->second;
+}
+
+} // namespace
+
+inline namespace options {
+#define X_(name) const CompileOptions name{PCRE2_##name};
+MONGO_PCRE_OPTION_EXPAND_TABLE_COMPILE_(X_)
+#undef X_
+
+#define X_(name) const MatchOptions name{PCRE2_##name};
+MONGO_PCRE_OPTION_EXPAND_TABLE_MATCH_(X_)
+#undef X_
+
+#define X_(name) const CompileAndMatchOptions name{PCRE2_##name};
+MONGO_PCRE_OPTION_EXPAND_TABLE_COMPILE_AND_MATCH_(X_)
+#undef X_
+} // namespace options
+
+const std::error_category& pcreCategory() noexcept {
+ class PcreCategory : public std::error_category {
+ public:
+ const char* name() const noexcept override {
+ return "pcre2";
+ }
+ std::string message(int e) const override {
+ return pcre2ErrorMessage(fromErrc(Errc{e}));
+ }
+ };
+ static StaticImmortal<PcreCategory> singleton{};
+ return *singleton;
+}
+
+namespace detail {
+
+class MatchDataImpl;
+
+// Global. Value is historical carryover from pcre1 and pcrecpp.
+// It's user-facing, so record and enforce its value even if the
+// engine can now support longer patterns.
+inline constexpr size_t kMaxPatternLength = 32761;
+
+/** Wrapper around a pcre2_compile_context. */
+class CompileContext {
+public:
+ CompileContext() {
+ invariant(_ptr);
+ }
+
+ std::error_code setMaxPatternLength(size_t sz) {
+ invariant(_ptr);
+ if (int err = pcre2_set_max_pattern_length(_ptr.get(), sz))
+ return toErrc(err);
+ return {};
+ }
+
+ pcre2_compile_context* get() const {
+ return _ptr.get();
+ }
+
+private:
+ struct D {
+ void operator()(pcre2_compile_context* p) const {
+ pcre2_compile_context_free(p);
+ }
+ };
+ std::unique_ptr<pcre2_compile_context, D> _ptr{pcre2_compile_context_create(nullptr)};
+};
+
+/** Members implement Regex interface and are documented there. */
+class RegexImpl {
+public:
+ RegexImpl(std::string pattern, CompileOptions options) : _pattern{std::move(pattern)} {
+ int err = 0;
+ CompileContext compileContext;
+ if (auto ec = compileContext.setMaxPatternLength(kMaxPatternLength)) {
+ _error = ec;
+ return;
+ }
+ _code = pcre2_compile((const unsigned char*)_pattern.data(),
+ _pattern.size(),
+ static_cast<uint32_t>(options),
+ &err,
+ &_errorPos,
+ compileContext.get());
+ if (!_code)
+ _error = toErrc(err);
+ }
+ ~RegexImpl() = default;
+ RegexImpl(const RegexImpl&) = default;
+ RegexImpl& operator=(const RegexImpl&) = default;
+ RegexImpl(RegexImpl&&) = default;
+ RegexImpl& operator=(RegexImpl&&) = default;
+
+ explicit operator bool() const {
+ return !_error;
+ }
+
+ std::error_code error() const {
+ return _error;
+ }
+
+ size_t errorPosition() const {
+ return _errorPos;
+ }
+
+ const std::string& pattern() const {
+ return _pattern;
+ }
+
+ CompileOptions options() const {
+ uint32_t n = 0;
+ if (*this) {
+ int e = pcre2_pattern_info(&*_code, PCRE2_INFO_ARGOPTIONS, &n);
+ iassert(6527603, errorMessage(toErrc(e)), !e);
+ }
+ return CompileOptions{n};
+ }
+
+ size_t captureCount() const {
+ uint32_t n = 0;
+ if (*this) {
+ int e = pcre2_pattern_info(&*_code, PCRE2_INFO_CAPTURECOUNT, &n);
+ iassert(6527604, errorMessage(toErrc(e)), !e);
+ }
+ return n;
+ }
+
+ size_t codeSize() const {
+ size_t tot = sizeof(*this);
+ if (*this) {
+ size_t patSz;
+ if (!pcre2_pattern_info(&*_code, PCRE2_INFO_SIZE, &patSz))
+ tot += patSz;
+ }
+ return tot;
+ }
+
+ MatchData match(std::string input, MatchOptions options, size_t startPos) const;
+ MatchData matchView(StringData input, MatchOptions options, size_t startPos) const;
+
+ int substitute(StringData replacement,
+ std::string* str,
+ MatchOptions options,
+ size_t startPos) const {
+ std::string buf;
+ buf.resize((str->size() + 16) * 2);
+ bool probing = true;
+ int subs;
+ while (true) {
+ MatchOptions trialOptions = options;
+ if (probing)
+ trialOptions |= SUBSTITUTE_OVERFLOW_LENGTH;
+ size_t bufSize = buf.size();
+ subs = pcre2_substitute(&*_code,
+ (PCRE2_SPTR)str->c_str(),
+ str->size(),
+ startPos,
+ static_cast<uint32_t>(trialOptions),
+ (pcre2_match_data*)nullptr,
+ (pcre2_match_context*)nullptr,
+ (PCRE2_SPTR)replacement.rawData(),
+ replacement.size(),
+ (PCRE2_UCHAR*)buf.data(),
+ &bufSize);
+ if (subs < 0) {
+ if (probing && subs == PCRE2_ERROR_NOMEMORY) {
+ probing = false;
+ buf.resize(bufSize + 1);
+ continue;
+ }
+ iasserted(ErrorCodes::UnknownError,
+ "substitute: {}"_format(errorMessage(toErrc(subs))));
+ }
+ buf.resize(bufSize);
+ break;
+ }
+ *str = std::move(buf);
+ return subs;
+ }
+
+ pcre2_code* code() const {
+ return _code;
+ };
+
+private:
+ class CodeHandle {
+ public:
+ CodeHandle() = default;
+ CodeHandle(pcre2_code* code) : _p{code} {}
+ CodeHandle(const CodeHandle& o) : _p{pcre2_code_copy(o._p)} {}
+ CodeHandle& operator=(const CodeHandle& o) {
+ if (this != &o)
+ *this = CodeHandle{o}; // move-assign a forced copy
+ return *this;
+ }
+ CodeHandle(CodeHandle&& o) noexcept : _p{std::exchange(o._p, {})} {}
+ CodeHandle& operator=(CodeHandle&& o) noexcept {
+ using std::swap;
+ swap(_p, o._p);
+ return *this;
+ }
+
+ operator pcre2_code*() const {
+ return _p;
+ }
+
+ ~CodeHandle() {
+ pcre2_code_free(_p);
+ }
+
+ private:
+ pcre2_code* _p = nullptr;
+ };
+
+ MatchData _doMatch(std::unique_ptr<MatchDataImpl> m,
+ MatchOptions options,
+ size_t startPos) const;
+
+ std::string _pattern;
+ CodeHandle _code;
+ std::error_code _error;
+ size_t _errorPos;
+};
+
+/** Members implement MatchData interface and are documented there. */
+class MatchDataImpl {
+public:
+ explicit MatchDataImpl(const RegexImpl* regex) : _regex{regex} {}
+
+ explicit operator bool() const {
+ return !_error;
+ }
+
+ size_t captureCount() const {
+ return _regex->captureCount();
+ }
+
+ StringData operator[](size_t i) const {
+ invariant(_data);
+ // Using direct offset vector access. It's pairs of size_t offsets.
+ // Captures can be unpopulated, represented by PCRE2_UNSET elements.
+ size_t* p = pcre2_get_ovector_pointer(&*_data);
+ size_t n = pcre2_get_ovector_count(&*_data);
+ if (!(i < n))
+ iasserted(ErrorCodes::NoSuchKey, "Access element {} of {}"_format(i, n));
+ size_t b = p[2 * i + 0];
+ size_t e = p[2 * i + 1];
+ if (b == PCRE2_UNSET)
+ return {};
+ return StringData(_input.substr(b, e - b));
+ }
+
+ StringData operator[](const std::string& name) const {
+ invariant(*_regex);
+ int rc = pcre2_substring_number_from_name(_regex->code(), (PCRE2_SPTR)name.c_str());
+ if (rc < 0) {
+ iasserted(ErrorCodes::NoSuchKey,
+ "MatchData[{}]: {}"_format(name, errorMessage(toErrc(rc))));
+ }
+ return (*this)[rc];
+ }
+
+ std::vector<StringData> getMatchList() const {
+ std::vector<StringData> vec;
+ if (*_regex) {
+ for (size_t i = 0; i <= captureCount(); ++i)
+ vec.push_back((*this)[i]);
+ }
+ return vec;
+ }
+
+ std::vector<StringData> getCaptures() const {
+ std::vector<StringData> vec;
+ if (*_regex) {
+ for (size_t i = 1; i <= captureCount(); ++i)
+ vec.push_back((*this)[i]);
+ }
+ return vec;
+ }
+
+ std::error_code error() const {
+ return _error;
+ }
+
+ StringData input() const {
+ return _input;
+ }
+
+ size_t startPos() const {
+ return _startPos;
+ }
+
+ void setInput(std::string s) {
+ _input = _inputStorage = std::move(s);
+ }
+
+ void setInputView(StringData s) {
+ _input = s;
+ }
+
+ pcre2_match_data* matchData() const {
+ return _data.get();
+ }
+
+ void doMatch(MatchOptions options, size_t startPos) {
+ _startPos = startPos;
+ _data.reset(pcre2_match_data_create_from_pattern(_regex->code(), nullptr));
+ int matched = pcre2_match(_regex->code(),
+ (PCRE2_SPTR)_input.rawData(),
+ _input.size(),
+ startPos,
+ static_cast<uint32_t>(options),
+ _data.get(),
+ nullptr);
+ if (matched < 0)
+ _error = toErrc(matched);
+ _highestCaptureIndex = matched;
+ }
+
+private:
+ struct FreeMatchData {
+ void operator()(pcre2_match_data* md) const {
+ pcre2_match_data_free(md);
+ }
+ };
+
+ const RegexImpl* _regex;
+ std::error_code _error;
+ size_t _highestCaptureIndex = size_t(-1);
+ std::string _inputStorage;
+ StringData _input;
+ size_t _startPos = 0;
+ std::unique_ptr<pcre2_match_data, FreeMatchData> _data;
+};
+
+MatchData RegexImpl::match(std::string input, MatchOptions options, size_t startPos) const {
+ auto m = std::make_unique<MatchDataImpl>(this);
+ m->setInput(std::move(input));
+ return _doMatch(std::move(m), options, startPos);
+}
+
+MatchData RegexImpl::matchView(StringData input, MatchOptions options, size_t startPos) const {
+ auto m = std::make_unique<MatchDataImpl>(this);
+ m->setInputView(input);
+ return _doMatch(std::move(m), options, startPos);
+}
+
+MatchData RegexImpl::_doMatch(std::unique_ptr<MatchDataImpl> m,
+ MatchOptions options,
+ size_t startPos) const {
+ if (*this)
+ m->doMatch(options, startPos);
+ return MatchData{std::move(m)};
+}
+
+} // namespace detail
+
+Regex::Regex(std::string pattern, CompileOptions options)
+ : _impl{std::make_unique<detail::RegexImpl>(std::move(pattern), options)} {}
+
+Regex::~Regex() = default;
+
+Regex::Regex(const Regex& that)
+ : _impl{that._impl ? std::make_unique<detail::RegexImpl>(*that._impl) : nullptr} {}
+
+Regex& Regex::operator=(const Regex& that) {
+ if (this != &that)
+ *this = Regex{that}; // move-assign of forced copy
+ return *this;
+}
+
+Regex::Regex(Regex&&) noexcept = default;
+Regex& Regex::operator=(Regex&&) noexcept = default;
+
+MatchData::MatchData(std::unique_ptr<detail::MatchDataImpl> impl) : _impl{std::move(impl)} {}
+MatchData::~MatchData() = default;
+MatchData::MatchData(MatchData&&) noexcept = default;
+MatchData& MatchData::operator=(MatchData&&) noexcept = default;
+
+
+#define UNPAREN_ARGS_(...) __VA_ARGS__
+#define UNPAREN_STRIP_(x) x
+#define UNPAREN(x) UNPAREN_STRIP_(UNPAREN_ARGS_ x)
+
+// Define pimpl-forwarding const member functions.
+#define IFWD(Class, MemberFunc, Ret, Args, FwdArgs) \
+ UNPAREN(Ret) Class::MemberFunc Args const { \
+ invariant(_impl, "Use after move"); \
+ return _impl->MemberFunc FwdArgs; \
+ }
+
+IFWD(Regex, pattern, (const std::string&), (), ())
+IFWD(Regex, options, (CompileOptions), (), ())
+IFWD(Regex, operator bool,(), (), ())
+IFWD(Regex, error, (std::error_code), (), ())
+IFWD(Regex, errorPosition, (size_t), (), ())
+IFWD(Regex, captureCount, (size_t), (), ())
+IFWD(Regex, codeSize, (size_t), (), ())
+IFWD(Regex, match, (MatchData), (std::string in, MatchOptions opt, size_t p), (in, opt, p))
+IFWD(Regex, matchView, (MatchData), (StringData in, MatchOptions opt, size_t p), (in, opt, p))
+IFWD(Regex,
+ substitute,
+ (int),
+ (StringData r, std::string* s, MatchOptions o, size_t p),
+ (r, s, o, p))
+
+IFWD(MatchData, operator bool,(), (), ())
+IFWD(MatchData, captureCount, (size_t), (), ())
+IFWD(MatchData, operator[],(StringData), (size_t i), (i))
+IFWD(MatchData, operator[],(StringData), (const std::string& name), (name))
+IFWD(MatchData, getCaptures, (std::vector<StringData>), (), ())
+IFWD(MatchData, getMatchList, (std::vector<StringData>), (), ())
+IFWD(MatchData, error, (std::error_code), (), ())
+IFWD(MatchData, input, (StringData), (), ())
+IFWD(MatchData, startPos, (size_t), (), ())
+
+#undef IFWD
+
+} // namespace mongo::pcre
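
RegexImpl::substitute above first probes with SUBSTITUTE_OVERFLOW_LENGTH to learn the required output size, then retries with a buffer of that size. A hedged sketch of the public wrapper in use, assuming only the mongo::pcre interface declared in the next file; the expected results follow ordinary PCRE2 substitution semantics.

    #include <string>
    #include "mongo/util/pcre.h"

    void substituteExample() {
        using namespace mongo::pcre;
        Regex re{"a+"};
        std::string s = "banana";
        int n = re.substitute("X", &s);                 // first match only: s == "bXnana", n == 1
        s = "banana";
        n = re.substitute("X", &s, SUBSTITUTE_GLOBAL);  // all matches: s == "bXnXnX", n == 3
        (void)n;
    }
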
diff --git a/src/mongo/util/pcre.h b/src/mongo/util/pcre.h
new file mode 100644
index 00000000000..ff9f705f4fc
--- /dev/null
+++ b/src/mongo/util/pcre.h
@@ -0,0 +1,642 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
+
+#include "mongo/base/string_data.h"
+
+namespace mongo::pcre {
+
+/*
+ * Mongo's C++ wrapper for the PCRE2 library. Applies mongo-isms like
+ * StringData.
+ *
+ * This wrapper is deliberately low-level and intended to be ignorant of mongo
+ * server code's app-level preferences. It provides only a general-purpose PCRE2
+ * wrapper.
+ *
+ * Care is taken to make this library self-contained through strict
+ * encapsulation. No code depending on this library needs to include the pcre2
+ * header or take on a dependency on the third_party/pcre2 library.
+ */
+
+/**
+ * The complete list of PCRE2 errors, with the `PCRE2_` prefix stripped.
+ * These are publicly available as members of the `Errc` enum class below.
+ * E.g., `mongo::pcre::Errc::ERROR_BAD_SUBPATTERN_REFERENCE`.
+ */
+#define MONGO_PCRE_ERROR_EXPAND_TABLE_(X) \
+ X(ERROR_END_BACKSLASH) \
+ X(ERROR_END_BACKSLASH_C) \
+ X(ERROR_UNKNOWN_ESCAPE) \
+ X(ERROR_QUANTIFIER_OUT_OF_ORDER) \
+ X(ERROR_QUANTIFIER_TOO_BIG) \
+ X(ERROR_MISSING_SQUARE_BRACKET) \
+ X(ERROR_ESCAPE_INVALID_IN_CLASS) \
+ X(ERROR_CLASS_RANGE_ORDER) \
+ X(ERROR_QUANTIFIER_INVALID) \
+ X(ERROR_INTERNAL_UNEXPECTED_REPEAT) \
+ X(ERROR_INVALID_AFTER_PARENS_QUERY) \
+ X(ERROR_POSIX_CLASS_NOT_IN_CLASS) \
+ X(ERROR_POSIX_NO_SUPPORT_COLLATING) \
+ X(ERROR_MISSING_CLOSING_PARENTHESIS) \
+ X(ERROR_BAD_SUBPATTERN_REFERENCE) \
+ X(ERROR_NULL_PATTERN) \
+ X(ERROR_BAD_OPTIONS) \
+ X(ERROR_MISSING_COMMENT_CLOSING) \
+ X(ERROR_PARENTHESES_NEST_TOO_DEEP) \
+ X(ERROR_PATTERN_TOO_LARGE) \
+ X(ERROR_HEAP_FAILED) \
+ X(ERROR_UNMATCHED_CLOSING_PARENTHESIS) \
+ X(ERROR_INTERNAL_CODE_OVERFLOW) \
+ X(ERROR_MISSING_CONDITION_CLOSING) \
+ X(ERROR_LOOKBEHIND_NOT_FIXED_LENGTH) \
+ X(ERROR_ZERO_RELATIVE_REFERENCE) \
+ X(ERROR_TOO_MANY_CONDITION_BRANCHES) \
+ X(ERROR_CONDITION_ASSERTION_EXPECTED) \
+ X(ERROR_BAD_RELATIVE_REFERENCE) \
+ X(ERROR_UNKNOWN_POSIX_CLASS) \
+ X(ERROR_INTERNAL_STUDY_ERROR) \
+ X(ERROR_UNICODE_NOT_SUPPORTED) \
+ X(ERROR_PARENTHESES_STACK_CHECK) \
+ X(ERROR_CODE_POINT_TOO_BIG) \
+ X(ERROR_LOOKBEHIND_TOO_COMPLICATED) \
+ X(ERROR_LOOKBEHIND_INVALID_BACKSLASH_C) \
+ X(ERROR_UNSUPPORTED_ESCAPE_SEQUENCE) \
+ X(ERROR_CALLOUT_NUMBER_TOO_BIG) \
+ X(ERROR_MISSING_CALLOUT_CLOSING) \
+ X(ERROR_ESCAPE_INVALID_IN_VERB) \
+ X(ERROR_UNRECOGNIZED_AFTER_QUERY_P) \
+ X(ERROR_MISSING_NAME_TERMINATOR) \
+ X(ERROR_DUPLICATE_SUBPATTERN_NAME) \
+ X(ERROR_INVALID_SUBPATTERN_NAME) \
+ X(ERROR_UNICODE_PROPERTIES_UNAVAILABLE) \
+ X(ERROR_MALFORMED_UNICODE_PROPERTY) \
+ X(ERROR_UNKNOWN_UNICODE_PROPERTY) \
+ X(ERROR_SUBPATTERN_NAME_TOO_LONG) \
+ X(ERROR_TOO_MANY_NAMED_SUBPATTERNS) \
+ X(ERROR_CLASS_INVALID_RANGE) \
+ X(ERROR_OCTAL_BYTE_TOO_BIG) \
+ X(ERROR_INTERNAL_OVERRAN_WORKSPACE) \
+ X(ERROR_INTERNAL_MISSING_SUBPATTERN) \
+ X(ERROR_DEFINE_TOO_MANY_BRANCHES) \
+ X(ERROR_BACKSLASH_O_MISSING_BRACE) \
+ X(ERROR_INTERNAL_UNKNOWN_NEWLINE) \
+ X(ERROR_BACKSLASH_G_SYNTAX) \
+ X(ERROR_PARENS_QUERY_R_MISSING_CLOSING) \
+ X(ERROR_VERB_ARGUMENT_NOT_ALLOWED) \
+ X(ERROR_VERB_UNKNOWN) \
+ X(ERROR_SUBPATTERN_NUMBER_TOO_BIG) \
+ X(ERROR_SUBPATTERN_NAME_EXPECTED) \
+ X(ERROR_INTERNAL_PARSED_OVERFLOW) \
+ X(ERROR_INVALID_OCTAL) \
+ X(ERROR_SUBPATTERN_NAMES_MISMATCH) \
+ X(ERROR_MARK_MISSING_ARGUMENT) \
+ X(ERROR_INVALID_HEXADECIMAL) \
+ X(ERROR_BACKSLASH_C_SYNTAX) \
+ X(ERROR_BACKSLASH_K_SYNTAX) \
+ X(ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS) \
+ X(ERROR_BACKSLASH_N_IN_CLASS) \
+ X(ERROR_CALLOUT_STRING_TOO_LONG) \
+ X(ERROR_UNICODE_DISALLOWED_CODE_POINT) \
+ X(ERROR_UTF_IS_DISABLED) \
+ X(ERROR_UCP_IS_DISABLED) \
+ X(ERROR_VERB_NAME_TOO_LONG) \
+ X(ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG) \
+ X(ERROR_MISSING_OCTAL_OR_HEX_DIGITS) \
+ X(ERROR_VERSION_CONDITION_SYNTAX) \
+ X(ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS) \
+ X(ERROR_CALLOUT_NO_STRING_DELIMITER) \
+ X(ERROR_CALLOUT_BAD_STRING_DELIMITER) \
+ X(ERROR_BACKSLASH_C_CALLER_DISABLED) \
+ X(ERROR_QUERY_BARJX_NEST_TOO_DEEP) \
+ X(ERROR_BACKSLASH_C_LIBRARY_DISABLED) \
+ X(ERROR_PATTERN_TOO_COMPLICATED) \
+ X(ERROR_LOOKBEHIND_TOO_LONG) \
+ X(ERROR_PATTERN_STRING_TOO_LONG) \
+ X(ERROR_INTERNAL_BAD_CODE) \
+ X(ERROR_INTERNAL_BAD_CODE_IN_SKIP) \
+ X(ERROR_NO_SURROGATES_IN_UTF16) \
+ X(ERROR_BAD_LITERAL_OPTIONS) \
+ X(ERROR_SUPPORTED_ONLY_IN_UNICODE) \
+ X(ERROR_INVALID_HYPHEN_IN_OPTIONS) \
+ X(ERROR_ALPHA_ASSERTION_UNKNOWN) \
+ X(ERROR_SCRIPT_RUN_NOT_AVAILABLE) \
+ X(ERROR_TOO_MANY_CAPTURES) \
+ X(ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED) \
+ X(ERROR_BACKSLASH_K_IN_LOOKAROUND) \
+ X(ERROR_NOMATCH) \
+ X(ERROR_PARTIAL) \
+ X(ERROR_UTF8_ERR1) \
+ X(ERROR_UTF8_ERR2) \
+ X(ERROR_UTF8_ERR3) \
+ X(ERROR_UTF8_ERR4) \
+ X(ERROR_UTF8_ERR5) \
+ X(ERROR_UTF8_ERR6) \
+ X(ERROR_UTF8_ERR7) \
+ X(ERROR_UTF8_ERR8) \
+ X(ERROR_UTF8_ERR9) \
+ X(ERROR_UTF8_ERR10) \
+ X(ERROR_UTF8_ERR11) \
+ X(ERROR_UTF8_ERR12) \
+ X(ERROR_UTF8_ERR13) \
+ X(ERROR_UTF8_ERR14) \
+ X(ERROR_UTF8_ERR15) \
+ X(ERROR_UTF8_ERR16) \
+ X(ERROR_UTF8_ERR17) \
+ X(ERROR_UTF8_ERR18) \
+ X(ERROR_UTF8_ERR19) \
+ X(ERROR_UTF8_ERR20) \
+ X(ERROR_UTF8_ERR21) \
+ X(ERROR_UTF16_ERR1) \
+ X(ERROR_UTF16_ERR2) \
+ X(ERROR_UTF16_ERR3) \
+ X(ERROR_UTF32_ERR1) \
+ X(ERROR_UTF32_ERR2) \
+ X(ERROR_BADDATA) \
+ X(ERROR_MIXEDTABLES) \
+ X(ERROR_BADMAGIC) \
+ X(ERROR_BADMODE) \
+ X(ERROR_BADOFFSET) \
+ X(ERROR_BADOPTION) \
+ X(ERROR_BADREPLACEMENT) \
+ X(ERROR_BADUTFOFFSET) \
+ X(ERROR_CALLOUT) \
+ X(ERROR_DFA_BADRESTART) \
+ X(ERROR_DFA_RECURSE) \
+ X(ERROR_DFA_UCOND) \
+ X(ERROR_DFA_UFUNC) \
+ X(ERROR_DFA_UITEM) \
+ X(ERROR_DFA_WSSIZE) \
+ X(ERROR_INTERNAL) \
+ X(ERROR_JIT_BADOPTION) \
+ X(ERROR_JIT_STACKLIMIT) \
+ X(ERROR_MATCHLIMIT) \
+ X(ERROR_NOMEMORY) \
+ X(ERROR_NOSUBSTRING) \
+ X(ERROR_NOUNIQUESUBSTRING) \
+ X(ERROR_NULL) \
+ X(ERROR_RECURSELOOP) \
+ X(ERROR_DEPTHLIMIT) \
+ X(ERROR_RECURSIONLIMIT) \
+ X(ERROR_UNAVAILABLE) \
+ X(ERROR_UNSET) \
+ X(ERROR_BADOFFSETLIMIT) \
+ X(ERROR_BADREPESCAPE) \
+ X(ERROR_REPMISSINGBRACE) \
+ X(ERROR_BADSUBSTITUTION) \
+ X(ERROR_BADSUBSPATTERN) \
+ X(ERROR_TOOMANYREPLACE) \
+ X(ERROR_BADSERIALIZEDDATA) \
+ X(ERROR_HEAPLIMIT) \
+ X(ERROR_CONVERT_SYNTAX) \
+ X(ERROR_INTERNAL_DUPMATCH) \
+ X(ERROR_DFA_UINVALID_UTF) \
+ /**/
+
+/**
+ * These values are usable as `CompileOptions` OR `MatchOptions`.
+ * See `CompileAndMatchOptions` below.
+ */
+#define MONGO_PCRE_OPTION_EXPAND_TABLE_COMPILE_AND_MATCH_(X) \
+ X(ANCHORED) \
+ X(NO_UTF_CHECK) \
+ X(ENDANCHORED) \
+ /**/
+
+/** Options for the `Regex` constructor. See `CompileOptions`. */
+#define MONGO_PCRE_OPTION_EXPAND_TABLE_COMPILE_(X) \
+ X(ALLOW_EMPTY_CLASS) \
+ X(ALT_BSUX) \
+ X(AUTO_CALLOUT) \
+ X(CASELESS) \
+ X(DOLLAR_ENDONLY) \
+ X(DOTALL) \
+ X(DUPNAMES) \
+ X(EXTENDED) \
+ X(FIRSTLINE) \
+ X(MATCH_UNSET_BACKREF) \
+ X(MULTILINE) \
+ X(NEVER_UCP) \
+ X(NEVER_UTF) \
+ X(NO_AUTO_CAPTURE) \
+ X(NO_AUTO_POSSESS) \
+ X(NO_DOTSTAR_ANCHOR) \
+ X(NO_START_OPTIMIZE) \
+ X(UCP) \
+ X(UNGREEDY) \
+ X(UTF) \
+ X(NEVER_BACKSLASH_C) \
+ X(ALT_CIRCUMFLEX) \
+ X(ALT_VERBNAMES) \
+ X(USE_OFFSET_LIMIT) \
+ X(EXTENDED_MORE) \
+ X(LITERAL) \
+ X(MATCH_INVALID_UTF) \
+ /**/
+
+/** Options for match and/or substitute calls. See `MatchOptions`. */
+#define MONGO_PCRE_OPTION_EXPAND_TABLE_MATCH_(X) \
+ X(NOTBOL) \
+ X(NOTEOL) \
+ X(NOTEMPTY) \
+ X(NOTEMPTY_ATSTART) \
+ X(PARTIAL_SOFT) \
+ X(PARTIAL_HARD) \
+ X(DFA_RESTART) \
+ X(DFA_SHORTEST) \
+ X(SUBSTITUTE_GLOBAL) \
+ X(SUBSTITUTE_EXTENDED) \
+ X(SUBSTITUTE_UNSET_EMPTY) \
+ X(SUBSTITUTE_UNKNOWN_UNSET) \
+ X(SUBSTITUTE_OVERFLOW_LENGTH) \
+ X(NO_JIT) \
+ X(COPY_MATCHED_SUBJECT) \
+ X(SUBSTITUTE_LITERAL) \
+ X(SUBSTITUTE_MATCHED) \
+ X(SUBSTITUTE_REPLACEMENT_ONLY) \
+ /**/
+
+#if 0
+/** Extended compile options for the compile context. Not yet used. */
+#define MONGO_PCRE_OPTION_EXPAND_TABLE_EXTRA_(X) \
+ X(EXTRA_ALLOW_SURROGATE_ESCAPES) \
+ X(EXTRA_BAD_ESCAPE_IS_LITERAL) \
+ X(EXTRA_MATCH_WORD) \
+ X(EXTRA_MATCH_LINE) \
+ X(EXTRA_ESCAPED_CR_IS_LF) \
+ X(EXTRA_ALT_BSUX) \
+ X(EXTRA_ALLOW_LOOKAROUND_BSK) \
+ /**/
+#endif // 0
+
+/**
+ * The `std::error_code` enum type for the PCRE2 error number space.
+ * `std::is_error_code_enum` is specialized for this enum type, which enables
+ * `Errc` to be implicitly convertible to `std::error_code`.
+ */
+enum class Errc : int {
+ OK = 0,
+#define X_(name) name,
+ MONGO_PCRE_ERROR_EXPAND_TABLE_(X_)
+#undef X_
+};
+
+/** Category for a pcre2 API error code. */
+const std::error_category& pcreCategory() noexcept;
+
+/** Wrap a pcre2 API error code in a std::error_code{e,errorCategory()}. */
+inline std::error_code pcreError(int e) noexcept {
+ return std::error_code(e, pcreCategory());
+}
+
+/**
+ * Creates a `std::error_code` from an `Errc`.
+ * An implicit `std::error_code` constructor finds this function by ADL.
+ */
+inline std::error_code make_error_code(Errc e) noexcept {
+ return pcreError(static_cast<std::underlying_type_t<Errc>>(e));
+}
+
+namespace detail {
+/**
+ * A typesafe wrapper around `uint32_t`, representing the bitfields of PCRE2
+ * options. A CRTP base class for the `Options` types.
+ */
+template <typename D>
+class Options {
+public:
+ constexpr Options() noexcept = default;
+ constexpr explicit Options(uint32_t v) noexcept : _v{v} {}
+ constexpr explicit operator uint32_t() const noexcept {
+ return _v;
+ }
+ constexpr explicit operator bool() const noexcept {
+ return _v;
+ }
+ constexpr friend D& operator|=(D& a, D b) noexcept {
+ return a = D{a._v | b._v};
+ }
+ constexpr friend D& operator&=(D& a, D b) noexcept {
+ return a = D{a._v & b._v};
+ }
+ constexpr friend D operator~(D a) noexcept {
+ return D{~a._v};
+ }
+ constexpr friend D operator|(D a, D b) noexcept {
+ return a |= b;
+ }
+ constexpr friend D operator&(D a, D b) noexcept {
+ return a &= b;
+ }
+
+private:
+ uint32_t _v = 0;
+};
+} // namespace detail
+
+/** The bitfield of Regex compile (constructor) options. */
+class CompileOptions : public detail::Options<CompileOptions> {
+public:
+ using detail::Options<CompileOptions>::Options;
+};
+
+/** The bitfield of `Regex` match options. */
+class MatchOptions : public detail::Options<MatchOptions> {
+public:
+ using detail::Options<MatchOptions>::Options;
+};
+
+/**
+ * A few of the PCRE2 options' bit positions are usable as compile options OR
+ * match options. We model this by making this type implicitly convertible to
+ * both `CompileOptions` and `MatchOptions`.
+ */
+class CompileAndMatchOptions : public detail::Options<CompileAndMatchOptions> {
+public:
+ using detail::Options<CompileAndMatchOptions>::Options;
+ constexpr operator CompileOptions() const noexcept {
+ return CompileOptions{uint32_t{*this}};
+ }
+ constexpr operator MatchOptions() const noexcept {
+ return MatchOptions{uint32_t{*this}};
+ }
+};
+
+/**
+ * @{
+ * The `CompileOptions`, `MatchOptions`, and `CompileAndMatchOptions` values
+ * are declared as `extern const` and not as more basic enum or constexpr values.
+ * This arrangement allows the variables to be given definitions in the
+ * pcre.cpp file, where the pcre2 library's corresponding macros are available
+ * to their initializers.
+ *
+ * It can be assumed that these variables have static constant initialization.
+ * That is, they are available for use in static initializers.
+ *
+ * The options values are given an inline namespace so they can be brought into
+ * a local scope with a `using namespace pcre::options;` directive.
+ */
+inline namespace options {
+#define X_(name) extern const CompileOptions name;
+MONGO_PCRE_OPTION_EXPAND_TABLE_COMPILE_(X_)
+#undef X_
+
+#define X_(name) extern const MatchOptions name;
+MONGO_PCRE_OPTION_EXPAND_TABLE_MATCH_(X_)
+#undef X_
+
+#define X_(name) extern const CompileAndMatchOptions name;
+MONGO_PCRE_OPTION_EXPAND_TABLE_COMPILE_AND_MATCH_(X_)
+#undef X_
+} // namespace options
+/** @} */
+
+class MatchData;
+
+namespace detail {
+class RegexImpl;
+class MatchDataImpl;
+} // namespace detail
+
+/**
+ * Wrapper class encapsulating the PCRE2 regular expression library.
+ * See https://www.pcre.org/current/doc/html/
+ */
+class Regex {
+public:
+ Regex(std::string pattern, CompileOptions options);
+
+ explicit Regex(std::string pattern) : Regex{std::move(pattern), CompileOptions{}} {}
+
+ ~Regex();
+
+ Regex(const Regex&);
+
+ Regex& operator=(const Regex&);
+
+ Regex(Regex&&) noexcept;
+
+ Regex& operator=(Regex&&) noexcept;
+
+ /** The pattern string from the constructor. */
+ const std::string& pattern() const;
+
+ /** The Options from the constructor. */
+ CompileOptions options() const;
+
+ /** True if this Regex was created without error. */
+ explicit operator bool() const;
+
+ /** The error saved from the compile of this Regex. */
+ std::error_code error() const;
+
+ /** Position in the pattern at which the compile `error` occurred. */
+ size_t errorPosition() const;
+
+ /** Count of subpattern captures in this pattern. */
+ size_t captureCount() const;
+
+ /** The size of the compiled regex. */
+ size_t codeSize() const;
+
+ /**
+ * Creates a MatchData by applying this regex to an `input` string.
+ *
+     * Options supplied at `match` time cannot be optimized as well as the
+     * equivalent behaviors (e.g. '^', '$') built into the Regex pattern itself.
+ */
+ MatchData match(std::string input, MatchOptions options, size_t startPos) const;
+ MatchData match(std::string input, MatchOptions options) const;
+ MatchData match(std::string input) const;
+
+ /** Can avoid a string copy when input will outlive the returned MatchData. */
+ MatchData matchView(StringData input, MatchOptions options, size_t startPos) const;
+ MatchData matchView(StringData input, MatchOptions options) const;
+ MatchData matchView(StringData input) const;
+
+ /**
+ * True if all of `input` matches.
+ * If possible, add '^' and '$' to the `Regex` pattern instead, as this
+ * optimizes better than match-supplied options.
+ *
+ * Legacy: prefer `Regex::matchView` with `ANCHORED | ENDANCHORED` options.
+ */
+ bool fullMatch(StringData input) const;
+
+ /**
+ * True if a substring of `input` matches.
+ * Note that PCRE2 documentation uses the term "partial match" to mean
+ * something very different.
+ *
+ * Legacy: prefer `Regex::matchView`.
+ */
+ bool partialMatch(StringData input) const;
+
+ /**
+ * Replaces occurrences in `str` of this pattern with `replacement`.
+ * Additional substitute `options` can change behavior. Important ones:
+ *
+ * - SUBSTITUTE_GLOBAL: Replace all occurrences
+ * - SUBSTITUTE_LITERAL: No $ variable expansions in replacement
+ * - SUBSTITUTE_EXTENDED: Better escapes, bash-like substitutions
+ *
+ * See https://www.pcre.org/current/doc/html/pcre2api.html#SEC36
+ */
+ int substitute(StringData replacement,
+ std::string* str,
+ MatchOptions options = {},
+ size_t startPos = 0) const;
+
+private:
+ std::unique_ptr<detail::RegexImpl> _impl;
+};
+
+/**
+ * Represents the result of a Regex match (or matchView operation).
+ * The MatchData refers to the Regex that produced it, so the Regex
+ * must outlive any MatchData it produces.
+ */
+class MatchData {
+public:
+ /** Implementation detail used by Regex to create these objects. */
+ explicit MatchData(std::unique_ptr<detail::MatchDataImpl> impl);
+
+ ~MatchData();
+
+ MatchData(MatchData&&) noexcept;
+ MatchData& operator=(MatchData&&) noexcept;
+
+ /** True if the match succeeded. */
+ explicit operator bool() const;
+
+ /**
+     * Returns the number of subpatterns captured by this match.
+     * Does not count the `m[0]` element: only subpattern captures.
+ */
+ size_t captureCount() const;
+
+ /**
+ * @{
+ * Returns a match group by index or by name. Element zero (by index) is
+ * the full matched substring, followed by captures.
+ *
+     * An empty capture and a null capture are slightly different and can be
+     * distinguished by their rawData pointers, though the difference rarely
+     * matters in practice. E.g.,
+ * Regex{"(A|B(C))"}.match("A")[2].rawData() == nullptr
+ * because capture group 2 (the `C`) was on the inactive `B` branch.
+ * Throws `ExceptionFor<NoSuchKey>` if capture not found.
+ * Requires `i <= captureCount()`.
+ */
+ StringData operator[](size_t i) const;
+ StringData operator[](const std::string& name) const;
+ /** @} */
+
+ /**
+ * All capture groups. For MatchData `m`:
+ * {m[1]... m[captureCount()]};
+ */
+ std::vector<StringData> getCaptures() const;
+
+ /** Same as `getCaptures`, but as `std::vector<std::string>`. */
+ std::vector<std::string> getCapturesStrings() const {
+ return _strVec(getCaptures());
+ }
+
+ /**
+ * The matched substring, followed by the `getCaptures()` list.
+ * For MatchData `m`:
+ * {m[0], m[1]... m[m.captureCount()]};
+ */
+ std::vector<StringData> getMatchList() const;
+
+ /** Same as `getMatchList`, but as `std::vector<std::string>`. */
+ std::vector<std::string> getMatchListStrings() const {
+ return _strVec(getMatchList());
+ }
+
+ /** Error saved from the match that created this object. */
+ std::error_code error() const;
+
+ /**
+ * The input to the match that created this object. If this MatchData was
+ * created by a `match` call, the `input` refers to a string owned by this
+ * object. If this MatchData was created by a `matchView` call, then this
+ * `input` result refers to the StringData provided to it.
+ */
+ StringData input() const;
+
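+    /** The `startPos` passed to the match call that created this MatchData. */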
+ size_t startPos() const;
+
+private:
+ static std::vector<std::string> _strVec(const std::vector<StringData>& v) {
+ std::vector<std::string> r;
+ r.reserve(v.size());
+ for (StringData s : v)
+ r.push_back(std::string{s});
+ return r;
+ }
+
+ std::unique_ptr<detail::MatchDataImpl> _impl;
+};
+
+inline MatchData Regex::match(std::string input, MatchOptions options) const {
+ return match(std::move(input), options, 0);
+}
+inline MatchData Regex::match(std::string input) const {
+ return match(std::move(input), MatchOptions{}, 0);
+}
+inline MatchData Regex::matchView(StringData input, MatchOptions options) const {
+ return matchView(input, options, 0);
+}
+inline MatchData Regex::matchView(StringData input) const {
+ return matchView(input, MatchOptions{}, 0);
+}
+
+inline bool Regex::fullMatch(StringData input) const {
+ return !matchView(input, ANCHORED | ENDANCHORED).error();
+}
+
+inline bool Regex::partialMatch(StringData input) const {
+ return !matchView(input).error();
+}
+
+} // namespace mongo::pcre
+
+namespace std {
+template <>
+struct is_error_code_enum<mongo::pcre::Errc> : std::true_type {};
+} // namespace std
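
For orientation only (this paragraph and sketch are editorial, not part of the patch): a minimal
usage example against the declarations above might look like the following. The function name
`pcreUsageSketch` is hypothetical, and the option spellings (`SUBSTITUTE_GLOBAL`) assume the
`mongo::pcre` enumerators used elsewhere in this diff.

    #include <string>

    #include "mongo/util/pcre.h"

    // Hypothetical free function, for illustration only.
    std::string pcreUsageSketch() {
        using namespace mongo;
        pcre::Regex re{"a(b*)c"};            // compiles eagerly; check with `if (re)` / `re.error()`
        auto m = re.matchView("xxabbbcxx");  // the MatchData views the caller's buffer
        StringData group = m ? m[1] : StringData{};  // m[0] is the full match, m[1] the capture
        bool whole = re.fullMatch("abbbc");  // anchored at both ends (ANCHORED | ENDANCHORED)
        std::string s{"abc abc"};
        re.substitute("X", &s, pcre::SUBSTITUTE_GLOBAL);  // s becomes "X X"
        return std::string{group} + (whole ? "" : "?");
    }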
diff --git a/src/mongo/util/pcre_test.cpp b/src/mongo/util/pcre_test.cpp
new file mode 100644
index 00000000000..75e2a575d65
--- /dev/null
+++ b/src/mongo/util/pcre_test.cpp
@@ -0,0 +1,358 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/util/pcre.h"
+
+#include <fmt/format.h>
+
+#include "mongo/unittest/assert_that.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::pcre {
+namespace {
+using namespace fmt::literals;
+using namespace std::string_literals;
+using namespace unittest::match;
+
+/**
+ * In C++20, u8 literals yield char8_t[N].
+ * These require explicit conversion to `std::string` and `StringData`.
+ */
+template <typename Out, typename Ch, size_t N>
+Out u8Cast(const Ch (&in)[N]) {
+ const Ch* inp = in;
+ auto cp = reinterpret_cast<const char*>(inp);
+ return Out{cp, cp + N - 1};
+}
+
+TEST(PcreTest, GoodPatterns) {
+ const char* goodPatterns[] = {
+ "hi",
+ "h(i)",
+ };
+ for (auto p : goodPatterns)
+ ASSERT_TRUE(!!Regex{p});
+}
+
+TEST(PcreTest, BadPatterns) {
+ struct {
+ std::string in;
+ std::error_code err;
+ } badPatterns[]{
+ {"h(", Errc::ERROR_MISSING_CLOSING_PARENTHESIS},
+ {"h)", Errc::ERROR_UNMATCHED_CLOSING_PARENTHESIS},
+ {"h\\", Errc::ERROR_END_BACKSLASH},
+ };
+ for (auto [in, err] : badPatterns) {
+ Regex re{in};
+ ASSERT_FALSE(!!re);
+ ASSERT_EQ(re.error(), err);
+ }
+}
+
+TEST(PcreTest, RegexCopyConstruct) {
+ Regex abc("ab*c");
+ Regex re = abc;
+ ASSERT(re);
+ ASSERT_EQ(re.pattern(), "ab*c");
+ ASSERT_TRUE(re.matchView("abbbc"));
+ ASSERT_FALSE(re.matchView("def"));
+}
+
+TEST(PcreTest, RegexCopyAssign) {
+ Regex re("ab*c");
+ Regex def("de*f");
+ ASSERT_TRUE(re);
+ ASSERT_TRUE(re.matchView("abbbc"));
+ ASSERT_FALSE(re.matchView("deeef"));
+ re = def;
+ ASSERT_TRUE(def);
+ ASSERT_TRUE(re);
+ ASSERT_FALSE(re.matchView("abbbc"));
+ ASSERT_TRUE(re.matchView("deeef"));
+}
+
+TEST(PcreTest, RegexMoveConstruct) {
+ Regex src("ab*c");
+ Regex dst = std::move(src);
+ ASSERT_TRUE(dst);
+ ASSERT_TRUE(dst.matchView("abbbc"));
+ ASSERT_FALSE(dst.matchView("def"));
+}
+
+TEST(PcreTest, RegexMoveAssign) {
+ Regex dst("ab*c");
+ Regex src("de*f");
+ dst = std::move(src); // move-assign
+ ASSERT_TRUE(dst);
+ ASSERT_FALSE(dst.matchView("abbbc"));
+ ASSERT_TRUE(dst.matchView("deeef"));
+ {
+ // Moved-from Regex can be assigned to, and can be destroyed.
+ Regex other("gh*i");
+ src = std::move(other);
+ ASSERT_TRUE(src);
+ ASSERT_FALSE(src.matchView("deeef"));
+ ASSERT_TRUE(src.matchView("ghhhi"));
+ }
+}
+
+TEST(PcreTest, CodeSize) {
+ auto reSize = [](std::string p) { return Regex{std::move(p)}.codeSize(); };
+ ASSERT_LT(reSize(""), reSize("hi"));
+ ASSERT_LT(reSize("hi"), reSize("^(hi)*|(\\d{45})$"));
+}
+
+TEST(PcreTest, MatchView) {
+ Regex re{"hi"};
+ ASSERT_EQ(re.matchView("hi").error(), std::error_code{});
+ ASSERT_EQ(re.matchView("hello").error(), Errc::ERROR_NOMATCH);
+ ASSERT_EQ(re.matchView("thigh").error(), std::error_code{});
+}
+
+// While `match` copies the input and results refer to the copy, `matchView`
+// results refer to the input directly.
+TEST(PcreTest, MatchDataInputStorage) {
+ Regex re{"hi"};
+ const std::string in = "i";
+ ASSERT_NE(re.match(in).input().rawData(), in.data());
+ ASSERT_EQ(re.matchView(in).input().rawData(), in.data());
+}
+
+TEST(PcreTest, StartPos) {
+ Regex hiRe{"hi"};
+ Regex hiRePrefix{"^hi"};
+ StringData ohi = "ohi"_sd;
+ StringData hi = ohi.substr(1);
+
+ ASSERT_TRUE(hiRe.matchView(hi, {}, 0));
+ ASSERT_FALSE(hiRe.matchView(hi, {}, 1));
+
+ ASSERT_TRUE(hiRe.matchView(ohi, {}, 0));
+ ASSERT_TRUE(hiRe.matchView(ohi, {}, 1));
+
+ // PCRE2 checks the startPos range internally.
+ ASSERT_EQ(hiRe.matchView(ohi, {}, 3).error(), Errc::ERROR_NOMATCH);
+ ASSERT_EQ(hiRe.matchView(ohi, {}, 4).error(), Errc::ERROR_BADOFFSET);
+
+ // Using startPos=1 is different from startPos=0 on a substring.
+ ASSERT_TRUE(hiRePrefix.matchView(hi, {}, 0));
+ ASSERT_FALSE(hiRePrefix.matchView(ohi, {}, 1));
+
+ // `MatchData` retains the `startPos` from the match call.
+ for (size_t i = 0; i != ohi.size(); ++i)
+ ASSERT_EQ(hiRe.matchView(ohi, {}, i).startPos(), i) << " i={}"_format(i);
+}
+
+TEST(PcreTest, FullMatch) {
+ Regex re{"hi"};
+ ASSERT_FALSE(re.fullMatch("hello"));
+ ASSERT_TRUE(re.fullMatch("hi"));
+ ASSERT_FALSE(re.fullMatch("hii"));
+ ASSERT_FALSE(re.fullMatch("hhi"));
+}
+
+TEST(PcreTest, PartialMatch) {
+ Regex re{"abc"};
+ ASSERT_FALSE(re.partialMatch(""));
+ ASSERT_FALSE(re.partialMatch("a"));
+ ASSERT_FALSE(re.partialMatch("bc"));
+ ASSERT_TRUE(re.partialMatch("abc"));
+ ASSERT_TRUE(re.partialMatch("zabc"));
+ ASSERT_TRUE(re.partialMatch("abcz"));
+}
+
+TEST(PcreTest, CompileOptions) {
+ std::string pattern = "a.b";
+ std::array subjects{"a\nb"s, "A_b"s, "A\nb"s};
+ struct Spec {
+ CompileOptions opt;
+ std::array<bool, 3> outMatch;
+ };
+ for (auto&& [opt, outMatch] : {
+ Spec{{}, {0, 0, 0}}, //
+ Spec{DOTALL, {1, 0, 0}}, //
+ Spec{CASELESS, {0, 1, 0}}, //
+ Spec{DOTALL | CASELESS, {1, 1, 1}}, //
+ }) {
+ Regex re{pattern, opt};
+ for (size_t i = 0; i < subjects.size(); ++i)
+ ASSERT_EQ(re.fullMatch(subjects[i]), outMatch[i])
+ << "opt={}, subject={}"_format(uint32_t(opt), subjects[i]);
+ }
+}
+
+TEST(PcreTest, CaptureCount) {
+ auto count = [](std::string p) {
+ Regex re(std::move(p));
+ ASSERT_TRUE(!!re) << errorMessage(re.error());
+ return re.captureCount();
+ };
+ ASSERT_EQ(count("hi"), 0) << "none";
+ ASSERT_EQ(count("()"), 1) << "empty";
+ ASSERT_EQ(count("a(b*)c"), 1) << "single";
+ ASSERT_EQ(count("(\\d*):(\\w*)"), 2) << "sequential";
+ ASSERT_EQ(count("(\\d*|(b*))c"), 2) << "nested";
+ ASSERT_EQ(count("a(?:b|c)d"), 0) << "solely non-capturing group";
+ ASSERT_EQ(count("a(?:b|(?:c*))d"), 0) << "multiple non-capturing groups";
+ ASSERT_EQ(count("a(?:b|(c*))d"), 1) << "mix of capturing and non-capturing groups";
+}
+
+TEST(PcreTest, Captures) {
+ Regex re("a(b*)c");
+ ASSERT_EQ(re.captureCount(), 1);
+ auto subject = "123abbbc456"_sd;
+ auto m = re.matchView(subject);
+ ASSERT_EQ(m.captureCount(), 1);
+ ASSERT_TRUE(!!m);
+ ASSERT_EQ(m[0], "abbbc");
+ ASSERT_EQ(m[0].rawData(), subject.rawData() + 3);
+ ASSERT_EQ(m[1], "bbb");
+ ASSERT_EQ(m[1].rawData(), subject.rawData() + 4);
+ ASSERT_THROWS(m[2], ExceptionFor<ErrorCodes::NoSuchKey>);
+}
+
+TEST(PcreTest, SkippedCapture) {
+ Regex re("the ((red|white) (king|queen))");
+ ASSERT_THAT(re.matchView("the red queen").getMatchList(),
+ ElementsAre(Eq("the red queen"), Eq("red queen"), Eq("red"), Eq("queen")));
+ // Same, but second capture group is skipped.
+ Regex reWithSkip("the ((?:red|white) (king|queen))");
+ ASSERT_THAT(reWithSkip.matchView("the white queen").getMatchList(),
+ ElementsAre(Eq("the white queen"), Eq("white queen"), Eq("queen")));
+ ASSERT_THAT(reWithSkip.matchView("the red queen").getMatchList(),
+ ElementsAre(Eq("the red queen"), Eq("red queen"), Eq("queen")));
+}
+
+TEST(PcreTest, UnusedLastCapture) {
+ Regex re("(a)|(b)");
+ auto m = re.match("a");
+ ASSERT_THAT(m.getMatchList(), ElementsAre(Eq("a"), Eq("a"), Eq("")));
+}
+
+TEST(PcreTest, NullCapture) {
+ static constexpr auto sb = "b"_sd;
+ ASSERT_THAT(Regex("(a*)b").matchView(sb)[1].rawData(), Eq(sb.rawData())) << "Empty";
+ ASSERT_THAT(Regex("(?:b|(a))").matchView(sb)[1].rawData(), Eq(nullptr)) << "Null";
+}
+
+TEST(PcreTest, CapturesByName) {
+ Regex re("a(?P<bees>b*)c");
+ ASSERT_EQ(re.captureCount(), 1);
+ auto subject = "123abbbc456"_sd;
+ auto m = re.matchView(subject);
+ ASSERT_TRUE(!!m);
+ ASSERT_EQ(m[1], "bbb");
+ ASSERT_THROWS(m[2], ExceptionFor<ErrorCodes::NoSuchKey>);
+ ASSERT_EQ(m["bees"], "bbb");
+ ASSERT_THROWS(m["seas"], ExceptionFor<ErrorCodes::NoSuchKey>);
+}
+
+TEST(PcreTest, Utf) {
+ StringData subject = u8Cast<StringData>(u8"é");
+ ASSERT_EQ(subject, "\xc3\xa9"_sd);
+ ASSERT_TRUE(Regex("^..$").matchView(subject)) << "é is 2 bytes";
+ ASSERT_TRUE(Regex("^.$", UTF).matchView(subject)) << "é is 1 UTF-8 character";
+}
+
+TEST(PcreTest, BadUtfEncoding) {
+ // The UTF_ERR codes are obscure.
+ // See https://www.pcre.org/current/doc/html/pcre2unicode.html
+ Regex re("^.$", UTF);
+ ASSERT_TRUE(!!re);
+ struct Spec {
+ std::string in;
+ std::error_code err;
+ } specs[] = {
+ {"\xbf", Errc::ERROR_UTF8_ERR20}, // isolated bit7 code point
+ {"\x80", Errc::ERROR_UTF8_ERR20}, // isolated bit7 code point
+ {"\xfe", Errc::ERROR_UTF8_ERR21}, // invalid byte value
+ {"\xff", Errc::ERROR_UTF8_ERR21}, // invalid byte value
+ {"\xef\xbf\xbf", {}}, // (U+ffff)
+ {"\xf8\xa0\x8f\xbf\xbf", Errc::ERROR_UTF8_ERR11}, // (U+10ffff) no 5-byte codes
+ {"\xd0\x80", {}},
+ {"\xe8\x80\x80", {}},
+ {"\xf4\x80\x80\x80", {}},
+ // missing 1 trailing byte
+ {"\xc0", Errc::ERROR_UTF8_ERR1},
+ {"\xe0\x80", Errc::ERROR_UTF8_ERR1},
+ {"\xf0\x80\x80", Errc::ERROR_UTF8_ERR1},
+ {"\xf8\x80\x80\x80", Errc::ERROR_UTF8_ERR1},
+ {"\xfc\x80\x80\x80\x80", Errc::ERROR_UTF8_ERR1},
+ // missing 2 trailing bytes
+ {"\xe0", Errc::ERROR_UTF8_ERR2},
+ {"\xf0\x80", Errc::ERROR_UTF8_ERR2},
+ {"\xf8\x80\x80", Errc::ERROR_UTF8_ERR2},
+ {"\xfc\x80\x80\x80", Errc::ERROR_UTF8_ERR2},
+ // missing 3 trailing bytes
+ {"\xf0", Errc::ERROR_UTF8_ERR3},
+ {"\xf8\x80", Errc::ERROR_UTF8_ERR3},
+ {"\xfc\x80\x80", Errc::ERROR_UTF8_ERR3},
+ // missing 4 trailing bytes
+ {"\xf8", Errc::ERROR_UTF8_ERR4},
+ {"\xfc\x80", Errc::ERROR_UTF8_ERR4},
+ // missing 5 trailing bytes
+ {"\xfc", Errc::ERROR_UTF8_ERR5},
+ // valid emoji (4-byte UTF-8 sequence)
+ {u8Cast<std::string>(u8"🍌"), {}},
+ };
+ for (auto&& [in, err] : specs) {
+ ASSERT_EQ(re.matchView(in).error(), err);
+ }
+}
+
+std::string subst(std::string re, StringData rep, std::string subject, MatchOptions options = {}) {
+ Regex{std::move(re)}.substitute(rep, &subject, options);
+ return subject;
+}
+
+TEST(PcreTest, Substitute) {
+ ASSERT_EQ(subst("funky", "pretty", "I feel funky funky."), //
+ "I feel pretty funky.");
+ ASSERT_EQ(subst("funky", "pretty", "I feel funky funky.", SUBSTITUTE_GLOBAL), //
+ "I feel pretty pretty.");
+ ASSERT_EQ(subst("a(b*)c", "A${1}C", "_abbbc_"), "_AbbbC_");
+}
+
+TEST(PcreTest, SubstituteFlags) {
+ std::string re = R"re(\[(\w+):(\w+):(\w+)\])re";
+ StringData repl = "$3 $2 $1";
+ std::string str = "The [fox:brown:quick] jumped over [dog:lazy:the].";
+ ASSERT_EQ(subst(re, repl, str), //
+ "The quick brown fox jumped over [dog:lazy:the].");
+ ASSERT_EQ(subst(re, repl, str, SUBSTITUTE_LITERAL), //
+ "The $3 $2 $1 jumped over [dog:lazy:the].");
+ ASSERT_EQ(subst(re, repl, str, SUBSTITUTE_GLOBAL), //
+ "The quick brown fox jumped over the lazy dog.");
+ ASSERT_EQ(subst(re, repl, str, SUBSTITUTE_GLOBAL | SUBSTITUTE_LITERAL), //
+ "The $3 $2 $1 jumped over $3 $2 $1.");
+}
+
+} // namespace
+} // namespace mongo::pcre
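
Because pcre.h registers `Errc` with `std::is_error_code_enum` (the specialization at the end of
that header), compile failures can be handled with ordinary `std::error_code` machinery. A small
editorial sketch, not part of the patch; `reportBadPattern` is a hypothetical helper:

    #include <iostream>
    #include <string>

    #include "mongo/util/pcre.h"

    // Hypothetical diagnostic helper, for illustration only.
    void reportBadPattern(const std::string& pattern) {
        mongo::pcre::Regex re{pattern};
        if (!re) {
            // Errc enumerators compare directly against the std::error_code returned by error().
            if (re.error() == mongo::pcre::Errc::ERROR_END_BACKSLASH)
                std::cerr << "pattern ends with a lone backslash\n";
            else
                std::cerr << "compile failed: " << re.error().message() << "\n";
        }
    }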
diff --git a/src/mongo/util/pcre_util.cpp b/src/mongo/util/pcre_util.cpp
new file mode 100644
index 00000000000..4f4dd3c63a8
--- /dev/null
+++ b/src/mongo/util/pcre_util.cpp
@@ -0,0 +1,80 @@
+/**
+ * Copyright (C) 2019-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+#include "mongo/util/pcre_util.h"
+
+#include <fmt/format.h>
+
+#include "mongo/base/error_codes.h"
+#include "mongo/util/assert_util.h"
+#include "mongo/util/pcre.h"
+
+namespace mongo::pcre_util {
+
+using namespace fmt::literals;
+
+pcre::CompileOptions parseOptions(StringData optionFlags, StringData opName) {
+ pcre::CompileOptions opt = pcre::UTF;
+ for (char flag : optionFlags) {
+ switch (flag) {
+ case 'i': // case insensitive
+ opt |= pcre::CASELESS;
+ continue;
+ case 'm': // newlines match ^ and $
+ opt |= pcre::MULTILINE;
+ continue;
+ case 'x': // extended mode
+ opt |= pcre::EXTENDED;
+ continue;
+ case 's': // allows dot to include newline chars
+ opt |= pcre::DOTALL;
+ continue;
+ case 'u':
+ continue;
+ default:
+ uasserted(51108, "{} invalid flag in regex options: {}"_format(opName, flag));
+ }
+ }
+ return opt;
+}
+
+std::string quoteMeta(StringData str) {
+ std::string result;
+ for (char c : str) {
+ if (c == '\0') {
+ result += "\\0";
+ continue;
+ }
+ if (!ctype::isAlnum(c) && c != '_' && !(c & 0x80))
+ result += '\\';
+ result += c;
+ }
+ return result;
+}
+
+} // namespace mongo::pcre_util
diff --git a/src/mongo/util/pcre_util.h b/src/mongo/util/pcre_util.h
new file mode 100644
index 00000000000..deb7129e9bc
--- /dev/null
+++ b/src/mongo/util/pcre_util.h
@@ -0,0 +1,60 @@
+/**
+ * Copyright (C) 2019-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/base/string_data.h"
+#include "mongo/util/pcre.h"
+
+/**
+ * This library collects Mongo-specific PCRE conventions which are useful
+ * but shouldn't be part of the main pcre.h library.
+ */
+namespace mongo::pcre_util {
+/**
+ * Builds `pcre::CompileOptions` from the input options string.
+ * The `pcre::UTF` option is also set by default.
+ * Throws a `uassert` with code 51108 on invalid flags, including the `opName` in its reason.
+ *
+ * Valid flags:
+ * 'i': CASELESS
+ * 'm': MULTILINE
+ * 's': DOTALL
+ * 'u': UTF (redundant, but accepted)
+ * 'x': EXTENDED
+ */
+pcre::CompileOptions parseOptions(StringData optionFlags, StringData opName = "");
+
+/**
+ * Escapes all potentially meaningful regex characters in the provided string.
+ * The returned string, used as a `mongo::pcre::Regex`, will match `str`.
+ */
+std::string quoteMeta(StringData str);
+
+} // namespace mongo::pcre_util
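
As a rough, editorial illustration (not part of the patch) of how these helpers combine, a caller
might look like the sketch below; `userPatternMatches` and the `"exampleOp"` operator name are
invented for the example.

    #include <string>

    #include "mongo/util/pcre.h"
    #include "mongo/util/pcre_util.h"

    // Hypothetical helper, for illustration only.
    bool userPatternMatches(mongo::StringData userPattern,
                            mongo::StringData flags,
                            mongo::StringData subject) {
        // "imsxu" option letters from a user-facing operator become CompileOptions; UTF is always set.
        mongo::pcre::CompileOptions opts = mongo::pcre_util::parseOptions(flags, "exampleOp");
        mongo::pcre::Regex re{std::string{userPattern}, opts};
        return !!re && !!re.matchView(subject);
    }

    // quoteMeta escapes regex metacharacters so arbitrary user text is matched literally, e.g.:
    //     mongo::pcre::Regex literal{mongo::pcre_util::quoteMeta("cost: $5 (approx)")};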
diff --git a/src/mongo/util/pcre_util_test.cpp b/src/mongo/util/pcre_util_test.cpp
new file mode 100644
index 00000000000..720ff5c31ad
--- /dev/null
+++ b/src/mongo/util/pcre_util_test.cpp
@@ -0,0 +1,112 @@
+/**
+ * Copyright (C) 2019-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/util/pcre_util.h"
+
+#include <fmt/format.h>
+
+#include "mongo/base/string_data.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/ctype.h"
+#include "mongo/util/pcre.h"
+
+namespace mongo::pcre_util {
+namespace {
+
+using namespace fmt::literals;
+
+// Test compares `CompileOptions` as integers.
+TEST(PcreUtilTest, ParseOptions) {
+ using namespace pcre::options;
+ auto parse = [](StringData flags) { return static_cast<uint32_t>(parseOptions(flags)); };
+ auto expect = [](pcre::CompileOptions o) { return static_cast<uint32_t>(o); };
+ ASSERT_EQ(parse(""), expect(UTF)) << " UTF is on by default";
+ ASSERT_EQ(parse("i"), expect(UTF | CASELESS));
+ ASSERT_EQ(parse("m"), expect(UTF | MULTILINE));
+ ASSERT_EQ(parse("s"), expect(UTF | DOTALL));
+ ASSERT_EQ(parse("u"), expect(UTF));
+ ASSERT_EQ(parse("x"), expect(UTF | EXTENDED));
+ ASSERT_EQ(parse("imsux"), expect(CASELESS | MULTILINE | DOTALL | UTF | EXTENDED));
+ ASSERT_EQ(parse("xusmi"), expect(CASELESS | MULTILINE | DOTALL | UTF | EXTENDED));
+
+ auto isBadFlagException = [](const DBException& ex) { return ex.code() == 51108; };
+ ASSERT_THROWS_WITH_CHECK(parse("z"), DBException, isBadFlagException);
+ ASSERT_THROWS_WITH_CHECK(parse("iz"), DBException, isBadFlagException);
+}
+
+TEST(PcreUtilTest, QuoteMeta) {
+ ASSERT_EQ(quoteMeta(""), "");
+ ASSERT_EQ(quoteMeta("abc_def_123"_sd), "abc_def_123");
+ ASSERT_EQ(quoteMeta("🍌"_sd), "🍌");
+ ASSERT_EQ(quoteMeta("\0"_sd), "\\0") << "NUL";
+ ASSERT_EQ(quoteMeta("\n"_sd), "\\\n") << "one escape";
+ ASSERT_EQ(quoteMeta("a\n\nb"_sd), "a\\\n\\\nb") << "two adjacent escapes";
+ ASSERT_EQ(quoteMeta("a\nb\nc"_sd), "a\\\nb\\\nc") << "two nonadjacent escapes";
+
+ // All the single chars except '\0', which is already tested and behaves differently.
+ for (int i = 1; i <= CHAR_MAX; ++i) {
+ char c = i;
+ StringData in(&c, 1);
+ std::string out = quoteMeta(in);
+
+ // [a-zA-Z0-9_] and bit7 chars are not escaped. Everything else is.
+ bool shouldEscape = [&] {
+ if (ctype::isAlnum(c))
+ return false;
+ if (c == '_')
+ return false;
+ if (static_cast<unsigned char>(c) >= 0x80)
+ return false;
+ return true;
+ }();
+
+ auto hexdump = [](StringData in) {
+ std::string r = "[";
+ StringData sep;
+ for (unsigned char c : in) {
+ static constexpr auto d = "0123456789abcdef"_sd;
+ r += sep;
+ r += d[(c >> 4) & 0xf];
+ r += d[(c >> 0) & 0xf];
+ sep = ",";
+ }
+ r += "]";
+ return r;
+ };
+ auto note = "{} => {}"_format(hexdump(in), hexdump(out));
+ if (shouldEscape) {
+ ASSERT_EQ(out, "\\" + in) << note;
+ } else {
+ ASSERT_EQ(out, in) << note;
+ }
+ }
+}
+
+} // namespace
+} // namespace mongo::pcre_util
diff --git a/src/mongo/util/procparser_test.cpp b/src/mongo/util/procparser_test.cpp
index 08db075dbf7..f2cda841579 100644
--- a/src/mongo/util/procparser_test.cpp
+++ b/src/mongo/util/procparser_test.cpp
@@ -510,25 +510,6 @@ TEST(FTDCProcNetstat, TestLocalNetstat) {
ASSERT_KEY("IpExt:InOctets");
}
-// Test we can parse the /proc/net/snmp on this machine and assert we have some expected fields
-// Some keys can vary between distros, so we test only for the existence of a few basic ones
-TEST(FTDCProcNetstat, TestLocalNetSnmp) {
-
- BSONObjBuilder builder;
-
- std::vector<StringData> keys{"Tcp:"_sd, "Ip:"_sd};
-
- ASSERT_OK(procparser::parseProcNetstatFile(keys, "/proc/net/snmp", &builder));
-
- BSONObj obj = builder.obj();
- auto stringMap = toStringMap(obj);
- LOGV2(23367, "OBJ:{obj}", "obj"_attr = obj);
- ASSERT_KEY("Ip:InReceives");
- ASSERT_KEY("Ip:OutRequests");
- ASSERT_KEY("Tcp:InSegs");
- ASSERT_KEY("Tcp:OutSegs");
-}
-
TEST(FTDCProcNetstat, TestLocalNonExistentNetstat) {
std::vector<StringData> keys{};
BSONObjBuilder builder;
diff --git a/src/mongo/util/queue.h b/src/mongo/util/queue.h
index ee0caf01ece..176e719cfdf 100644
--- a/src/mongo/util/queue.h
+++ b/src/mongo/util/queue.h
@@ -107,11 +107,6 @@ public:
}
}
- void pushBlocking(const T& t) {
- std::vector<T> vec{t};
- pushAllBlocking(vec.begin(), vec.end());
- }
-
bool empty() const {
stdx::lock_guard<Latch> lk(_lock);
return _queue.empty();
diff --git a/src/mongo/util/signal_handlers.h b/src/mongo/util/signal_handlers.h
index 584ae2d21e5..0ff58d34b1a 100644
--- a/src/mongo/util/signal_handlers.h
+++ b/src/mongo/util/signal_handlers.h
@@ -46,8 +46,7 @@ void setupSignalHandlers();
/**
* Starts the thread to handle asynchronous signals.
*
- * This must be the first thread started from the main thread. Call this immediately after
- * initializeServerGlobalState().
+ * This must be the first thread started from the main thread.
*/
void startSignalProcessingThread(LogFileStatus rotate = LogFileStatus::kNeedToRotateLogFile);
diff --git a/src/mongo/util/stacktrace_posix.cpp b/src/mongo/util/stacktrace_posix.cpp
index 3ea5a1e0424..1eb979c31c8 100644
--- a/src/mongo/util/stacktrace_posix.cpp
+++ b/src/mongo/util/stacktrace_posix.cpp
@@ -446,8 +446,9 @@ void printStackTraceImpl(const Options& options, StackTraceSink* sink = nullptr)
if (!err.empty()) {
if (sink) {
*sink << fmt::format(FMT_STRING("Error collecting stack trace: {}"), err);
+ } else {
+ LOGV2_ERROR(31430, "Error collecting stack trace", "error"_attr = err);
}
- LOGV2_ERROR(31430, "Error collecting stack trace", "error"_attr = err);
}
stack_trace_detail::logBacktraceObject(bob.done(), sink, options.withHumanReadable);
}
diff --git a/src/third_party/SConscript b/src/third_party/SConscript
index af08535b963..4eb7c79d9c8 100644
--- a/src/third_party/SConscript
+++ b/src/third_party/SConscript
@@ -32,6 +32,15 @@ thirdPartyEnvironmentModifications = {
'timelib': {'CPPPATH': ['#/src/third_party/timelib' + timelibSuffix], },
'unwind': {},
'variant': {'CPPPATH': ['#src/third_party/variant' + variantSuffix + '/include'], },
+ 'mozjs': {
+ 'CPPPATH': [
+ '#/src/third_party/mozjs/include',
+ '#/src/third_party/mozjs/mongo_sources',
+ '#/src/third_party/mozjs/platform/' + env["TARGET_ARCH"] + "/" + env["TARGET_OS"] +
+ "/include",
+ ],
+ 'FORCEINCLUDES': ['js-config.h', ],
+ }
}
@@ -73,7 +82,7 @@ if not use_system_version_of_library('pcre'):
if not use_system_version_of_library('pcre2'):
thirdPartyEnvironmentModifications['pcre2'] = {
- 'CPPPATH': ['#/src/third_party/pcre2'],
+ 'CPPPATH': ['#/src/third_party/pcre2/src'],
}
if not use_system_version_of_library('boost'):
@@ -124,21 +133,6 @@ if not use_system_version_of_library('google-benchmark'):
'CPPPATH': ['#/src/third_party/benchmark/dist/include'],
}
-# TODO: figure out if we want to offer system versions of mozjs. Mozilla
-# hasn't offered a source tarball since 24, but in theory they could.
-#
-#if not use_system_version_of_library('mozjs'):
-if True:
- thirdPartyEnvironmentModifications['mozjs'] = {
- 'CPPPATH': [
- '#/src/third_party/mozjs/include',
- '#/src/third_party/mozjs/mongo_sources',
- '#/src/third_party/mozjs/platform/' + env["TARGET_ARCH"] + "/" + env["TARGET_OS"] +
- "/include",
- ],
- 'FORCEINCLUDES': ['js-config.h', ],
- }
-
if "tom" in env["MONGO_CRYPTO"]:
thirdPartyEnvironmentModifications['tomcrypt'] = {
'CPPPATH': ['#/src/third_party/tomcrypt' + tomcryptSuffix + '/src/headers'],
@@ -269,6 +263,10 @@ s2Env.InjectThirdParty(libraries=[
s2Env.InjectMongoIncludePaths()
s2Env.SConscript('s2/SConscript', exports={'env': s2Env})
+if jsEngine:
+ mozjsEnv = env.Clone()
+ mozjsEnv.SConscript('mozjs/SConscript', exports={'env': mozjsEnv})
+
if use_libunwind:
unwindEnv = env.Clone(LIBDEPS_NO_INHERIT=[
'$BUILD_DIR/third_party/shim_allocator',
@@ -448,16 +446,6 @@ else:
benchmarkEnv.ShimLibrary(name="benchmark")
-if jsEngine:
- mozjsEnv = env.Clone()
- mozjsEnv.SConscript('mozjs/SConscript', exports={'env': mozjsEnv})
- mozjsEnv = mozjsEnv.Clone(LIBDEPS_INTERFACE=[
- 'mozjs/mozjs',
- 'shim_zlib',
- ])
-
- mozjsEnv.ShimLibrary(name="mozjs")
-
if "tom" in env["MONGO_CRYPTO"]:
tomcryptEnv = env.Clone()
tomcryptEnv.SConscript(
diff --git a/src/third_party/wiredtiger/INSTALL b/src/third_party/wiredtiger/INSTALL
index ef25361a312..bda8fa4a68a 100644
--- a/src/third_party/wiredtiger/INSTALL
+++ b/src/third_party/wiredtiger/INSTALL
@@ -1,6 +1,6 @@
-WiredTiger 10.0.2: (November 30, 2021)
+WiredTiger 11.0.0: (May 12, 2022)
-This is version 10.0.2 of WiredTiger.
+This is version 11.0.0 of WiredTiger.
Instructions for configuring, building, and installing WiredTiger are available online.
diff --git a/src/third_party/wiredtiger/NEWS b/src/third_party/wiredtiger/NEWS
deleted file mode 100644
index d6428243a15..00000000000
--- a/src/third_party/wiredtiger/NEWS
+++ /dev/null
@@ -1,12 +0,0 @@
-Ticket reference tags refer to tickets in the MongoDB JIRA tracking system:
-https://jira.mongodb.org
-
-WiredTiger release 11.0.1, 2022-XX-XX
-------------------------------------
-
-The WiredTiger 11.0 release is a complete refresh of the WiredTiger storage engine.
-Applications written to earlier versions of the WiredTiger API will require review
-(although the API structure is substantially the same as previous releases). Applications
-using row-store data objects can use those objects without change. Applications using
-fixed- or variable-length column-store data objects will require a dump and reload cycle
-to import those objects into a 11.0 release environment.
diff --git a/src/third_party/wiredtiger/README b/src/third_party/wiredtiger/README
index df921a6d884..bc5b9b822c0 100644
--- a/src/third_party/wiredtiger/README
+++ b/src/third_party/wiredtiger/README
@@ -1,14 +1,14 @@
-WiredTiger 10.0.2: (November 30, 2021)
+WiredTiger 11.0.0: (May 12, 2022)
-This is version 10.0.2 of WiredTiger.
+This is version 11.0.0 of WiredTiger.
WiredTiger release packages and documentation can be found at:
https://source.wiredtiger.com/
-The documentation for this specific release can be found at:
+The documentation can be found at:
- https://source.wiredtiger.com/develop/index.html
+ https://source.wiredtiger.com
The WiredTiger source code can be found at:
diff --git a/src/third_party/wiredtiger/RELEASE_INFO b/src/third_party/wiredtiger/RELEASE_INFO
index 954ca8646b1..65b6ccda24f 100644
--- a/src/third_party/wiredtiger/RELEASE_INFO
+++ b/src/third_party/wiredtiger/RELEASE_INFO
@@ -1,6 +1,6 @@
-WIREDTIGER_VERSION_MAJOR=10
+WIREDTIGER_VERSION_MAJOR=11
WIREDTIGER_VERSION_MINOR=0
-WIREDTIGER_VERSION_PATCH=2
+WIREDTIGER_VERSION_PATCH=0
WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH"
WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"`
diff --git a/src/third_party/wiredtiger/cmake/configs/base.cmake b/src/third_party/wiredtiger/cmake/configs/base.cmake
index cdfd5abb36f..83dd332dbdb 100644
--- a/src/third_party/wiredtiger/cmake/configs/base.cmake
+++ b/src/third_party/wiredtiger/cmake/configs/base.cmake
@@ -212,11 +212,26 @@ config_bool(
DEFAULT OFF
)
+# Set up the WiredTiger build to use Debug settings unless the build type was explicitly
+# configured. Primary users of the build are our developers, who want as much help diagnosing
+# issues as possible. Builds targeted for release to customers should switch to a "Release" setting.
+set(default_build_type "Debug")
+if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+ # Notify callers that our build chooses Debug rather than CMake's default empty build type.
+ message(STATUS "Defaulting build type to '${default_build_type}'.")
+ set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE
+ STRING "Type of build selected." FORCE)
+endif()
+
set(default_optimize_level)
if("${WT_OS}" STREQUAL "windows")
- set(default_optimize_level "/O2")
+ set(default_optimize_level "/Od")
else()
- set(default_optimize_level "-O3")
+ # Ideally this would choose an optimization level of Og, which is the recommended
+ # configuration for build-debug cycles when using GCC and is a synonym in clang for O1.
+ # Unfortunately, at the moment WiredTiger code generates compiler warnings (as errors) when
+ # built with Og.
+ set(default_optimize_level "-O1")
endif()
config_string(
CC_OPTIMIZE_LEVEL
diff --git a/src/third_party/wiredtiger/cmake/configs/modes.cmake b/src/third_party/wiredtiger/cmake/configs/modes.cmake
index 1d0bb897ac1..ee7137874cb 100644
--- a/src/third_party/wiredtiger/cmake/configs/modes.cmake
+++ b/src/third_party/wiredtiger/cmake/configs/modes.cmake
@@ -130,6 +130,9 @@ else()
endif()
endif()
+# Sanitizer builds should have debug info available and keep optimization low.
+set(san_debug_flags "-O1 -g ${no_omit_frame_flag}")
+
# UBSAN build variant flags.
set(ubsan_link_flags "-fsanitize=undefined")
set(ubsan_compiler_c_flag "-fsanitize=undefined")
@@ -147,22 +150,22 @@ set(tsan_compiler_cxx_flag "-fsanitize=thread")
# Define our custom build variants.
define_build_mode(ASan
- C_COMPILER_FLAGS ${asan_compiler_c_flag} ${no_omit_frame_flag}
- CXX_COMPILER_FLAGS ${asan_compiler_cxx_flag} ${no_omit_frame_flag}
+ C_COMPILER_FLAGS ${asan_compiler_c_flag} ${san_debug_flags}
+ CXX_COMPILER_FLAGS ${asan_compiler_cxx_flag} ${san_debug_flags}
LINK_FLAGS ${asan_link_flags}
LIBS ${asan_lib_flags}
)
define_build_mode(UBSan
- C_COMPILER_FLAGS ${ubsan_compiler_c_flag} ${no_omit_frame_flag}
- CXX_COMPILER_FLAGS ${ubsan_compiler_cxx_flag} ${no_omit_frame_flag}
+ C_COMPILER_FLAGS ${ubsan_compiler_c_flag} ${san_debug_flags}
+ CXX_COMPILER_FLAGS ${ubsan_compiler_cxx_flag} ${san_debug_flags}
LINK_FLAGS ${ubsan_link_flags}
# Disable UBSan on MSVC compilers (unsupported).
DEPENDS "NOT MSVC"
)
define_build_mode(MSan
- C_COMPILER_FLAGS ${msan_compiler_c_flag} ${no_omit_frame_flag}
+ C_COMPILER_FLAGS ${msan_compiler_c_flag} ${san_debug_flags}
CXX_COMPILER_FLAGS ${msan_compiler_cxx_flag}
LINK_FLAGS ${msan_link_flags}
# Disable MSan on MSVC and GNU compilers (unsupported).
@@ -170,7 +173,7 @@ define_build_mode(MSan
)
define_build_mode(TSan
- C_COMPILER_FLAGS ${tsan_compiler_c_flag} ${no_omit_frame_flag}
+ C_COMPILER_FLAGS ${tsan_compiler_c_flag} ${san_debug_flags}
CXX_COMPILER_FLAGS ${tsan_compiler_cxx_flag}
LINK_FLAGS ${tsan_link_flags}
# Disable TSan on MSVC compilers (unsupported).
diff --git a/src/third_party/wiredtiger/cmake/configs/version.cmake b/src/third_party/wiredtiger/cmake/configs/version.cmake
index 714da8bb3de..287a2582d71 100644
--- a/src/third_party/wiredtiger/cmake/configs/version.cmake
+++ b/src/third_party/wiredtiger/cmake/configs/version.cmake
@@ -1,5 +1,5 @@
-# Generated by dist/s_version. Do not modify.
-set(WT_VERSION_MAJOR 10)
+# Generated by dist/s_version. Do not modify.
+set(WT_VERSION_MAJOR 11)
set(WT_VERSION_MINOR 0)
-set(WT_VERSION_PATCH 2)
-set(WT_VERSION_STRING "WiredTiger 10.0.2: (December 21, 2021)")
+set(WT_VERSION_PATCH 0)
+set(WT_VERSION_STRING "WiredTiger 11.0.0: (May 12, 2022)")
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index c21bf618c28..76fe1d4e9b6 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -201,6 +201,9 @@ tiered_config = [
time in seconds to retain data on tiered storage on the local tier for faster
read access''',
min='0', max='10000'),
+ Config('object_target_size', '0', r'''
+ this option is no longer supported, retained for backward compatibility''',
+ min='0', undoc=True),
]),
]
@@ -566,6 +569,12 @@ connection_runtime_config = [
operation. This allows memory sanitizers to detect inappropriate references to
memory owned by cursors.''',
type='boolean'),
+ Config('cursor_reposition', 'false', r'''
+ if true, for operations with snapshot isolation the cursor temporarily releases the
+ page that requires force eviction, then repositions back to the page for further
+ operations. A page release encourages eviction of hot or large pages, which is more
+ likely to succeed without a cursor keeping the page pinned.''',
+ type='boolean'),
Config('eviction', 'false', r'''
if true, modify internal algorithms to change skew to force history store eviction
to happen more aggressively. This includes but is not limited to not skewing newest,
@@ -945,9 +954,6 @@ tiered_storage_configuration_common = [
time in seconds to retain data on tiered storage on the local tier for faster read
access''',
min='0', max='10000'),
- Config('object_target_size', '10M', r'''
- the approximate size of objects before creating them on the tiered storage tier''',
- min='100K', max='10TB'),
]
connection_reconfigure_tiered_storage_configuration = [
Config('tiered_storage', '', r'''
@@ -1289,11 +1295,11 @@ methods = {
configure import of an existing object into the currently running database''',
type='category', subconfig=[
Config('compare_timestamp', 'oldest_timestamp', r'''
- Allow importing files with timestamps smaller or equal to the configured global
+ allow importing files with timestamps smaller or equal to the configured global
timestamps. Note the history of the files are not imported together and thus snapshot
read of historical data will not work with the option "stable_timestamp". (The \c
oldest and \c stable arguments are deprecated short-hand for \c oldest_timestamp
- and \c stable_timestamp, respectively.)''',
+ and \c stable_timestamp, respectively)''',
choices=['oldest', 'oldest_timestamp', 'stable', 'stable_timestamp']),
Config('enabled', 'false', r'''
whether to import the input URI from disk''',
@@ -1304,8 +1310,8 @@ methods = {
Config('file_metadata', '', r'''
the file configuration extracted from the metadata of the export database'''),
Config('metadata_file', '', r'''
- File that contains all the relevant metadata information for the URI to import. The file
- is generated by backup:export cursor.'''),
+ a text file that contains all the relevant metadata information for the URI to import.
+ The file is generated by backup:export cursor'''),
]),
]),
diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py
index 310babf2e7f..d283197be81 100755
--- a/src/third_party/wiredtiger/dist/flags.py
+++ b/src/third_party/wiredtiger/dist/flags.py
@@ -68,13 +68,16 @@ def flag_declare(name):
sys.exit(1)
# Calculate number of hex bytes, create format string
- fmt = "0x%%0%dxu" % ((start + max_flags + 3) / 4)
+ if end <= 32:
+ fmt = "0x%%0%dxu" % ((start + max_flags + 3) / 4)
+ else:
+ fmt = "0x%%0%dxull" % ((start + max_flags + 3) / 4)
# Generate the flags starting from an offset set from the start value.
tfile.write(header)
v = 1 << start
for d in sorted(defines):
- tfile.write(re.sub("0x[01248u]*", fmt % v, d))
+ tfile.write(re.sub("0x[01248]*ul*", fmt % v, d))
v = v * 2
tfile.write(line)
diff --git a/src/third_party/wiredtiger/dist/s_string b/src/third_party/wiredtiger/dist/s_string
index 1de04dda731..11d7390dae1 100755
--- a/src/third_party/wiredtiger/dist/s_string
+++ b/src/third_party/wiredtiger/dist/s_string
@@ -45,9 +45,11 @@ check() {
}
# List of files to spellchk.
+# FIXME-WT-9433 Skip files in the test/cppsuite folder.
l=`(cd .. &&
- find bench examples ext src test -name '*.[chsy]' &&
- find src -name '*.in' && find test -name '*.cxx' && find test -name '*.cpp')`
+ find bench examples ext src test -not -path "test/cppsuite/*" -name '*.[chsy]' &&
+ find src -name '*.in' &&
+ find test -not -path "test/cppsuite/*" -name '*.cpp')`
usage()
{
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index ca2adbbb5cd..3377d0354e4 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -1361,6 +1361,7 @@ regionp
reinitialization
relocked
repl
+repositions
resizable
resize
resizing
@@ -1511,6 +1512,7 @@ trk
trk's
trun
trunc
+truncations
trylock
tryrdlock
trywrlock
diff --git a/src/third_party/wiredtiger/dist/s_void b/src/third_party/wiredtiger/dist/s_void
index 99967958839..3b1871ef63e 100755
--- a/src/third_party/wiredtiger/dist/s_void
+++ b/src/third_party/wiredtiger/dist/s_void
@@ -38,6 +38,7 @@ func_ok()
-e '/int __config_parser_close$/d' \
-e '/int __curlog_reset$/d' \
-e '/int __cursor_fix_implicit$/d' \
+ -e '/int __delete_redo_window_cleanup_skip$/d' \
-e '/int __handle_close_default$/d' \
-e '/int __handle_progress_default$/d' \
-e '/int __im_file_close$/d' \
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 455e077b05e..35211191ada 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -344,6 +344,7 @@ conn_stats = [
CursorStat('cursor_search_hs', 'cursor search history store calls'),
CursorStat('cursor_search_near', 'cursor search near calls'),
CursorStat('cursor_truncate', 'cursor truncate calls'),
+ CursorStat('cursor_truncate_keys_deleted', 'cursor truncates performed on individual keys'),
CursorStat('cursor_update', 'cursor update calls'),
CursorStat('cursor_update_bytes', 'cursor update key and value bytes', 'size'),
CursorStat('cursor_update_bytes_changed', 'cursor update value size change', 'size'),
diff --git a/src/third_party/wiredtiger/ext/storage_sources/dir_store/dir_store.c b/src/third_party/wiredtiger/ext/storage_sources/dir_store/dir_store.c
index 49691778fb4..390f4866466 100644
--- a/src/third_party/wiredtiger/ext/storage_sources/dir_store/dir_store.c
+++ b/src/third_party/wiredtiger/ext/storage_sources/dir_store/dir_store.c
@@ -129,7 +129,7 @@ static int dir_store_configure_int(DIR_STORE *, WT_CONFIG_ARG *, const char *, u
static int dir_store_delay(DIR_STORE *);
static int dir_store_err(DIR_STORE *, WT_SESSION *, int, const char *, ...);
static int dir_store_file_copy(
- DIR_STORE *, WT_SESSION *, const char *, const char *, WT_FS_OPEN_FILE_TYPE);
+ DIR_STORE *, WT_SESSION *, const char *, const char *, WT_FS_OPEN_FILE_TYPE, bool);
static int dir_store_get_directory(const char *, const char *, ssize_t len, bool, char **);
static int dir_store_path(WT_FILE_SYSTEM *, const char *, const char *, char **);
static int dir_store_stat(
@@ -608,7 +608,7 @@ dir_store_exist(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *na
*/
static int
dir_store_file_copy(DIR_STORE *dir_store, WT_SESSION *session, const char *src_path,
- const char *dest_path, WT_FS_OPEN_FILE_TYPE type)
+ const char *dest_path, WT_FS_OPEN_FILE_TYPE type, bool enoent_okay)
{
WT_FILE_HANDLE *dest, *src;
WT_FILE_SYSTEM *wt_fs;
@@ -633,7 +633,12 @@ dir_store_file_copy(DIR_STORE *dir_store, WT_SESSION *session, const char *src_p
}
if ((ret = wt_fs->fs_open_file(wt_fs, session, src_path, type, WT_FS_OPEN_READONLY, &src)) !=
0) {
- ret = dir_store_err(dir_store, session, ret, "%s: cannot open for read", src_path);
+ /*
+ * It is normal and possible that the source file was dropped. Don't print out an error
+ * message in that case, but still return the ENOENT error value.
+ */
+ if ((ret != 0 && ret != ENOENT) || (ret == ENOENT && !enoent_okay))
+ ret = dir_store_err(dir_store, session, ret, "%s: cannot open for read", src_path);
goto err;
}
@@ -708,7 +713,7 @@ dir_store_flush(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session, WT_FILE_
goto err;
if ((ret = dir_store_file_copy(
- dir_store, session, src_path, dest_path, WT_FS_OPEN_FILE_TYPE_DATA)) != 0)
+ dir_store, session, src_path, dest_path, WT_FS_OPEN_FILE_TYPE_DATA, true)) != 0)
goto err;
dir_store->object_writes++;
@@ -1010,7 +1015,7 @@ dir_store_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *nam
goto err;
if ((ret = dir_store_file_copy(
- dir_store, session, bucket_path, cache_path, WT_FS_OPEN_FILE_TYPE_DATA)) != 0)
+ dir_store, session, bucket_path, cache_path, WT_FS_OPEN_FILE_TYPE_DATA, false)) != 0)
goto err;
dir_store->object_reads++;
@@ -1143,6 +1148,7 @@ dir_store_remove_if_exists(
FS2DS(file_system), session, errno, "%s: dir_store_remove stat", file_path);
goto err;
}
+ ret = 0;
} else {
if ((ret = wt_fs->fs_remove(wt_fs, session, file_path, flags)) != 0) {
ret =
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 9aba994d645..19049b0995e 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-master",
- "commit": "2f0657608187ffa9231a01f034d2f6cbde5dba5c"
+ "commit": "a1f21fe61929c12b360d8b50f03bc1584d7b10fd"
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 2cbf4352e2a..2eafebe31be 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -1855,10 +1855,11 @@ __cursor_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
{
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ size_t records_truncated;
uint64_t yield_count, sleep_usecs;
session = CUR2S(start);
- yield_count = sleep_usecs = 0;
+ records_truncated = yield_count = sleep_usecs = 0;
/*
* First, call the cursor search method to re-position the cursor: we may not have a cursor position
@@ -1881,12 +1882,17 @@ retry:
for (;;) {
WT_ERR(rmfunc(start, NULL, WT_UPDATE_TOMBSTONE));
+ ++records_truncated;
- if (stop != NULL && __cursor_equals(start, stop))
+ if (stop != NULL && __cursor_equals(start, stop)) {
+ WT_STAT_CONN_INCRV(session, cursor_truncate_keys_deleted, records_truncated);
return (0);
+ }
- if ((ret = __wt_btcur_next(start, true)) == WT_NOTFOUND)
+ if ((ret = __wt_btcur_next(start, true)) == WT_NOTFOUND) {
+ WT_STAT_CONN_INCRV(session, cursor_truncate_keys_deleted, records_truncated);
return (0);
+ }
WT_ERR(ret);
start->compare = 0; /* Exact match */
@@ -1964,7 +1970,7 @@ err:
* Discard a cursor range from the tree.
*/
int
-__wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
+__wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, bool *is_col_fix)
{
WT_BTREE *btree;
WT_DECL_RET;
@@ -2005,6 +2011,7 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
switch (btree->type) {
case BTREE_COL_FIX:
WT_ERR(__cursor_truncate_fix(start, stop, __cursor_col_modify));
+ *is_col_fix = true;
break;
case BTREE_COL_VAR:
WT_ERR(__cursor_truncate(start, stop, __cursor_col_modify));
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index b9d9ceae927..d2a67943062 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -261,7 +261,19 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile)
WT_ERR(__wt_curhs_open(session, NULL, &ds->hs_cursor));
if (ds->hs_cursor != NULL) {
- F_SET(ds->hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+ /*
+ * For debugging dumps, we want to see everything, not just what is currently visible in
+ * whatever arbitrary context we may have inherited. By default, however, suppress obsolete
+ * entries (those with globally visible stop times). For checkpoint cursors, dump those as
+ * well, not because they are more interesting when reading a checkpoint but because the
+ * visible-all test to hide them needs a copy of the checkpoint snapshot and that's not
+ * easily available. (If we are dumping pages from a checkpoint cursor, it is actually
+ * accessible in the cursor; but the logic for substituting it into the session is private
+ * to cur_file.c and I don't want to either change that or paste a second copy of it. Hiding
+ * a few obsolete history store entries isn't worth either of those changes.)
+ */
+ F_SET(ds->hs_cursor,
+ WT_READING_CHECKPOINT(session) ? WT_CURSTD_HS_READ_ALL : WT_CURSTD_HS_READ_COMMITTED);
WT_ERR(__wt_scr_alloc(session, 0, &ds->hs_key));
WT_ERR(__wt_scr_alloc(session, 0, &ds->hs_value));
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index 53b68ae1c76..11626a99341 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -95,8 +95,9 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
/*
* There should be no previous page-delete information: if the previous fast-truncate didn't
* instantiate the page, then we'd never get here to do another delete; if the previous fast-
- * truncate did instantiate the page, then any fast-truncate information was removed at that
- * point and/or when the fast-truncate transaction was resolved.
+ * truncate did instantiate the page, then (for a read-write tree; we can't get here in a
+ * readonly tree) any fast-truncate information was removed at that point and/or when the
+ * fast-truncate transaction was resolved.
*/
WT_ASSERT(session, ref->ft_info.del == NULL);
@@ -147,7 +148,7 @@ err:
/*
* __wt_delete_page_rollback --
- * Abort fast-truncate operations.
+ * Transaction rollback for a fast-truncate operation.
*/
int
__wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
@@ -185,18 +186,40 @@ __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
- * If the page is still "deleted", it's as we left it, simply reset the state. Otherwise, the
- * page is in an in-memory state, which means it was instantiated at some point. Walk any list
- * of update structures and abort them. We can't use the normal read path to get the pages with
- * updates (the original page may have split, so there many be more than one page), because the
- * session may have closed the cursor, we no longer have the reference to the tree required for
- * a hazard pointer. We're safe since pages with unresolved transactions aren't going anywhere.
+ * There are two possible cases:
+ *
+ * 1. The state is WT_REF_DELETED. In this case ft_info.del cannot be null, because the
+ * operation cannot reach global visibility while its transaction remains uncommitted. The page
+ * itself is as we left it, so we can just reset the state.
+ *
+ * 2. The state is WT_REF_MEM. We check ft_info.update for a list of updates to abort. Allow the
+ * update list to be null to be conservative.
*/
if (current_state == WT_REF_DELETED)
current_state = ref->ft_info.del->previous_ref_state;
- else if ((updp = ref->ft_info.update) != NULL)
- for (; *updp != NULL; ++updp)
- (*updp)->txnid = WT_TXN_ABORTED;
+ else {
+ if ((updp = ref->ft_info.update) != NULL)
+ /*
+ * Walk any list of update structures and abort them. We can't use the normal read path
+ * to get the pages with updates (the original page may have split, so there may be more
+ * than one page), because the session may have closed the cursor, and we no longer have
+ * the reference to the tree required for a hazard pointer. We're safe since pages with
+ * unresolved transactions aren't going anywhere.
+ */
+ for (; *updp != NULL; ++updp)
+ (*updp)->txnid = WT_TXN_ABORTED;
+ WT_ASSERT(session, ref->page != NULL && ref->page->modify != NULL);
+ /*
+ * Drop any page_deleted information that has been moved to the modify structure. Note that
+ * while this must have been an instantiated page, the information (and flag) is only kept
+ * until the page is reconciled for the first time after instantiation, so it might not be
+ * set now.
+ */
+ if (ref->page->modify->instantiated) {
+ ref->page->modify->instantiated = false;
+ __wt_free(session, ref->page->modify->page_del);
+ }
+ }
/*
* Don't set the WT_PAGE_DELETED transaction ID to aborted, discard any WT_UPDATE list or set
@@ -210,6 +233,72 @@ __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
+ * __delete_redo_window_cleanup_internal --
+ * Process one internal page for __wt_delete_redo_window_cleanup. This fixes up the transaction
+ * IDs in the delete info. Since we're called at the end of recovery there's no need to lock the
+ * ref or worry about races.
+ */
+static void
+__delete_redo_window_cleanup_internal(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ WT_REF *child;
+
+ WT_ASSERT(session, F_ISSET(ref, WT_REF_FLAG_INTERNAL));
+ if (ref->page != NULL) {
+ WT_INTL_FOREACH_BEGIN (session, ref->page, child) {
+ if (child->state == WT_REF_DELETED && child->ft_info.del != NULL)
+ __cell_redo_page_del_cleanup(session, ref->page->dsk, child->ft_info.del);
+ }
+ WT_INTL_FOREACH_END;
+ }
+}
+
+/*
+ * __delete_redo_window_cleanup_skip --
+ * Tree-walk skip function for __wt_delete_redo_window_cleanup. This skips all leaf pages; we'll
+ * visit all in-memory internal pages via the flag settings on the tree-walk call.
+ */
+static int
+__delete_redo_window_cleanup_skip(
+ WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool visible_all, bool *skipp)
+{
+ WT_UNUSED(ref);
+ WT_UNUSED(session);
+ WT_UNUSED(context);
+ WT_UNUSED(visible_all);
+
+ *skipp = F_ISSET(ref, WT_REF_FLAG_LEAF);
+ return (0);
+}
+
+/*
+ * __wt_delete_redo_window_cleanup --
+ * Clear old transaction IDs from already-loaded page_del structures to make them look like we
+ * just unpacked the information. Called after the tree write generation is bumped during
+ * recovery so that old transaction IDs don't come back to life. Note that this can only fail if
+ * something goes wrong in the tree walk; it doesn't itself ever fail.
+ */
+int
+__wt_delete_redo_window_cleanup(WT_SESSION_IMPL *session)
+{
+ WT_DECL_RET;
+ WT_REF *ref;
+
+ /*
+ * Walk the tree and look for internal pages holding fast-truncate information. Note: we pass
+ * WT_READ_VISIBLE_ALL because we have no snapshot, but we aren't actually doing any visibility
+ * checks.
+ */
+ ref = NULL;
+ while ((ret = __wt_tree_walk_custom_skip(session, &ref, __delete_redo_window_cleanup_skip, NULL,
+ WT_READ_CACHE | WT_READ_VISIBLE_ALL)) == 0 &&
+ ref != NULL)
+ WT_WITH_PAGE_INDEX(session, __delete_redo_window_cleanup_internal(session, ref));
+
+ return (ret);
+}
+
+/*
* __wt_delete_page_skip --
* If iterating a cursor, skip deleted pages that are either visible to us or globally visible.
*/
@@ -242,6 +331,9 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
* The fast-truncate structure can be freed as soon as the delete is stable: it is only read
* when the ref state is locked. It is worth checking every time we come through because once
* this is freed, we no longer need synchronization to check the ref.
+ *
+ * Note that if the visible_all flag is set, skip already reflects the visible_all result so we
+ * don't need to do it twice.
*/
if (skip && ref->ft_info.del != NULL &&
(visible_all ||
@@ -271,8 +363,8 @@ __tombstone_update_alloc(
*/
if (page_del != NULL) {
upd->txnid = page_del->txnid;
- upd->start_ts = page_del->timestamp;
upd->durable_ts = page_del->durable_timestamp;
+ upd->start_ts = page_del->timestamp;
upd->prepare_state = page_del->prepare_state;
}
*updp = upd;
@@ -284,39 +376,58 @@ __tombstone_update_alloc(
* Instantiate an entirely deleted row-store leaf page.
*/
int
-__wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
+__wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_DELETED *page_del)
{
- WT_BTREE *btree;
WT_DECL_RET;
- WT_INSERT *ins;
- WT_INSERT_HEAD *insert;
WT_PAGE *page;
- WT_PAGE_DELETED *page_del;
WT_ROW *rip;
WT_TIME_WINDOW tw;
WT_UPDATE **upd_array, **update_list, *upd;
size_t size, total_size;
uint32_t count, i;
- btree = S2BT(session);
+ /*
+ * An operation is accessing a "deleted" page, and we're building an in-memory version of the
+ * page (making it look like all entries in the page were individually updated by a remove
+ * operation). We end up here if a transaction used a truncate call to delete the page without
+ * reading it, and something else that can't yet see the truncation decided to read the page.
+ *
+ * This can happen after the truncate transaction resolves, but it can also happen before. In
+ * the latter case, we need to keep track of the updates we populate the page with, so they can
+ * be found when the transaction resolves. The page we're loading might split, in which case
+ * finding the updates any other way would become a problem.
+ *
+ * The page_del structure passed in is either ref->ft_info.del, or under certain circumstances
+ * when that's unavailable, one extracted from the parent page's address cell.
+ */
+
page = ref->page;
- page_del = NULL;
update_list = NULL;
+ /* For now fast-truncate is only supported for row-store. */
+ WT_ASSERT(session, page->type == WT_PAGE_ROW_LEAF);
+
WT_STAT_CONN_DATA_INCR(session, cache_read_deleted);
/* Track the prepared, fast-truncate pages we've had to instantiate. */
- if (ref->ft_info.del != NULL && ref->ft_info.del->prepare_state != WT_PREPARE_INIT)
+ if (page_del != NULL && page_del->prepare_state != WT_PREPARE_INIT)
WT_STAT_CONN_DATA_INCR(session, cache_read_deleted_prepared);
/*
- * Give the page a modify structure. If the tree is already dirty and so will be written, mark
- * the page dirty. (We want to free the deleted pages, but if the handle is read-only or if the
- * application never modifies the tree, we're not able to do so.)
+ * Give the page a modify structure and mark the page dirty if the tree isn't read-only. If the
+ * tree can be written, the page must be marked dirty: otherwise it can be discarded, and that
+ * will lose the truncate information if the parent page hasn't been reconciled since the
+ * truncation happened.
+ *
+ * If the tree cannot be written (checked in page-modify-set), we won't dirty the page. In this
+ * case the truncate information must have been read from the parent page's on-disk cell, so we
+ * can fetch it again if we discard the page and then reread it.
+ *
+ * Truncates can appear in read-only trees (whether a read-only open of the live database or via
+ * a checkpoint cursor) if they were not yet globally visible when the tree was checkpointed.
*/
WT_RET(__wt_page_modify_init(session, page));
- if (btree->modified)
- __wt_page_modify_set(session, page);
+ __wt_page_modify_set(session, page);
/* Allocate the per-page update array if one doesn't already exist. */
if (page->entries != 0 && page->modify->mod_row_update == NULL)
@@ -324,61 +435,33 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
session, page, page->modify->mod_row_update, upd_array, page->entries);
/*
- * An operation is accessing a "deleted" page, and we're building an in-memory version of the
- * page (making it look like all entries in the page were individually updated by a remove
- * operation). There are two cases where we end up here:
- *
- * First, a running transaction used a truncate call to delete the page without reading it, in
- * which case the page reference includes a structure with a transaction ID; the page we're
- * building might split in the future, so we update that structure to include references to all
- * of the update structures we create, so the transaction can abort.
- *
- * Second, a truncate call deleted a page and the truncate resolved, but an older transaction or
- * the stable timestamp forced us to keep the old version of the page around, and then we
- * crashed and recovered or we're running inside a checkpoint, and now we're being forced to
- * read that page.
- *
- * If there's a page-deleted structure that's not yet globally visible, get a reference and
- * migrate transaction ID and timestamp information to the updates (globally visible means the
- * updates don't require that information).
+ * Copy the page-deleted structure's timestamp information into an update for each row on the
+ * page. If the page-deleted structure is NULL, that means the truncate is globally visible, and
+ * therefore committed.
*
* If the truncate operation is not yet resolved, link updates in the page-deleted structure so
- * they can be found when the transaction is aborted or committed, even if they have moved to
- * other pages.
+ * they can be found when the transaction is resolved, even if they have moved to other pages.
*/
- page_del = __wt_page_del_active(session, ref, true) ? ref->ft_info.del : NULL;
if (page_del != NULL && !page_del->committed) {
count = 0;
- if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
- WT_SKIP_FOREACH (ins, insert)
- ++count;
- WT_ROW_FOREACH (page, rip, i) {
+ WT_ROW_FOREACH (page, rip, i)
++count;
- if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
- WT_SKIP_FOREACH (ins, insert)
- ++count;
- }
WT_RET(__wt_calloc_def(session, count + 1, &update_list));
}
- /* Walk the page entries, giving each one a tombstone. */
total_size = size = 0;
count = 0;
upd_array = page->modify->mod_row_update;
- if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
- WT_SKIP_FOREACH (ins, insert) {
- WT_ERR(__tombstone_update_alloc(session, page_del, &upd, &size));
- total_size += size;
- upd->next = ins->upd;
- ins->upd = upd;
- if (update_list != NULL)
- update_list[count++] = upd;
- }
+ /* We just read the page and it's still locked. The insert lists should be empty. */
+ WT_ASSERT(session, WT_ROW_INSERT_SMALLEST(page) == NULL);
+
+ /* Walk the page entries, giving each one a tombstone. */
WT_ROW_FOREACH (page, rip, i) {
/*
* Retrieve the stop time point from the page's row. If we find an existing stop time point
- * we don't need to append a tombstone.
+ * we don't need to append a tombstone. Such rows would not have been visible to the
+ * original truncate operation and were, logically, skipped over rather than re-deleted.
*/
__wt_read_row_time_window(session, page, rip, &tw);
if (!WT_TIME_WINDOW_HAS_STOP(&tw)) {
@@ -389,35 +472,31 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
if (update_list != NULL)
update_list[count++] = upd;
-
- if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
- WT_SKIP_FOREACH (ins, insert) {
- WT_ERR(__tombstone_update_alloc(session, page_del, &upd, &size));
- total_size += size;
- upd->next = ins->upd;
- ins->upd = upd;
-
- if (update_list != NULL)
- update_list[count++] = upd;
- }
}
+
+ /* We just read the page and it's still locked. The insert lists should be empty. */
+ WT_ASSERT(session, WT_ROW_INSERT(page, rip) == NULL);
}
- __wt_cache_page_inmem_incr(session, page, total_size);
/*
- * We no longer need the WT_PAGE_DELETED structure, all of its information should have been
- * transferred to the list of WT_UPDATE structures (if any).
+ * Move the WT_PAGE_DELETED structure to page->modify; all of its information has been copied to
+ * the list of WT_UPDATE structures (if any), but we may still need it for internal page
+ * reconciliation.
*
- * Except when the tree is read-only; in a read-only tree, eviction will just discard the
- * instantiated page instead of saving it, so instead of this being a permanent transition we
- * need to be able to regenerate the instantiated page arbitrarily many times. Note that keeping
- * the structure around would cause horrible things to happen in reconciliation if we ever
- * reached that code; but we won't.
+ * Note: when the page_del passed in isn't the one in the ref, there should be none in the ref.
+ * This only happens in readonly trees (see bt_page.c) and is a consequence of it being possible
+ * for a deleted page to be in WT_REF_DISK state if it's already been instantiated once and then
+ * evicted. In this case we can set modify->page_del to NULL regardless of the truncation's
+ * visibility (rather than copying the passed-in information); modify->page_del is only used by
+ * parent-page reconciliation and readonly trees shouldn't ever reach that code.
*/
- if (!F_ISSET(btree, WT_BTREE_READONLY))
- __wt_overwrite_and_free(session, ref->ft_info.del);
- if (update_list != NULL)
- ref->ft_info.update = update_list;
+ WT_ASSERT(session, page_del == ref->ft_info.del || ref->ft_info.del == NULL);
+ page->modify->instantiated = true;
+ page->modify->page_del = ref->ft_info.del;
+ /* We don't need to null ft_info.del because assigning ft_info.update overwrites it. */
+ ref->ft_info.update = update_list;
+
+ __wt_cache_page_inmem_incr(session, page, total_size);
return (0);
diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c
index fcabc73ac45..9a0d1f8630c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_discard.c
+++ b/src/third_party/wiredtiger/src/btree/bt_discard.c
@@ -215,6 +215,7 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
__wt_ovfl_discard_free(session, page);
__wt_free(session, page->modify->ovfl_track);
+ __wt_free(session, page->modify->page_del);
__wt_spin_destroy(session, &page->modify->page_lock);
__wt_free(session, page->modify);
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index 1cc2db1294e..65528f2fff6 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -738,7 +738,6 @@ __inmem_col_var(
static int
__inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
{
- WT_BTREE *btree;
WT_CELL_UNPACK_ADDR unpack;
WT_DECL_ITEM(current);
WT_DECL_RET;
@@ -747,8 +746,6 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
uint32_t hint;
bool overflow_keys;
- btree = S2BT(session);
-
WT_RET(__wt_scr_alloc(session, 0, &current));
/*
@@ -810,16 +807,6 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
}
WT_REF_SET_STATE(ref, WT_REF_DELETED);
- /*
- * If the tree is already dirty and so will be written, mark the page dirty. (We want to
- * free the deleted pages, but if the handle is read-only or if the application never
- * modifies the tree, we're not able to do so.)
- */
- if (btree->modified) {
- WT_ERR(__wt_page_modify_init(session, page));
- __wt_page_only_modify_set(session, page);
- }
-
ref->addr = unpack.cell;
++refp;
break;
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index c77f409694d..2f6f4e3eb88 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -95,6 +95,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
WT_DECL_RET;
WT_ITEM tmp;
WT_PAGE *notused;
+ WT_PAGE_DELETED *del;
uint32_t page_flags;
uint8_t previous_state;
bool prepare;
@@ -155,15 +156,32 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
if (prepare)
WT_ERR(__wt_page_inmem_prepare(session, ref));
-skip_read:
/*
* In the case of a fast delete, move all of the page's records to a deleted state based on the
* fast-delete information. Skip for special commands that don't care about an in-memory state.
+ *
+ * Note: there are three possible cases - the state was WT_REF_DELETED and ft_info.del was NULL;
+ * the state was WT_REF_DELETED and ft_info.del was non-NULL; and the state was WT_REF_DISK and
+ * the parent page cell was a WT_CELL_ADDR_DEL cell. The last is only valid in a readonly tree.
+ *
+ * ft_info.del gets cleared and set to NULL if the deletion is found to be globally visible;
+ * this can happen in any of several places.
*/
- if (previous_state == WT_REF_DELETED &&
+ del = NULL;
+ if (previous_state == WT_REF_DISK) {
+ WT_ASSERT(session, ref->ft_info.del == NULL);
+ if (addr.del_set) {
+ WT_ASSERT(session, F_ISSET(S2BT(session), WT_BTREE_READONLY));
+ del = &addr.del;
+ }
+ } else
+ del = ref->ft_info.del;
+
+ if ((previous_state == WT_REF_DELETED || del != NULL) &&
!F_ISSET(S2BT(session), WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
- WT_ERR(__wt_delete_page_instantiate(session, ref));
+ WT_ERR(__wt_delete_page_instantiate(session, ref, del));
+skip_read:
F_CLR(ref, WT_REF_FLAG_READING);
WT_REF_SET_STATE(ref, WT_REF_MEM);
@@ -225,7 +243,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
;) {
switch (current_state = ref->state) {
case WT_REF_DELETED:
- if (LF_ISSET(WT_READ_NO_WAIT))
+ /* Optionally limit reads to cache-only. */
+ if (LF_ISSET(WT_READ_CACHE | WT_READ_NO_WAIT))
return (WT_NOTFOUND);
if (LF_ISSET(WT_READ_SKIP_DELETED) &&
__wt_delete_page_skip(session, ref, !F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)))
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 2c6f8449e3c..1fd13c47fe0 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -1271,8 +1271,15 @@ __split_parent_climb(WT_SESSION_IMPL *session, WT_PAGE *page)
* to a different part of the tree where it will be written; in other words, in one part of the
* tree we'll skip the newly created insert split chunk, but we'll write it upon finding it in a
* different part of the tree.
- */
- if (__wt_btree_syncing_by_other_session(session)) {
+ *
+ * Historically we allowed checkpoint itself to trigger an internal split here. That wasn't
+ * correct, since if that split climbs the tree above the immediate parent the checkpoint walk
+ * will potentially miss some internal pages, which is wrong because checkpoint needs to
+ * reconcile the entire internal tree structure. Non-checkpoint cursor traversals don't care
+ * about the internal tree structure, as they only need to reach the next leaf page correctly.
+ * Therefore, it is OK to split concurrently with cursor operations.
+ */
+ if (WT_BTREE_SYNCING(S2BT(session))) {
__split_internal_unlock(session, page);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/config/config_collapse.c b/src/third_party/wiredtiger/src/config/config_collapse.c
index 19879c597b1..17a8406e133 100644
--- a/src/third_party/wiredtiger/src/config/config_collapse.c
+++ b/src/third_party/wiredtiger/src/config/config_collapse.c
@@ -40,14 +40,10 @@ __wt_config_collapse(WT_SESSION_IMPL *session, const char **cfg, char **config_r
WT_ERR_MSG(session, EINVAL, "Invalid configuration key found: '%s'", k.str);
WT_ERR(__wt_config_get(session, cfg, &k, &v));
/* Include the quotes around string keys/values. */
- if (k.type == WT_CONFIG_ITEM_STRING) {
- --k.str;
- k.len += 2;
- }
- if (v.type == WT_CONFIG_ITEM_STRING) {
- --v.str;
- v.len += 2;
- }
+ if (k.type == WT_CONFIG_ITEM_STRING)
+ WT_CONFIG_PRESERVE_QUOTES(session, &k);
+ if (v.type == WT_CONFIG_ITEM_STRING)
+ WT_CONFIG_PRESERVE_QUOTES(session, &v);
WT_ERR(__wt_buf_catfmt(session, tmp, "%.*s=%.*s,", (int)k.len, k.str, (int)v.len, v.str));
}
@@ -121,14 +117,10 @@ __config_merge_scan(
WT_ERR_MSG(session, EINVAL, "Invalid configuration key found: '%s'", k.str);
/* Include the quotes around string keys/values. */
- if (k.type == WT_CONFIG_ITEM_STRING) {
- --k.str;
- k.len += 2;
- }
- if (v.type == WT_CONFIG_ITEM_STRING) {
- --v.str;
- v.len += 2;
- }
+ if (k.type == WT_CONFIG_ITEM_STRING)
+ WT_CONFIG_PRESERVE_QUOTES(session, &k);
+ if (v.type == WT_CONFIG_ITEM_STRING)
+ WT_CONFIG_PRESERVE_QUOTES(session, &v);
/*
* !!!
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 31d4de2953c..7e1cfbfdaab 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -62,7 +62,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure_compatibility_sub
static const WT_CONFIG_CHECK confchk_wiredtiger_open_debug_mode_subconfigs[] = {
{"checkpoint_retention", "int", NULL, "min=0,max=1024", NULL, 0},
{"corruption_abort", "boolean", NULL, NULL, NULL, 0},
- {"cursor_copy", "boolean", NULL, NULL, NULL, 0}, {"eviction", "boolean", NULL, NULL, NULL, 0},
+ {"cursor_copy", "boolean", NULL, NULL, NULL, 0},
+ {"cursor_reposition", "boolean", NULL, NULL, NULL, 0},
+ {"eviction", "boolean", NULL, NULL, NULL, 0},
{"flush_checkpoint", "boolean", NULL, NULL, NULL, 0},
{"log_retention", "int", NULL, "min=0,max=1024", NULL, 0},
{"realloc_exact", "boolean", NULL, NULL, NULL, 0},
@@ -112,9 +114,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure_statistics_log_su
{"wait", "int", NULL, "min=0,max=100000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure_tiered_storage_subconfigs[] = {
- {"local_retention", "int", NULL, "min=0,max=10000", NULL, 0},
- {"object_target_size", "int", NULL, "min=100K,max=10TB", NULL, 0},
- {NULL, NULL, NULL, NULL, NULL, 0}};
+ {"local_retention", "int", NULL, "min=0,max=10000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{"block_cache", "category", NULL, NULL, confchk_wiredtiger_open_block_cache_subconfigs, 12},
@@ -124,7 +124,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{"checkpoint", "category", NULL, NULL, confchk_wiredtiger_open_checkpoint_subconfigs, 2},
{"compatibility", "category", NULL, NULL,
confchk_WT_CONNECTION_reconfigure_compatibility_subconfigs, 1},
- {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 11},
+ {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 12},
{"error_prefix", "string", NULL, NULL, NULL, 0},
{"eviction", "category", NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2},
{"eviction_checkpoint_target", "int", NULL, "min=0,max=10TB", NULL, 0},
@@ -151,7 +151,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{"statistics_log", "category", NULL, NULL,
confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5},
{"tiered_storage", "category", NULL, NULL,
- confchk_WT_CONNECTION_reconfigure_tiered_storage_subconfigs, 2},
+ confchk_WT_CONNECTION_reconfigure_tiered_storage_subconfigs, 1},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\","
@@ -291,7 +291,8 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create_tiered_storage_subconfigs
{"bucket_prefix", "string", NULL, NULL, NULL, 0},
{"cache_directory", "string", NULL, NULL, NULL, 0},
{"local_retention", "int", NULL, "min=0,max=10000", NULL, 0},
- {"name", "string", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+ {"name", "string", NULL, NULL, NULL, 0}, {"object_target_size", "int", NULL, "min=0", NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = {
{"access_pattern_hint", "string", NULL, "choices=[\"none\",\"random\",\"sequential\"]", NULL, 0},
@@ -335,7 +336,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = {
{"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 6},
+ 7},
{"type", "string", NULL, NULL, NULL, 0},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
@@ -493,7 +494,7 @@ static const WT_CONFIG_CHECK confchk_file_config[] = {
{"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 6},
+ 7},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
{"write_timestamp_usage", "string", NULL,
@@ -545,7 +546,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = {
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_object", "boolean", NULL, NULL, NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 6},
+ 7},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
{"version", "string", NULL, NULL, NULL, 0},
@@ -611,7 +612,7 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = {
{"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 6},
+ 7},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
{"write_timestamp_usage", "string", NULL,
@@ -664,7 +665,7 @@ static const WT_CONFIG_CHECK confchk_object_meta[] = {
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_object", "boolean", NULL, NULL, NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 6},
+ 7},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
{"version", "string", NULL, NULL, NULL, 0},
@@ -733,7 +734,7 @@ static const WT_CONFIG_CHECK confchk_tier_meta[] = {
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_object", "boolean", NULL, NULL, NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 6},
+ 7},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
{"version", "string", NULL, NULL, NULL, 0},
@@ -787,7 +788,7 @@ static const WT_CONFIG_CHECK confchk_tiered_meta[] = {
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_object", "boolean", NULL, NULL, NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 6},
+ 7},
{"tiers", "list", NULL, NULL, NULL, 0},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
@@ -833,9 +834,7 @@ static const WT_CONFIG_CHECK confchk_tiered_storage_subconfigs[] = {
{"cache_directory", "string", NULL, NULL, NULL, 0},
{"interval", "int", NULL, "min=1,max=1000", NULL, 0},
{"local_retention", "int", NULL, "min=0,max=10000", NULL, 0},
- {"name", "string", NULL, NULL, NULL, 0},
- {"object_target_size", "int", NULL, "min=100K,max=10TB", NULL, 0},
- {NULL, NULL, NULL, NULL, NULL, 0}};
+ {"name", "string", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_wiredtiger_open_transaction_sync_subconfigs[] = {
{"enabled", "boolean", NULL, NULL, NULL, 0},
@@ -855,7 +854,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{"checkpoint_sync", "boolean", NULL, NULL, NULL, 0},
{"compatibility", "category", NULL, NULL, confchk_wiredtiger_open_compatibility_subconfigs, 3},
{"config_base", "boolean", NULL, NULL, NULL, 0}, {"create", "boolean", NULL, NULL, NULL, 0},
- {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 11},
+ {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 12},
{"direct_io", "list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", NULL, 0},
{"encryption", "category", NULL, NULL, confchk_wiredtiger_open_encryption_subconfigs, 3},
{"error_prefix", "string", NULL, NULL, NULL, 0},
@@ -893,7 +892,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
"\"clear\",\"tree_walk\"]",
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 7},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\","
@@ -938,7 +937,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{"checkpoint_sync", "boolean", NULL, NULL, NULL, 0},
{"compatibility", "category", NULL, NULL, confchk_wiredtiger_open_compatibility_subconfigs, 3},
{"config_base", "boolean", NULL, NULL, NULL, 0}, {"create", "boolean", NULL, NULL, NULL, 0},
- {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 11},
+ {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 12},
{"direct_io", "list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", NULL, 0},
{"encryption", "category", NULL, NULL, confchk_wiredtiger_open_encryption_subconfigs, 3},
{"error_prefix", "string", NULL, NULL, NULL, 0},
@@ -976,7 +975,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
"\"clear\",\"tree_walk\"]",
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 7},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\","
@@ -1020,7 +1019,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{"checkpoint", "category", NULL, NULL, confchk_wiredtiger_open_checkpoint_subconfigs, 2},
{"checkpoint_sync", "boolean", NULL, NULL, NULL, 0},
{"compatibility", "category", NULL, NULL, confchk_wiredtiger_open_compatibility_subconfigs, 3},
- {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 11},
+ {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 12},
{"direct_io", "list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", NULL, 0},
{"encryption", "category", NULL, NULL, confchk_wiredtiger_open_encryption_subconfigs, 3},
{"error_prefix", "string", NULL, NULL, NULL, 0},
@@ -1057,7 +1056,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
"\"clear\",\"tree_walk\"]",
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 7},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\","
@@ -1099,7 +1098,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{"checkpoint", "category", NULL, NULL, confchk_wiredtiger_open_checkpoint_subconfigs, 2},
{"checkpoint_sync", "boolean", NULL, NULL, NULL, 0},
{"compatibility", "category", NULL, NULL, confchk_wiredtiger_open_compatibility_subconfigs, 3},
- {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 11},
+ {"debug_mode", "category", NULL, NULL, confchk_wiredtiger_open_debug_mode_subconfigs, 12},
{"direct_io", "list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", NULL, 0},
{"encryption", "category", NULL, NULL, confchk_wiredtiger_open_encryption_subconfigs, 3},
{"error_prefix", "string", NULL, NULL, NULL, 0},
@@ -1136,7 +1135,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
"\"clear\",\"tree_walk\"]",
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 7},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_reserved_txnid_delay\",\"checkpoint_slow\","
@@ -1194,9 +1193,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
"checkpoint=(log_size=0,wait=0),compatibility=(release=),"
"debug_mode=(checkpoint_retention=0,corruption_abort=true,"
- "cursor_copy=false,eviction=false,flush_checkpoint=false,"
- "log_retention=0,realloc_exact=false,rollback_error=0,"
- "slow_checkpoint=false,table_logging=false,"
+ "cursor_copy=false,cursor_reposition=false,eviction=false,"
+ "flush_checkpoint=false,log_retention=0,realloc_exact=false,"
+ "rollback_error=0,slow_checkpoint=false,table_logging=false,"
"update_restore_evict=false),error_prefix=,"
"eviction=(threads_max=8,threads_min=1),"
"eviction_checkpoint_target=1,eviction_dirty_target=5,"
@@ -1211,8 +1210,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"path=\".\"),shared_cache=(chunk=10MB,name=,quota=0,reserve=0,"
"size=500MB),statistics=none,statistics_log=(json=false,"
"on_close=false,sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "tiered_storage=(local_retention=300,object_target_size=10M),"
- "timing_stress_for_test=,verbose=[]",
+ "tiered_storage=(local_retention=300),timing_stress_for_test=,"
+ "verbose=[]",
confchk_WT_CONNECTION_reconfigure, 30},
{"WT_CONNECTION.rollback_to_stable", "", NULL, 0}, {"WT_CONNECTION.set_file_system", "", NULL, 0},
{"WT_CONNECTION.set_timestamp",
@@ -1266,8 +1265,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"prefix_compression=false,prefix_compression_min=4,source=,"
"split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
"tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
- "cache_directory=,local_retention=300,name=),type=file,"
- "value_format=u,verbose=[],write_timestamp_usage=none",
+ "cache_directory=,local_retention=300,name=,object_target_size=0)"
+ ",type=file,value_format=u,verbose=[],write_timestamp_usage=none",
confchk_WT_SESSION_create, 48},
{"WT_SESSION.drop",
"checkpoint_wait=true,force=false,lock_wait=true,"
@@ -1338,8 +1337,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"prefix_compression=false,prefix_compression_min=4,"
"split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
"tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
- "cache_directory=,local_retention=300,name=),value_format=u,"
- "verbose=[],write_timestamp_usage=none",
+ "cache_directory=,local_retention=300,name=,object_target_size=0)"
+ ",value_format=u,verbose=[],write_timestamp_usage=none",
confchk_file_config, 40},
{"file.meta",
"access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
@@ -1358,8 +1357,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"prefix_compression_min=4,readonly=false,split_deepen_min_child=0"
",split_deepen_per_child=0,split_pct=90,tiered_object=false,"
"tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
- "cache_directory=,local_retention=300,name=),value_format=u,"
- "verbose=[],version=(major=0,minor=0),write_timestamp_usage=none",
+ "cache_directory=,local_retention=300,name=,object_target_size=0)"
+ ",value_format=u,verbose=[],version=(major=0,minor=0),"
+ "write_timestamp_usage=none",
confchk_file_meta, 47},
{"index.meta",
"app_metadata=,assert=(commit_timestamp=none,"
@@ -1388,8 +1388,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"os_cache_max=0,prefix_compression=false,prefix_compression_min=4"
",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
"tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
- "cache_directory=,local_retention=300,name=),value_format=u,"
- "verbose=[],write_timestamp_usage=none",
+ "cache_directory=,local_retention=300,name=,object_target_size=0)"
+ ",value_format=u,verbose=[],write_timestamp_usage=none",
confchk_lsm_meta, 44},
{"object.meta",
"access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
@@ -1408,8 +1408,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"prefix_compression_min=4,readonly=false,split_deepen_min_child=0"
",split_deepen_per_child=0,split_pct=90,tiered_object=false,"
"tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
- "cache_directory=,local_retention=300,name=),value_format=u,"
- "verbose=[],version=(major=0,minor=0),write_timestamp_usage=none",
+ "cache_directory=,local_retention=300,name=,object_target_size=0)"
+ ",value_format=u,verbose=[],version=(major=0,minor=0),"
+ "write_timestamp_usage=none",
confchk_object_meta, 49},
{"table.meta",
"app_metadata=,assert=(commit_timestamp=none,"
@@ -1434,9 +1435,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"prefix_compression=false,prefix_compression_min=4,readonly=false"
",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
"tiered_object=false,tiered_storage=(auth_token=,bucket=,"
- "bucket_prefix=,cache_directory=,local_retention=300,name=),"
- "value_format=u,verbose=[],version=(major=0,minor=0),"
- "write_timestamp_usage=none",
+ "bucket_prefix=,cache_directory=,local_retention=300,name=,"
+ "object_target_size=0),value_format=u,verbose=[],version=(major=0"
+ ",minor=0),write_timestamp_usage=none",
confchk_tier_meta, 50},
{"tiered.meta",
"access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
@@ -1456,8 +1457,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"readonly=false,split_deepen_min_child=0,split_deepen_per_child=0"
",split_pct=90,tiered_object=false,tiered_storage=(auth_token=,"
"bucket=,bucket_prefix=,cache_directory=,local_retention=300,"
- "name=),tiers=,value_format=u,verbose=[],version=(major=0,"
- "minor=0),write_timestamp_usage=none",
+ "name=,object_target_size=0),tiers=,value_format=u,verbose=[],"
+ "version=(major=0,minor=0),write_timestamp_usage=none",
confchk_tiered_meta, 52},
{"wiredtiger_open",
"backup_restore_target=,"
@@ -1470,12 +1471,13 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
"compatibility=(release=,require_max=,require_min=),"
"config_base=true,create=false,debug_mode=(checkpoint_retention=0"
- ",corruption_abort=true,cursor_copy=false,eviction=false,"
- "flush_checkpoint=false,log_retention=0,realloc_exact=false,"
- "rollback_error=0,slow_checkpoint=false,table_logging=false,"
- "update_restore_evict=false),direct_io=,encryption=(keyid=,name=,"
- "secretkey=),error_prefix=,eviction=(threads_max=8,threads_min=1)"
- ",eviction_checkpoint_target=1,eviction_dirty_target=5,"
+ ",corruption_abort=true,cursor_copy=false,cursor_reposition=false"
+ ",eviction=false,flush_checkpoint=false,log_retention=0,"
+ "realloc_exact=false,rollback_error=0,slow_checkpoint=false,"
+ "table_logging=false,update_restore_evict=false),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",eviction_updates_target=0,eviction_updates_trigger=0,"
"exclusive=false,extensions=,file_extend=,"
@@ -1494,11 +1496,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
- "cache_directory=,interval=60,local_retention=300,name=,"
- "object_target_size=10M),timing_stress_for_test=,"
- "transaction_sync=(enabled=false,method=fsync),"
- "use_environment=true,use_environment_priv=false,verbose=[],"
- "verify_metadata=false,write_through=",
+ "cache_directory=,interval=60,local_retention=300,name=),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),use_environment=true,use_environment_priv=false,"
+ "verbose=[],verify_metadata=false,write_through=",
confchk_wiredtiger_open, 58},
{"wiredtiger_open_all",
"backup_restore_target=,"
@@ -1511,12 +1512,13 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
"compatibility=(release=,require_max=,require_min=),"
"config_base=true,create=false,debug_mode=(checkpoint_retention=0"
- ",corruption_abort=true,cursor_copy=false,eviction=false,"
- "flush_checkpoint=false,log_retention=0,realloc_exact=false,"
- "rollback_error=0,slow_checkpoint=false,table_logging=false,"
- "update_restore_evict=false),direct_io=,encryption=(keyid=,name=,"
- "secretkey=),error_prefix=,eviction=(threads_max=8,threads_min=1)"
- ",eviction_checkpoint_target=1,eviction_dirty_target=5,"
+ ",corruption_abort=true,cursor_copy=false,cursor_reposition=false"
+ ",eviction=false,flush_checkpoint=false,log_retention=0,"
+ "realloc_exact=false,rollback_error=0,slow_checkpoint=false,"
+ "table_logging=false,update_restore_evict=false),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",eviction_updates_target=0,eviction_updates_trigger=0,"
"exclusive=false,extensions=,file_extend=,"
@@ -1535,11 +1537,11 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
- "cache_directory=,interval=60,local_retention=300,name=,"
- "object_target_size=10M),timing_stress_for_test=,"
- "transaction_sync=(enabled=false,method=fsync),"
- "use_environment=true,use_environment_priv=false,verbose=[],"
- "verify_metadata=false,version=(major=0,minor=0),write_through=",
+ "cache_directory=,interval=60,local_retention=300,name=),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),use_environment=true,use_environment_priv=false,"
+ "verbose=[],verify_metadata=false,version=(major=0,minor=0),"
+ "write_through=",
confchk_wiredtiger_open_all, 59},
{"wiredtiger_open_basecfg",
"backup_restore_target=,"
@@ -1552,9 +1554,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
"compatibility=(release=,require_max=,require_min=),"
"debug_mode=(checkpoint_retention=0,corruption_abort=true,"
- "cursor_copy=false,eviction=false,flush_checkpoint=false,"
- "log_retention=0,realloc_exact=false,rollback_error=0,"
- "slow_checkpoint=false,table_logging=false,"
+ "cursor_copy=false,cursor_reposition=false,eviction=false,"
+ "flush_checkpoint=false,log_retention=0,realloc_exact=false,"
+ "rollback_error=0,slow_checkpoint=false,table_logging=false,"
"update_restore_evict=false),direct_io=,encryption=(keyid=,name=,"
"secretkey=),error_prefix=,eviction=(threads_max=8,threads_min=1)"
",eviction_checkpoint_target=1,eviction_dirty_target=5,"
@@ -1575,10 +1577,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
- "cache_directory=,interval=60,local_retention=300,name=,"
- "object_target_size=10M),timing_stress_for_test=,"
- "transaction_sync=(enabled=false,method=fsync),verbose=[],"
- "verify_metadata=false,version=(major=0,minor=0),write_through=",
+ "cache_directory=,interval=60,local_retention=300,name=),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),verbose=[],verify_metadata=false,version=(major=0,"
+ "minor=0),write_through=",
confchk_wiredtiger_open_basecfg, 53},
{"wiredtiger_open_usercfg",
"backup_restore_target=,"
@@ -1591,9 +1593,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
"compatibility=(release=,require_max=,require_min=),"
"debug_mode=(checkpoint_retention=0,corruption_abort=true,"
- "cursor_copy=false,eviction=false,flush_checkpoint=false,"
- "log_retention=0,realloc_exact=false,rollback_error=0,"
- "slow_checkpoint=false,table_logging=false,"
+ "cursor_copy=false,cursor_reposition=false,eviction=false,"
+ "flush_checkpoint=false,log_retention=0,realloc_exact=false,"
+ "rollback_error=0,slow_checkpoint=false,table_logging=false,"
"update_restore_evict=false),direct_io=,encryption=(keyid=,name=,"
"secretkey=),error_prefix=,eviction=(threads_max=8,threads_min=1)"
",eviction_checkpoint_target=1,eviction_dirty_target=5,"
@@ -1614,10 +1616,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
- "cache_directory=,interval=60,local_retention=300,name=,"
- "object_target_size=10M),timing_stress_for_test=,"
- "transaction_sync=(enabled=false,method=fsync),verbose=[],"
- "verify_metadata=false,write_through=",
+ "cache_directory=,interval=60,local_retention=300,name=),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),verbose=[],verify_metadata=false,write_through=",
confchk_wiredtiger_open_usercfg, 52},
{NULL, NULL, NULL, 0}};
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 31e06f4ec1b..5cc8e30c78f 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -1975,6 +1975,12 @@ __wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[])
else
FLD_CLR(conn->debug_flags, WT_CONN_DEBUG_CURSOR_COPY);
+ WT_RET(__wt_config_gets(session, cfg, "debug_mode.cursor_reposition", &cval));
+ if (cval.val)
+ FLD_SET(conn->debug_flags, WT_CONN_DEBUG_CURSOR_REPOSITION);
+ else
+ FLD_CLR(conn->debug_flags, WT_CONN_DEBUG_CURSOR_REPOSITION);
+
WT_RET(__wt_config_gets(session, cfg, "debug_mode.eviction", &cval));
if (cval.val)
F_SET(cache, WT_CACHE_EVICT_DEBUG_MODE);
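
Editorial aside, not part of the patch: the hunk above wires the new debug_mode.cursor_reposition setting into the connection's debug flags. As a minimal sketch of how an application would turn it on (the home path "WT_HOME" is a placeholder and error handling is omitted), the flag is enabled through the wiredtiger_open configuration string:

    WT_CONNECTION *conn;
    int ret;

    /* Enable the cursor-reposition debug behavior for the whole connection. */
    ret = wiredtiger_open("WT_HOME", NULL,
      "create,debug_mode=(cursor_reposition=true)", &conn);
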
@@ -2262,7 +2268,7 @@ __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[])
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
const WT_NAME_FLAG *ft;
- uint64_t flags;
+ uint32_t flags;
conn = S2C(session);
@@ -2371,10 +2377,8 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
__wt_config_init(session, &parser, base_config);
while ((ret = __wt_config_next(&parser, &k, &v)) == 0) {
/* Fix quoting for non-trivial settings. */
- if (v.type == WT_CONFIG_ITEM_STRING) {
- --v.str;
- v.len += 2;
- }
+ if (v.type == WT_CONFIG_ITEM_STRING)
+ WT_CONFIG_PRESERVE_QUOTES(session, &v);
WT_ERR(__wt_fprintf(session, fs, "%.*s=%.*s\n", (int)k.len, k.str, (int)v.len, v.str));
}
WT_ERR_NOTFOUND_OK(ret, false);
diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
index ebcb0cc272a..ff6cf27a8c6 100644
--- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
@@ -970,12 +970,13 @@ restart:
* Update the open dhandles write generation, run write generation and base write generation
* number.
*/
-void
+int
__wt_dhandle_update_write_gens(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
conn = S2C(session);
@@ -988,15 +989,26 @@ __wt_dhandle_update_write_gens(WT_SESSION_IMPL *session)
continue;
btree = (WT_BTREE *)dhandle->handle;
- WT_ASSERT(session, btree != NULL);
-
/*
* Initialize the btree write generation numbers after rollback to stable so that the
* transaction ids of the pages will be reset when loaded from disk to memory.
*/
btree->write_gen = btree->base_write_gen = btree->run_write_gen =
WT_MAX(btree->write_gen, conn->base_write_gen);
+
+ /*
+ * Clear out any transaction IDs that might have been already loaded and cached, as they are
+ * now outdated. Currently this is only known to happen in the page_del structure associated
+ * with truncated pages.
+ */
+ if (btree->root.page == NULL)
+ continue;
+
+ WT_WITH_BTREE(session, btree, ret = __wt_delete_redo_window_cleanup(session));
+ WT_RET(ret);
}
+
+ return (0);
}
/*
diff --git a/src/third_party/wiredtiger/src/conn/conn_tiered.c b/src/third_party/wiredtiger/src/conn/conn_tiered.c
index 5337f55f2dd..af98e6516ba 100644
--- a/src/third_party/wiredtiger/src/conn/conn_tiered.c
+++ b/src/third_party/wiredtiger/src/conn/conn_tiered.c
@@ -295,6 +295,8 @@ err:
__wt_scr_free(session, &buf);
if (tracking)
WT_TRET(__wt_meta_track_off(session, true, ret != 0));
+ if (ret == ENOENT)
+ ret = 0;
return (ret);
}
@@ -338,32 +340,34 @@ __tier_do_operation(WT_SESSION_IMPL *session, WT_TIERED *tiered, uint32_t id, co
else {
/* WT_TIERED_WORK_FLUSH */
        /* This call may take a while, and may fail due to network timeout. */
- WT_ERR(storage_source->ss_flush(
- storage_source, &session->iface, bucket_fs, local_name, tmp, NULL));
-
- WT_WITH_CHECKPOINT_LOCK(session,
- WT_WITH_SCHEMA_LOCK(
- session, ret = __tier_flush_meta(session, tiered, local_uri, obj_uri)));
+ ret = storage_source->ss_flush(
+ storage_source, &session->iface, bucket_fs, local_name, tmp, NULL);
+ if (ret == 0)
+ WT_WITH_CHECKPOINT_LOCK(session,
+ WT_WITH_SCHEMA_LOCK(
+ session, ret = __tier_flush_meta(session, tiered, local_uri, obj_uri)));
/*
* If a user did a flush_tier with sync off, it is possible that a drop happened before the
- * flush work unit was processed. Ignore non-existent errors.
+ * flush work unit was processed. Ignore non-existent errors from either previous call.
*/
if (ret == ENOENT)
ret = 0;
- WT_ERR(ret);
+ else {
+ WT_ERR(ret);
- /*
- * After successful flushing, push a work unit to perform whatever post-processing the
- * shared storage wants to do for this object. Note that this work unit is unrelated to the
- * drop local work unit below. They do not need to be in any order and do not interfere with
- * each other.
- */
- WT_ERR(__wt_tiered_put_flush_finish(session, tiered, id));
- /*
- * After successful flushing, push a work unit to drop the local object in the future. The
- * object will be removed locally after the local retention period expires.
- */
- WT_ERR(__wt_tiered_put_drop_local(session, tiered, id));
+ /*
+ * After successful flushing, push a work unit to perform whatever post-processing the
+ * shared storage wants to do for this object. Note that this work unit is unrelated to
+ * the drop local work unit below. They do not need to be in any order and do not
+ * interfere with each other.
+ */
+ WT_ERR(__wt_tiered_put_flush_finish(session, tiered, id));
+ /*
+ * After successful flushing, push a work unit to drop the local object in the future.
+ * The object will be removed locally after the local retention period expires.
+ */
+ WT_ERR(__wt_tiered_put_drop_local(session, tiered, id));
+ }
}
err:
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c
index cb898aacc1c..488a3ba27c0 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c
@@ -345,7 +345,7 @@ __backup_add_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval)
__wt_verbose(session, WT_VERB_BACKUP, "Free blk[%u] entry", i);
break;
}
- __wt_verbose(session, WT_VERB_BACKUP, "Entry blk[%u] has flags 0x%" PRIx64, i, blk->flags);
+ __wt_verbose(session, WT_VERB_BACKUP, "Entry blk[%u] has flags 0x%" PRIx8, i, blk->flags);
}
/*
* We didn't find an entry. This should not happen.
diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c
index 9eeb0ec0fbe..20cc8145c48 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_file.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_file.c
@@ -1153,6 +1153,9 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, c
* history store. (This is not normally done by applications; but it is done by a couple tests,
* and furthermore any internally opened history store cursors come through here, so this case
* does matter.)
+ *
+ * This initialization is repeated when opening the underlying data handle, which is ugly, but
+ * cleanup requires the initialization have happened even if not opening a checkpoint handle.
*/
ckpt_snapshot.ckpt_id = 0;
ckpt_snapshot.oldest_ts = WT_TS_NONE;
diff --git a/src/third_party/wiredtiger/src/docs/backup.dox b/src/third_party/wiredtiger/src/docs/backup.dox
index ea5a1b9e14f..9aee1284e83 100644
--- a/src/third_party/wiredtiger/src/docs/backup.dox
+++ b/src/third_party/wiredtiger/src/docs/backup.dox
@@ -240,6 +240,16 @@ An example of opening the backup data source for log-based incremental backup:
@snippet ex_all.c incremental backup
+@section backup_export Export tables using backup cursor
+
+The URI \c backup:export can be used to generate \c WiredTiger.export, a text file that
+contains metadata for all objects in the database. The file can be specified as the value
+for \c metadata_file to import a table; see WT_SESSION::create for more details.
+The cursor operates like a normal backup cursor: it can be used to iterate over all of the
+files needed for a backup. The main difference is that ::wiredtiger_open will
+ignore \c WiredTiger.export; it will neither try to start the system using the file nor delete it.
+As with all backup cursors, only one backup cursor of any type can be open at a time.
+
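
Editorial sketch, not part of the patch: assuming an open WT_SESSION named session, the export cursor is opened and iterated like any other backup cursor, and the WiredTiger.export file is generated as a side effect; error handling is omitted.

    WT_CURSOR *backup_cursor;
    const char *filename;
    int ret;

    /* Opening the backup:export cursor generates the WiredTiger.export file. */
    ret = session->open_cursor(session, "backup:export", NULL, NULL, &backup_cursor);
    while ((ret = backup_cursor->next(backup_cursor)) == 0) {
        ret = backup_cursor->get_key(backup_cursor, &filename);
        /* Copy "filename" into the backup target as with a normal backup cursor. */
    }
    ret = backup_cursor->close(backup_cursor);
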
@section backup_o_direct Backup and O_DIRECT
Many Linux systems do not support mixing \c O_DIRECT and memory mapping
diff --git a/src/third_party/wiredtiger/src/docs/cursor-ops.dox b/src/third_party/wiredtiger/src/docs/cursor-ops.dox
index 1804e391a20..256b446d84d 100644
--- a/src/third_party/wiredtiger/src/docs/cursor-ops.dox
+++ b/src/third_party/wiredtiger/src/docs/cursor-ops.dox
@@ -78,10 +78,11 @@ can traverse through is particularly useful when there are a larger number
of records present outside of the key range which are not visible to the
search_near caller.
-Cursor positions do not survive transactions: cursors that are open during
-WT_SESSION::begin_transaction, WT_SESSION::commit_transaction or
-WT_SESSION::rollback_transaction will lose their position as if
-WT_CURSOR::reset was called.
+After a transaction is successfully committed, cursors in the session retain
+their position, as well as any currently set keys or values they may have.
+If a transaction is rolled back for any reason, cursors in the session are
+reset (as if the WT_CURSOR::reset method was called), discarding any cursor
+position as well as any currently set keys or values.
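
Editorial sketch of the behavior described above, not part of the patch: cursor and session are assumed to be open on a string-keyed table, "some-key" is a hypothetical key, and error handling is omitted.

    cursor->set_key(cursor, "some-key");
    ret = cursor->search(cursor);

    ret = session->begin_transaction(session, NULL);
    /* ... updates through other cursors in this session ... */
    ret = session->commit_transaction(session, NULL);

    /* After a successful commit the cursor is still positioned on "some-key". */
    ret = cursor->next(cursor);
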
Cursors can be configured to move to a random position with WT_CURSOR::next
is called, see @subpage cursor_random for details.
@@ -146,7 +147,7 @@ WT_CURSOR::search and WT_CURSOR::search_near; the operations that modify
the underlying data are WT_CURSOR::insert, WT_CURSOR::update and
WT_CURSOR::remove.
-If a cursor operation fails (for example, due to a ::WT_ROLLBACK error),
+If a cursor operation fails (for example, due to a ::WT_NOTFOUND error),
it may be retried without calling WT_CURSOR::set_key or
WT_CURSOR::set_value again. That is, the cursor may still reference the
application-supplied memory until the cursor is successfully positioned,
diff --git a/src/third_party/wiredtiger/src/docs/data-sources.dox b/src/third_party/wiredtiger/src/docs/data-sources.dox
index da9a8673cf0..789491beca7 100644
--- a/src/third_party/wiredtiger/src/docs/data-sources.dox
+++ b/src/third_party/wiredtiger/src/docs/data-sources.dox
@@ -22,9 +22,16 @@ Some administrative tasks can be accomplished using the following special
cursor types that give access to data managed by WiredTiger:
<table>
@hrow{URI, Type, Notes}
-@row{<tt>backup:[query_id]</tt>,
- backup cursor (optionally only returning block incremental ids if
- <code>query_id</code> is appended),
+@row{<tt>backup:</tt>,
+ backup cursor,
+ key=<code>string</code>\, see @ref backup for details}
+@row{<tt>backup:export</tt>,
+ export cursor that generates a text file WiredTiger.export. The file contains metadata for all
+ objects in the database. It can be used in the import process as the value for
+  the <code>metadata_file</code> configuration option,
+ key=<code>string</code>\, see @ref backup_export for details}
+@row{<tt>backup:query_id</tt>,
+ backup cursor that only returns block incremental ids,
key=<code>string</code>\, see @ref backup for details}
@row{<code>log:</code>,
log cursor,
diff --git a/src/third_party/wiredtiger/src/docs/explain-isolation.dox b/src/third_party/wiredtiger/src/docs/explain-isolation.dox
index ecb8175e51c..d07074f8e31 100644
--- a/src/third_party/wiredtiger/src/docs/explain-isolation.dox
+++ b/src/third_party/wiredtiger/src/docs/explain-isolation.dox
@@ -10,8 +10,8 @@ phantoms are possible.
Transactions cannot see changes made by other transactions before those
transactions are committed. Dirty reads are not possible;
non-repeatable reads and phantoms are possible. Committed changes from
-concurrent transactions become visible when no cursor is positioned in
-the read-committed transaction.
+concurrent transactions become visible periodically during the lifecycle
+of the transaction.
- <code>snapshot</code>:
Transactions read the versions of records committed before the transaction
diff --git a/src/third_party/wiredtiger/src/docs/timestamp-txn.dox b/src/third_party/wiredtiger/src/docs/timestamp-txn.dox
index 9bafa55f2f8..a900ed763fd 100644
--- a/src/third_party/wiredtiger/src/docs/timestamp-txn.dox
+++ b/src/third_party/wiredtiger/src/docs/timestamp-txn.dox
@@ -65,6 +65,66 @@ the library will log an error message and drop core at the failing check.
These are best-effort checks by WiredTiger, and there are cases where
application misbehavior will not be detected.
+@section timestamp_txn_api_commit_timestamp Setting the transaction's commit timestamp
+
+The \c commit time is the time at which other transactions with appropriately set
+read timestamps will see the transaction's updates.
+
+The commit timestamp can be set at any point in the transaction's lifecycle.
+For prepared transactions, however, it can only be set after the transaction
+has been successfully prepared.
+
+\warning Commit (and prepare) timestamps must not be set in the past
+of any read timestamp
+that has ever been used. This rule is enforced by assertions in diagnostic
+builds, but if applications violate this rule in non-diagnostic builds, data
+consistency can be violated.
+Similarly, because reading without a read timestamp reads the latest
+values for all keys, one must not commit into the past of such a
+transaction.
+
+Applications using timestamps usually specify a timestamp to the
+WT_SESSION::commit_transaction method to set the commit time for all updates in
+the transaction.
+
+For prepared transactions, the commit timestamp must not be before the prepare
+timestamp. Otherwise, the commit timestamp must be after the stable timestamp.
+
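
Editorial sketch, not part of the patch: session is an open WT_SESSION, the timestamp value 0x2a is an arbitrary example written as a hexadecimal configuration string, and error handling is omitted. The usual single-timestamp pattern looks like this:

    ret = session->begin_transaction(session, NULL);
    /* ... make updates ... */
    /* All updates in the transaction become visible at timestamp 0x2a. */
    ret = session->commit_transaction(session, "commit_timestamp=2a");
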
+@section timestamp_txn_api_commit_multi_timestamp Setting multiple commit timestamps
+
+Applications may set different commit timestamps for different updates in a
+single transaction by calling WT_SESSION::timestamp_transaction repeatedly to
+set a new commit timestamp between updates in the transaction. Each new commit
+timestamp is applied to any subsequent updates. This gives applications the
+ability to commit updates that take effect at different times;
+that is, it is possible to create chains of updates where each
+update appears at a different time to readers. For transactions that set
+multiple commit timestamps, the first commit timestamp set is also required to
+be the earliest: the second and subsequent commit timestamps may not be
+earlier than the first commit timestamp. This feature is not compatible with
+prepared transactions, which must use only a single commit timestamp.
+
+This functionality is generally available as a mechanism to allow an optimized
+implementation of re-creating a timestamp view of a data set. For example, in
+a MongoDB replica set, content is generated on one node, where transactions are
+assigned a single commit timestamp, and that content is then re-created on the
+other members of the replica set. When re-creating the content, multiple changes
+from the original node are batched together into a single WiredTiger transaction.
+
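
Editorial sketch of the chained-timestamp pattern described above, not part of the patch: same assumptions as the previous example, with two arbitrary timestamps where the second is not earlier than the first.

    ret = session->begin_transaction(session, NULL);
    ret = session->timestamp_transaction(session, "commit_timestamp=30");
    /* ... updates that should appear to readers at timestamp 0x30 ... */
    ret = session->timestamp_transaction(session, "commit_timestamp=40");
    /* ... updates that should appear to readers at timestamp 0x40 ... */
    ret = session->commit_transaction(session, NULL);
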
+@section timestamp_txn_api_read_timestamp Setting the transaction's read timestamp
+
+Setting the transaction's read timestamp causes a transaction to not see any
+commits with a newer timestamp. (Updates may still conflict with commits having
+a newer timestamp, of course.)
+
+The read timestamp may be set to any time equal to or after the system's
+\c oldest timestamp.
+
+This restriction is enforced and applications can rely on an error return to
+detect attempts to set the read timestamp older than the \c oldest timestamp.
+
+The read timestamp may only be set once in the lifetime of a transaction.
+
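
Editorial sketch of reading as of a timestamp, not part of the patch: the timestamp 0x2a is arbitrary and error handling is omitted; a read-only transaction is conventionally ended with rollback_transaction.

    /* The read timestamp must be equal to or after the system's oldest timestamp. */
    ret = session->begin_transaction(session, "read_timestamp=2a");
    /* ... reads see only commits with timestamps at or before 0x2a ... */
    ret = session->rollback_transaction(session, NULL);
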
@section timestamp_txn_api_query Querying transaction timestamp information
The following table lists the timestamps that can be queried using
@@ -119,55 +179,4 @@ points in the transaction's lifetime, using WT_SESSION::timestamp_transaction:
| prepare_timestamp | > stable and >= any system read timestamp | the transaction's prepare timestamp, see @ref timestamp_prepare for details |
| read_timestamp | >= oldest | the transaction's read timestamp, see @ref timestamp_txn_api_read_timestamp for details |
-@section timestamp_txn_api_commit_timestamp Setting the transaction's commit timestamp
-
-The \c commit time is the time at which other transactions with appropriately set
-read timestamps will see the transaction's updates.
-
-The commit timestamp can be set at any point in the transaction's lifecycle.
-For prepared transactions, however, it can only be set after the transaction
-has been successfully prepared.
-
-\warning Commit (and prepare) timestamps must not be set in the past
-of any read timestamp
-that has ever been used. This rule is enforced by assertions in diagnostic
-builds, but if applications violate this rule in non-diagnostic builds, data
-consistency can be violated.
-Similarly, because reading without a read timestamp reads the latest
-values for all keys, one must not commit into the past of such a
-transaction.
-
-Applications using timestamps usually specify a timestamp to the
-WT_SESSION::commit_transaction method to set the commit time for all updates in
-the transaction.
-
-Applications may set different commit timestamps for different updates in a
-single transaction by calling WT_SESSION::timestamp_transaction repeatedly to
-set a new commit timestamp between updates in the transaction. Each new commit
-timestamp is applied to any subsequent updates. This gives applications the
-ability to commit updates that take effect at different times;
-that is, it is possible to create chains of updates where each
-update appears at a different time to readers. For transactions that set
-multiple commit timestamps, the first commit timestamp set is also required to
-be the earliest: the second and subsequent commit timestamps may not be
-earlier than the first commit timestamp. This feature is not compatible with
-prepared transactions, which must use only a single commit timestamp.
-
-For prepared transactions, the commit timestamp must not be before the prepare
-timestamp. Otherwise, the commit timestamp must be after the stable timestamp.
-
-@section timestamp_txn_api_read_timestamp Setting the transaction's read timestamp
-
-Setting the transaction's read timestamp causes a transaction to not see any
-commits with a newer timestamp. (Updates may still conflict with commits having
-a newer timestamp, of course.),
-
-The read timestamp may be set to any time equal to or after the system's
-\c oldest timestamp.
-
-This restriction is enforced and applications can rely on an error return to
-detect attempts to set the read timestamp older than the \c oldest timestamp.
-
-The read timestamp may only be set once in the lifetime of a transaction.
-
*/
diff --git a/src/third_party/wiredtiger/src/docs/transactions_api.dox b/src/third_party/wiredtiger/src/docs/transactions_api.dox
index 3ccf2776fea..641ba2e8ed2 100644
--- a/src/third_party/wiredtiger/src/docs/transactions_api.dox
+++ b/src/third_party/wiredtiger/src/docs/transactions_api.dox
@@ -25,6 +25,12 @@ effects may be discarded by calling WT_SESSION::rollback_transaction. If
WT_SESSION::commit_transaction returns any error, the transaction was rolled
back, not committed.
+Schema-changing operations are not generally transactional in WiredTiger: they
+cannot be grouped together within the scope of a transaction and atomically
+committed or aborted. Think of them as one-shot transactions where the
+operation either succeeds or fails. Examples of schema-changing operations are
+table create, drop, and rename.
+
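+For illustration only (the table name is hypothetical), each of the following
+calls is an independent, atomic operation rather than part of any surrounding
+transaction:
+
+@code
+session->create(session, "table:example", "key_format=S,value_format=S");
+session->rename(session, "table:example", "table:example2", NULL);
+session->drop(session, "table:example2", NULL);
+@endcode
+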
A data operation executed within a transaction can fail if it conflicts with an
operation in another concurrently running transaction. (A conflict occurs
between two operations when letting both of them continue would lead to a
diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox
index d1cc9902ad0..4130cecd6a4 100644
--- a/src/third_party/wiredtiger/src/docs/upgrading.dox
+++ b/src/third_party/wiredtiger/src/docs/upgrading.dox
@@ -1,6 +1,6 @@
/*! @page upgrading Upgrading WiredTiger applications
-@section version_1101 Upgrading to Version 11.0.1
+@section version_1100 Upgrading to Version 11.0.0
The WiredTiger 11.0 release is a complete refresh of the WiredTiger storage engine.
Applications written to earlier versions of the WiredTiger API will require review
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 03125dd93ae..7bf8a99564f 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -458,6 +458,14 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent)
WT_INTL_FOREACH_END;
/*
+ * It is always OK to evict pages from checkpoint cursor trees if they don't have children, and
+ * visibility checks for pages deleted in the checkpoint aren't needed (or correct when done in
+ * eviction threads).
+ */
+ if (WT_READING_CHECKPOINT(session))
+ return (0);
+
+ /*
* The fast check is done and there are no cursors in the child pages. Make sure the child
* WT_REF structures pages can be discarded.
*/
@@ -692,6 +700,9 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool
WT_ASSERT(session, LF_ISSET(WT_REC_VISIBLE_ALL) || F_ISSET(session->txn, WT_TXN_HAS_SNAPSHOT));
+ /* We should not be trying to evict using a checkpoint-cursor transaction. */
+ WT_ASSERT(session, !F_ISSET(session->txn, WT_TXN_IS_CHECKPOINT));
+
/*
* Reconcile the page. Force read-committed isolation level if we are using snapshots for
* eviction workers or application threads.
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index 76ea129011a..ca419ab5143 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -305,28 +305,15 @@
#define CURSOR_UPDATE_API_END_STAT(s, ret, api) CURSOR_UPDATE_API_END_RETRY_STAT(s, ret, true, api)
/*
- * FIXME-WT-9372 The cursor reposition code has been disabled to isolate performance impact of a
- * couple of eviction bugs. We are going to introduce a debug configuration option to control
- * switching the feature on and off. This will help evaluate the need to have the repositioning
- * logic and to tune it once MongoDB enables yielding during long running transactions.
- */
-#if 0
-/*
* Calling certain top level APIs allows for internal repositioning of cursors to facilitate
* eviction of hot pages. These macros facilitate tracking when that is OK.
*/
-#define CURSOR_REPOSITION_ENTER(c, s) \
- do { \
- if ((s)->api_call_counter == 1) \
- F_SET((c), WT_CURSTD_EVICT_REPOSITION);
-
-#define CURSOR_REPOSITION_END(c, s) \
- if ((s)->api_call_counter == 1) \
- F_CLR((c), WT_CURSTD_EVICT_REPOSITION); \
- } \
- while (0) \
- ;
-#else
-#define CURSOR_REPOSITION_ENTER(c, s)
-#define CURSOR_REPOSITION_END(c, s)
-#endif
+#define CURSOR_REPOSITION_ENTER(c, s) \
+ if (FLD_ISSET(S2C(s)->debug_flags, WT_CONN_DEBUG_CURSOR_REPOSITION) && \
+ (s)->api_call_counter == 1) \
+ F_SET((c), WT_CURSTD_EVICT_REPOSITION)
+
+#define CURSOR_REPOSITION_END(c, s) \
+ if (FLD_ISSET(S2C(s)->debug_flags, WT_CONN_DEBUG_CURSOR_REPOSITION) && \
+ (s)->api_call_counter == 1) \
+ F_CLR((c), WT_CURSTD_EVICT_REPOSITION)
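A minimal sketch of enabling the repositioning behavior at run time, assuming
the debug_mode "cursor_reposition" connection configuration documented later in
this change (the helper name and home directory are hypothetical):

    #include <wiredtiger.h>

    /* Sketch: open a connection with the cursor-reposition debug option enabled. */
    static int
    open_with_cursor_reposition(const char *home, WT_CONNECTION **connp)
    {
        return (wiredtiger_open(
          home, NULL, "create,debug_mode=(cursor_reposition=true)", connp));
    }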
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index ad4bdc5e79d..9257221ab28 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -150,20 +150,6 @@ struct __wt_addr {
};
/*
- * WT_ADDR_COPY --
- * We have to lock the WT_REF to look at a WT_ADDR: a structure we can use to quickly get a
- * copy of the WT_REF address information.
- */
-struct __wt_addr_copy {
- WT_TIME_AGGREGATE ta;
-
- uint8_t type;
-
- uint8_t addr[255 /* WT_BTREE_MAX_ADDR_COOKIE */];
- uint8_t size;
-};
-
-/*
* Overflow tracking for reuse: When a page is reconciled, we write new K/V overflow items. If pages
* are reconciled multiple times, we need to know if we've already written a particular overflow
* record (so we don't write it again), as well as if we've modified an overflow record previously
@@ -457,6 +443,10 @@ struct __wt_page_modify {
/* Overflow record tracking for reconciliation. */
WT_OVFL_TRACK *ovfl_track;
+ /* Cached page-delete information for newly instantiated deleted pages. */
+ WT_PAGE_DELETED *page_del; /* Deletion information; NULL if globally visible. */
+ bool instantiated; /* True if this is a newly instantiated page. */
+
#define WT_PAGE_LOCK(s, p) __wt_spin_lock((s), &(p)->modify->page_lock)
#define WT_PAGE_TRYLOCK(s, p) __wt_spin_trylock((s), &(p)->modify->page_lock)
#define WT_PAGE_UNLOCK(s, p) __wt_spin_unlock((s), &(p)->modify->page_lock)
@@ -960,15 +950,78 @@ struct __wt_ref {
#define ref_ikey key.ikey
/*
- * Fast-truncate information. When a WT_REF is included in a fast-truncate operation, WT_REF.del
- * is allocated and initialized. If the page must be instantiated before the truncate becomes
- * globally visible, WT_UPDATE structures are created for the page entries, the transaction
- * information from WT_REF.del is migrated to those WT_UPDATE structures, and the WT_REF.del
- * field is freed and replaced by the WT_REF.update array (needed for subsequent transaction
- * commit/abort). Doing anything other than testing if WT_REF.del/update is non-NULL (which
- * eviction does), requires the WT_REF be locked. If the locked WT_REF's previous state was
- * WT_REF_DELETED, WT_REF.del is valid, if the WT_REF's previous state was an in-memory state,
- * then WT_REF.update is valid.
+ * Fast-truncate information, written-to/read-from disk as necessary in the internal page's
+ * deleted page proxy cell. When a WT_REF first becomes part of a fast-truncate operation, the
+ * ft_info.del field is allocated and initialized.
+ *
+ * Fast-truncate pages might have to be instantiated if a thread for which the operation isn't
+ * visible accesses the page. This can happen if the operation hasn't committed yet; it can also
+ * happen if an older read transaction visits the page, and it can happen if the fast-truncate
+ * operation is included in a checkpoint and then seen later, after a restart or via a
+ * checkpoint cursor.
+ *
+ * If the page must be instantiated for any reason: (1) WT_UPDATE structures are created for the
+ * page entries, (2) the transaction information from ft_info.del is copied to those WT_UPDATE
+ * structures (making them a match for the truncate operation), (3) the ft_info.del field is
+ * discarded, and (4) the WT_REF state switches to WT_REF_MEM.
+ *
+ * If the fast-truncate operation has not yet committed, additionally the ft_info.update field
+ * is created, which is an array of references to the WT_UPDATE structures, for subsequent
+ * transaction commit/abort. (The page can split, so there needs to be some way to find all of
+ * the update structures.)
+ *
+ * Doing anything other than testing if ft_info.del or ft_info.update is non-NULL (which
+ * eviction does) requires the WT_REF be locked.
+ *
+ * Because ft_info is a union it is important to always access the correct field. It is also
+ * vital to interpret the state correctly and consider all the possible cases.
+ *
+ * The union access should be ft_info.del if the state is WT_REF_DELETED (states 1 and 2 below),
+ * and should be ft_info.update if the state is WT_REF_MEM (states 5-6 below). Otherwise,
+ * neither field is valid and the pointer should always be NULL.
+ *
+ * These are the possible states:
+ *
+ * 1. The WT_REF state is WT_REF_DELETED and ft_info.del is NULL. This means the page is deleted
+ * and the deletion is globally visible. Any on-disk page has been or will be discarded.
+ *
+ * 2. The WT_REF state is WT_REF_DELETED and ft_info.del is not NULL. The page is deleted, but
+ * the deletion may not yet be globally visible (or visible to any given reader, either). The
+ * on-disk page remains in case we need it to satisfy reads. ft_info.del describes the delete
+ * operation. If it is necessary to read the page on behalf of a thread that cannot see the
+ * deletion, the page must be instantiated as described above.
+ *
+ * 3. The WT_REF state is WT_REF_DISK, and the parent page's address cell is a deleted-address
+ * cell. ft_info is not valid; ft_info.del should read as NULL. The page is on disk, and
+ * deleted; the deletion may not yet be globally visible. Because the time aggregate stored in
+ * the parent internal page includes the deletion time, tree walks will skip the page as
+ * appropriate without needing the fast-delete information. This state can only happen in
+ * readonly trees; it is a result of the page being read in and instantiated, but not marked
+ * dirty, then discarded by eviction. (In principle eviction should set the state back to
+ * WT_REF_DELETED in this case; however, this turns out to be awkward and we work around it
+ * instead.) This state only arises in two places: when reading in the page, and in some cases
+ * of skipping over the page; both cases already need to unpack the address cell, so we can use
+ * it to retrieve the fast-delete information. Other than these considerations, this state is
+ * indistinguishable from state 4.
+ *
+ * 4. The WT_REF state is WT_REF_DISK, and the parent page's address cell is not a
+ * deleted-address cell. ft_info is not valid; ft_info.del should read as NULL. This is an
+ * ordinary on-disk page.
+ *
+ * 5. The WT_REF state is WT_REF_MEM, and ft_info.update is NULL. This is an ordinary in-memory
+ * page.
+ *
+ * 6. The WT_REF state is WT_REF_MEM, and ft_info.update is not NULL. This is a deleted page
+ * that was instantiated when the delete transaction was not yet resolved. ft_info.update is the
+ * list of updates created by the instantiation, which is used to commit or abort them as needed
+ * and then cleared. It is not possible to get to this state if the truncate information was
+ * read from disk; uncommitted (including prepared) truncates are not evicted or checkpointed.
+ *
+ * In both states 5 and 6, the page will have a modify structure to hold the instantiated
+ * tombstones. If the tree is read-write, the page will be marked dirty. Until it is reconciled,
+ * modify->instantiated will also be set to true, and modify->page_del will hold the page-delete
+ * information used for the instantiation, if any. This is needed under some circumstances
+ * for checkpointing internal pages.
*/
union {
WT_PAGE_DELETED *del; /* Page not instantiated, page-deleted structure */
diff --git a/src/third_party/wiredtiger/src/include/btree_inline.h b/src/third_party/wiredtiger/src/include/btree_inline.h
index 6a0935a5b63..8314d636ee4 100644
--- a/src/third_party/wiredtiger/src/include/btree_inline.h
+++ b/src/third_party/wiredtiger/src/include/btree_inline.h
@@ -1451,6 +1451,23 @@ __wt_row_leaf_value_cell(
}
/*
+ * WT_ADDR_COPY --
+ * We have to lock the WT_REF to look at a WT_ADDR: a structure we can use to quickly get a
+ * copy of the WT_REF address information.
+ */
+struct __wt_addr_copy {
+ uint8_t type;
+
+ uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE];
+ uint8_t size;
+
+ WT_TIME_AGGREGATE ta;
+
+ WT_PAGE_DELETED del; /* Fast-truncate page information */
+ bool del_set;
+};
+
+/*
* __wt_ref_addr_copy --
* Return a copy of the WT_REF address information.
*/
@@ -1463,6 +1480,7 @@ __wt_ref_addr_copy(WT_SESSION_IMPL *session, WT_REF *ref, WT_ADDR_COPY *copy)
unpack = &_unpack;
page = ref->home;
+ copy->del_set = false;
/*
* To look at an on-page cell, we need to look at the parent page's disk image, and that can be
@@ -1488,7 +1506,7 @@ __wt_ref_addr_copy(WT_SESSION_IMPL *session, WT_REF *ref, WT_ADDR_COPY *copy)
/* If on-page, the pointer references a cell. */
__wt_cell_unpack_addr(session, page->dsk, (WT_CELL *)addr, unpack);
WT_TIME_AGGREGATE_COPY(&copy->ta, &unpack->ta);
- copy->type = 0; /* Avoid static analyzer uninitialized value complaints. */
+
switch (unpack->raw) {
case WT_CELL_ADDR_INT:
copy->type = WT_ADDR_INT;
@@ -1496,6 +1514,20 @@ __wt_ref_addr_copy(WT_SESSION_IMPL *session, WT_REF *ref, WT_ADDR_COPY *copy)
case WT_CELL_ADDR_LEAF:
copy->type = WT_ADDR_LEAF;
break;
+ case WT_CELL_ADDR_DEL:
+ /* Copy out any fast-truncate information. */
+ copy->del_set = true;
+ if (F_ISSET(page->dsk, WT_PAGE_FT_UPDATE))
+ copy->del = unpack->page_del;
+ else {
+ /* It's a legacy page; create default delete information. */
+ copy->del.txnid = WT_TXN_NONE;
+ copy->del.timestamp = copy->del.durable_timestamp = WT_TS_NONE;
+ copy->del.prepare_state = 0;
+ copy->del.previous_ref_state = WT_REF_DISK;
+ copy->del.committed = true;
+ }
+ /* FALLTHROUGH */
case WT_CELL_ADDR_LEAF_NO:
copy->type = WT_ADDR_LEAF_NO;
break;
@@ -1524,27 +1556,55 @@ __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
- * __wt_page_del_active --
- * Return if a truncate operation is active.
+ * __wt_page_del_visible --
+ * Return if a truncate operation is visible to the caller.
*/
static inline bool
-__wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
+__wt_page_del_visible(WT_SESSION_IMPL *session, WT_PAGE_DELETED *page_del, bool visible_all)
{
- WT_PAGE_DELETED *page_del;
uint8_t prepare_state;
- WT_ASSERT(session, ref->state == WT_REF_LOCKED);
+ /*
+ * In general usage, a NULL WT_PAGE_DELETED is a truncate operation whose details were discarded
+ * when it became globally visible.
+ */
+ if (page_del == NULL)
+ return (true);
+
+ /* We discard page_del on transaction abort, so should never see an aborted one. */
+ WT_ASSERT(session, page_del->txnid != WT_TXN_ABORTED);
- if ((page_del = ref->ft_info.del) == NULL)
- return (false);
- if (page_del->txnid == WT_TXN_ABORTED)
- return (false);
WT_ORDERED_READ(prepare_state, page_del->prepare_state);
if (prepare_state == WT_PREPARE_INPROGRESS || prepare_state == WT_PREPARE_LOCKED)
- return (true);
+ return (false);
+
return (visible_all ?
- !__wt_txn_visible_all(session, page_del->txnid, page_del->durable_timestamp) :
- !__wt_txn_visible(session, page_del->txnid, page_del->timestamp));
+ __wt_txn_visible_all(session, page_del->txnid, page_del->durable_timestamp) :
+ __wt_txn_visible(session, page_del->txnid, page_del->timestamp));
+}
+
+/*
+ * __wt_page_del_active --
+ * Return if a truncate operation is active.
+ */
+static inline bool
+__wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
+{
+ /*
+ * Return if a truncate operation is active: "active" means approximately that the truncate is
+ * still in progress, that is, that the underlying original page may still be required. This
+ * function in practice is actually a visibility test (it returns whether the truncate is *not*
+ * visible) and should be renamed and have its sense flipped to be more consistent with the rest
+ * of the system.
+ *
+ * Our caller should have already locked the WT_REF and confirmed that the previous state was
+ * WT_REF_DELETED. Consequently there are two possible cases: either ft_info.del is NULL (in
+ * which case the deletion is globally visible and cannot be rolled back) or it is not, in which
+ * case the information in ft_info.del gives us the visibility.
+ */
+ WT_ASSERT(session, ref->state == WT_REF_LOCKED);
+
+ return (!__wt_page_del_visible(session, ref->ft_info.del, visible_all));
}
/*
@@ -1719,15 +1779,18 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
page = ref->page;
mod = page->modify;
- /* Never modified pages can always be evicted. */
+ /* Pages without modify structures can always be evicted, it's just discarding a disk image. */
if (mod == NULL)
return (true);
/*
- * If a fast-truncate page is subsequently instantiated, it can become an eviction candidate. If
- * the fast-truncate itself has not resolved when the page is instantiated, a list of updates is
- * created, which will be discarded as part of transaction resolution. Don't attempt to evict a
- * fast-truncate page until any update list has been removed.
+ * Check the fast-truncate information. Pages with an uncommitted truncate cannot be evicted.
+ *
+ * Because the page is in memory, we look at ft_info.update. If it's not NULL, that means the
+ * truncate operation isn't committed.
+ *
+ * The list of updates in ft_info.update will be discarded when the transaction they belong to
+ * is resolved.
*/
if (ref->ft_info.update != NULL)
return (false);
@@ -2065,6 +2128,7 @@ __wt_btcur_skip_page(
WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool visible_all, bool *skipp)
{
WT_ADDR_COPY addr;
+ WT_BTREE *btree;
uint8_t previous_state;
WT_UNUSED(context);
@@ -2072,38 +2136,76 @@ __wt_btcur_skip_page(
*skipp = false; /* Default to reading */
+ btree = S2BT(session);
+
/* Don't skip pages in FLCS trees; deleted records need to read back as 0. */
- if (S2BT(session)->type == BTREE_COL_FIX)
+ if (btree->type == BTREE_COL_FIX)
return (0);
/*
* Determine if all records on the page have been deleted and all the tombstones are visible to
* our transaction. If so, we can avoid reading the records on the page and move to the next
- * page. We base this decision on the aggregate stop point added to the page during the last
- * reconciliation. We can skip this test if the page has been modified since it was reconciled.
- * We also skip this test on an internal page, as we rely on reconciliation to mark the internal
- * page dirty. There could be a period of time when the internal page is marked clean but the
- * leaf page is dirty and has newer data than let on by the internal page's aggregated
- * information.
+ * page.
*
- * We are making these decisions while holding a lock for the page as checkpoint or eviction can
- * make changes to the data structures (i.e., aggregate timestamps) we are reading. It is okay
- * if the page is not in memory, or gets evicted before we lock it. In such a case, we can forgo
- * checking if the page has been modified. So, only do a page modified check if the page was in
- * memory before locking.
+ * Skip this test on an internal page, as we rely on reconciliation to mark the internal page
+ * dirty. There could be a period of time when the internal page is marked clean but the leaf
+ * page is dirty and has newer data than let on by the internal page's aggregated information.
*/
if (F_ISSET(ref, WT_REF_FLAG_INTERNAL))
return (0);
+ /*
+ * We are making these decisions while holding a lock for the page as checkpoint or eviction can
+ * make changes to the data structures (i.e., aggregate timestamps) we are reading.
+ */
WT_REF_LOCK(session, ref, &previous_state);
- if ((previous_state == WT_REF_DISK || previous_state == WT_REF_DELETED ||
- (previous_state == WT_REF_MEM && !__wt_page_is_modified(ref->page))) &&
- __wt_ref_addr_copy(session, ref, &addr) && addr.ta.newest_stop_txn != WT_TXN_MAX &&
- addr.ta.newest_stop_ts != WT_TS_MAX &&
- __wt_txn_visible(session, addr.ta.newest_stop_txn, addr.ta.newest_stop_ts))
+
+ /*
+ * Check the fast-truncate information, there are 4 cases:
+ *
+ * (1) The page is in the WT_REF_DELETED state and ft_info.del is NULL. The page is deleted.
+ * (2) The page is in the WT_REF_DELETED state and ft_info.del is not NULL. The page is deleted
+ * if the truncate operation is visible. Look at ft_info.del; we could use the info from the
+ * address cell below too, but that's slower.
+ * (3) The page is in the WT_REF_DISK state. The page may be deleted; check the delete info from
+ * the address cell.
+ * (4) The page is in memory and has been instantiated. The delete info from the address cell
+ * will serve for readonly/unmodified pages, and for modified pages we can't skip the page
+ * anyway.
+ */
+ if (previous_state == WT_REF_DELETED &&
+ (ref->ft_info.del == NULL ||
+ __wt_txn_visible(session, ref->ft_info.del->txnid, ref->ft_info.del->timestamp))) {
*skipp = true;
+ goto unlock;
+ }
- WT_REF_UNLOCK(ref, previous_state);
+ /*
+ * Look at the disk address, if it exists, and if the page is unmodified. We must skip this test
+ * if the page has been modified since it was reconciled, since neither the delete information
+ * nor the timestamp information is necessarily up to date.
+ */
+ if ((previous_state == WT_REF_DISK ||
+ (previous_state == WT_REF_MEM && !__wt_page_is_modified(ref->page))) &&
+ __wt_ref_addr_copy(session, ref, &addr)) {
+ /* If there's delete information in the disk address, we can use it. */
+ if (addr.del_set && __wt_txn_visible(session, addr.del.txnid, addr.del.timestamp)) {
+ *skipp = true;
+ goto unlock;
+ }
+ /*
+ * Otherwise, check the timestamp information. We base this decision on the aggregate stop
+ * point added to the page during the last reconciliation.
+ */
+ if (addr.ta.newest_stop_txn != WT_TXN_MAX && addr.ta.newest_stop_ts != WT_TS_MAX &&
+ __wt_txn_visible(session, addr.ta.newest_stop_txn, addr.ta.newest_stop_ts)) {
+ *skipp = true;
+ goto unlock;
+ }
+ }
+
+unlock:
+ WT_REF_UNLOCK(ref, previous_state);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/cache_inline.h b/src/third_party/wiredtiger/src/include/cache_inline.h
index 99a65cd95b8..79c803adb63 100644
--- a/src/third_party/wiredtiger/src/include/cache_inline.h
+++ b/src/third_party/wiredtiger/src/include/cache_inline.h
@@ -452,6 +452,15 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool readonly, bo
return (0);
/*
+ * If the transaction is a checkpoint cursor transaction, don't try to evict. Because eviction
+ * keeps the current transaction snapshot, and the snapshot in a checkpoint cursor transaction
+ * can be (and likely is) very old, we won't be able to see anything current to evict and won't
+ * be able to accomplish anything useful.
+ */
+ if (F_ISSET(session->txn, WT_TXN_IS_CHECKPOINT))
+ return (0);
+
+ /*
* If the current transaction is keeping the oldest ID pinned, it is in the middle of an
* operation. This may prevent the oldest ID from moving forward, leading to deadlock, so only
* evict what we can. Otherwise, we are at a transaction boundary and we can work harder to make
diff --git a/src/third_party/wiredtiger/src/include/cell_inline.h b/src/third_party/wiredtiger/src/include/cell_inline.h
index 2c6ab8fa061..1591bad71f6 100644
--- a/src/third_party/wiredtiger/src/include/cell_inline.h
+++ b/src/third_party/wiredtiger/src/include/cell_inline.h
@@ -969,6 +969,30 @@ done:
}
/*
+ * __cell_page_del_window_cleanup --
+ * Clean up a page_del structure loaded from a previous run.
+ */
+static inline void
+__cell_page_del_window_cleanup(WT_SESSION_IMPL *session, WT_PAGE_DELETED *page_del, bool *clearedp)
+{
+ /*
+ * The fast-truncate times are a stop time for the whole page; this code should match the stop
+ * txn and stop time logic for KV cells.
+ */
+ if (page_del->txnid != WT_TXN_MAX) {
+ if (clearedp != NULL)
+ *clearedp = true;
+ page_del->txnid = WT_TXN_NONE;
+ /* As above, only for non-timestamped tables. */
+ if (page_del->timestamp == WT_TS_MAX) {
+ page_del->timestamp = WT_TS_NONE;
+ WT_ASSERT(session, page_del->durable_timestamp == WT_TS_NONE);
+ }
+ } else
+ WT_ASSERT(session, page_del->timestamp == WT_TS_MAX);
+}
+
+/*
* __cell_addr_window_cleanup --
* Clean up addr cells loaded from a previous run.
*/
@@ -976,8 +1000,10 @@ static inline void
__cell_addr_window_cleanup(
WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_ADDR *unpack_addr)
{
- WT_PAGE_DELETED *page_del;
WT_TIME_AGGREGATE *ta;
+ bool cleared;
+
+ cleared = false;
/* Tell reconciliation we cleared the transaction ids and the cell needs to be rebuilt. */
if (unpack_addr != NULL) {
@@ -1005,22 +1031,9 @@ __cell_addr_window_cleanup(
/* Also handle any fast-truncate information. */
if (unpack_addr->raw == WT_CELL_ADDR_DEL && F_ISSET(dsk, WT_PAGE_FT_UPDATE)) {
- page_del = &unpack_addr->page_del;
-
- /*
- * The fast-truncate times are a stop time for the whole page; this code should match
- * the stop txn and stop time logic for KV cells.
- */
- if (page_del->txnid != WT_TXN_MAX) {
- page_del->txnid = WT_TXN_NONE;
+ __cell_page_del_window_cleanup(session, &unpack_addr->page_del, &cleared);
+ if (cleared)
F_SET(unpack_addr, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
- /* As above, only for non-timestamped tables. */
- if (page_del->timestamp == WT_TS_MAX) {
- page_del->timestamp = WT_TS_NONE;
- WT_ASSERT(session, page_del->durable_timestamp == WT_TS_NONE);
- }
- } else
- WT_ASSERT(session, page_del->timestamp == WT_TS_MAX);
}
}
}
@@ -1060,6 +1073,32 @@ __cell_kv_window_cleanup(WT_SESSION_IMPL *session, WT_CELL_UNPACK_KV *unpack_kv)
}
/*
+ * __cell_redo_page_del_cleanup --
+ * Redo the window cleanup logic on a page_del structure after the write generations have been
+ * bumped. Note: the name of this function is abusive (there are no cells involved) but as the
+ * logic is a copy of __cell_unpack_window_cleanup it seems worthwhile to keep the two together.
+ */
+static inline void
+__cell_redo_page_del_cleanup(
+ WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_PAGE_DELETED *page_del)
+{
+ uint64_t write_gen;
+
+ WT_ASSERT(session, !WT_READING_CHECKPOINT(session));
+
+ write_gen = S2BT(session)->base_write_gen;
+
+ WT_ASSERT(session, dsk->write_gen != 0);
+ if (dsk->write_gen > write_gen)
+ return;
+
+ if (F_ISSET(session, WT_SESSION_DEBUG_DO_NOT_CLEAR_TXN_ID))
+ return;
+
+ __cell_page_del_window_cleanup(session, page_del, NULL);
+}
+
+/*
* __cell_unpack_window_cleanup --
* Clean up cells loaded from a previous run.
*/
diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h
index 7c1872b3af5..a876e858bba 100644
--- a/src/third_party/wiredtiger/src/include/config.h
+++ b/src/third_party/wiredtiger/src/include/config.h
@@ -46,6 +46,21 @@ struct __wt_config_parser_impl {
#define WT_CONFIG_ITEM_STATIC_INIT(n) static const WT_CONFIG_ITEM n = {"", 0, 0, WT_CONFIG_ITEM_NUM}
+/*
+ * If double quotes surround the string, then expand the string to include them. This is always
+ * called in the context of keys or values returned by the configuration parser. The character after
+ * the string must be at a valid memory address, and checking just that one is sufficient. If it is
+ * a double quote, then the character before must be as well, by the rules of the tokenizer.
+ */
+#define WT_CONFIG_PRESERVE_QUOTES(session, item) \
+ do { \
+ if ((item)->str[(item)->len] == '"') { \
+ WT_ASSERT(session, (item)->str[-1] == '"'); \
+ (item)->str -= 1; \
+ (item)->len += 2; \
+ } \
+ } while (0)
+
#define WT_CONFIG_UNSET (-1)
/*
* DO NOT EDIT: automatically built by dist/api_config.py.
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 0a361862a90..360998b2664 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -541,8 +541,8 @@ struct __wt_connection_impl {
#define WT_DIRECT_IO_CHECKPOINT 0x1u /* Checkpoints */
#define WT_DIRECT_IO_DATA 0x2u /* Data files */
#define WT_DIRECT_IO_LOG 0x4u /* Log files */
- /* AUTOMATIC FLAG VALUE GENERATION STOP 64 */
- uint64_t direct_io; /* O_DIRECT, FILE_FLAG_NO_BUFFERING */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP 32 */
+ uint32_t direct_io; /* O_DIRECT, FILE_FLAG_NO_BUFFERING */
uint64_t write_through; /* FILE_FLAG_WRITE_THROUGH */
bool mmap; /* use mmap when reading checkpoints */
@@ -558,12 +558,13 @@ struct __wt_connection_impl {
#define WT_CONN_DEBUG_CKPT_RETAIN 0x01u
#define WT_CONN_DEBUG_CORRUPTION_ABORT 0x02u
#define WT_CONN_DEBUG_CURSOR_COPY 0x04u
-#define WT_CONN_DEBUG_FLUSH_CKPT 0x08u
-#define WT_CONN_DEBUG_REALLOC_EXACT 0x10u
-#define WT_CONN_DEBUG_SLOW_CKPT 0x20u
-#define WT_CONN_DEBUG_UPDATE_RESTORE_EVICT 0x40u
- /* AUTOMATIC FLAG VALUE GENERATION STOP 64 */
- uint64_t debug_flags;
+#define WT_CONN_DEBUG_CURSOR_REPOSITION 0x08u
+#define WT_CONN_DEBUG_FLUSH_CKPT 0x10u
+#define WT_CONN_DEBUG_REALLOC_EXACT 0x20u
+#define WT_CONN_DEBUG_SLOW_CKPT 0x40u
+#define WT_CONN_DEBUG_UPDATE_RESTORE_EVICT 0x80u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP 16 */
+ uint16_t debug_flags;
/* Verbose settings for our various categories. */
WT_VERBOSE_LEVEL verbose[WT_VERB_NUM_CATEGORIES];
@@ -598,8 +599,8 @@ struct __wt_connection_impl {
#define WT_TIMING_STRESS_SPLIT_6 0x20000u
#define WT_TIMING_STRESS_SPLIT_7 0x40000u
#define WT_TIMING_STRESS_TIERED_FLUSH_FINISH 0x80000u
- /* AUTOMATIC FLAG VALUE GENERATION STOP 64 */
- uint64_t timing_stress_flags;
+ /* AUTOMATIC FLAG VALUE GENERATION STOP 32 */
+ uint32_t timing_stress_flags;
#define WT_STDERR(s) (&S2C(s)->wt_stderr)
#define WT_STDOUT(s) (&S2C(s)->wt_stdout)
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 2dae004c4fb..83ebb712148 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -279,8 +279,8 @@ extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
+ bool *is_col_fix) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_reserve(WT_CURSOR_BTREE *cbt)
@@ -654,12 +654,16 @@ extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_
WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref,
+ WT_PAGE_DELETED *page_del) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_delete_redo_window_cleanup(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_dhandle_update_write_gens(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg,
const char *config_name, uint32_t *allocsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip,
@@ -1353,8 +1357,8 @@ extern int __wt_schema_project_out(WT_SESSION_IMPL *session, WT_CURSOR **cp, con
extern int __wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg,
bool key_only, const char *vformat, WT_ITEM *value)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_schema_range_truncate(WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_range_truncate(WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop,
+ bool *is_col_fix) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_release_tiered(WT_SESSION_IMPL *session, WT_TIERED **tieredp)
@@ -1760,7 +1764,6 @@ extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session);
extern void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst);
extern void __wt_curtable_set_key(WT_CURSOR *cursor, ...);
extern void __wt_curtable_set_value(WT_CURSOR *cursor, ...);
-extern void __wt_dhandle_update_write_gens(WT_SESSION_IMPL *session);
extern void __wt_encrypt_size(
WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep);
extern void __wt_err_func(WT_SESSION_IMPL *session, int error, const char *func, int line,
@@ -1972,6 +1975,8 @@ static inline bool __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bo
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_page_del_visible(WT_SESSION_IMPL *session, WT_PAGE_DELETED *page_del,
+ bool visible_all) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_page_evict_clean(WT_PAGE *page)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
@@ -2381,7 +2386,7 @@ static inline void __wt_timing_stress(WT_SESSION_IMPL *session, u_int flag);
static inline void __wt_tree_modify_set(WT_SESSION_IMPL *session);
static inline void __wt_txn_cursor_op(WT_SESSION_IMPL *session);
static inline void __wt_txn_err_set(WT_SESSION_IMPL *session, int ret);
-static inline void __wt_txn_op_apply_prepare_state(
+static inline void __wt_txn_op_delete_apply_prepare_state(
WT_SESSION_IMPL *session, WT_REF *ref, bool commit);
static inline void __wt_txn_op_delete_commit_apply_timestamps(
WT_SESSION_IMPL *session, WT_REF *ref);
diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h
index 23ae4f59799..cc2d7c5b489 100644
--- a/src/third_party/wiredtiger/src/include/meta.h
+++ b/src/third_party/wiredtiger/src/include/meta.h
@@ -92,8 +92,8 @@ struct __wt_blkincr {
#define WT_BLKINCR_FULL 0x1u /* There is no checkpoint, always do full file */
#define WT_BLKINCR_INUSE 0x2u /* This entry is active */
#define WT_BLKINCR_VALID 0x4u /* This entry is valid */
- /* AUTOMATIC FLAG VALUE GENERATION STOP 64 */
- uint64_t flags;
+ /* AUTOMATIC FLAG VALUE GENERATION STOP 8 */
+ uint8_t flags;
};
/*
diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h
index 2b666d72ec9..9757b96b44b 100644
--- a/src/third_party/wiredtiger/src/include/misc.h
+++ b/src/third_party/wiredtiger/src/include/misc.h
@@ -165,9 +165,9 @@
* argument), LF_XXX (handles a local variable named "flags"), and FLD_XXX (handles any variable,
* anywhere).
*
- * Flags are unsigned 32-bit values -- we cast to keep the compiler quiet (the hex constant might be
- * a negative integer), and to ensure the hex constant is the correct size before applying the
- * bitwise not operator.
+ * Flags can be different unsigned bit values -- we cast to keep the compiler quiet (the hex
+ * constant might be a negative integer), and to ensure the hex constant is the correct size before
+ * applying the bitwise not operator.
*/
#define FLD_CLR(field, mask) ((void)((field) &= ~(mask)))
#define FLD_MASK(field, mask) ((field) & (mask))
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
index 752c8dba327..64519f5de01 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.h
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -305,7 +305,8 @@ struct __wt_reconcile {
};
typedef struct {
- WT_UPDATE *upd; /* Update to write (or NULL) */
+ WT_UPDATE *upd; /* Update to write (or NULL) */
+ WT_UPDATE *tombstone; /* The tombstone to write (or NULL) */
WT_TIME_WINDOW tw;
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 87a9daab01a..ca8cad80888 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -603,6 +603,7 @@ struct __wt_connection_stats {
int64_t cursor_sweep_examined;
int64_t cursor_sweep;
int64_t cursor_truncate;
+ int64_t cursor_truncate_keys_deleted;
int64_t cursor_update;
int64_t cursor_update_error;
int64_t cursor_update_bytes;
diff --git a/src/third_party/wiredtiger/src/include/txn_inline.h b/src/third_party/wiredtiger/src/include/txn_inline.h
index 9393875b7d6..ce2f47ca5d6 100644
--- a/src/third_party/wiredtiger/src/include/txn_inline.h
+++ b/src/third_party/wiredtiger/src/include/txn_inline.h
@@ -203,13 +203,14 @@ __wt_txn_unmodify(WT_SESSION_IMPL *session)
}
/*
- * __wt_txn_op_apply_prepare_state --
+ * __wt_txn_op_delete_apply_prepare_state --
* Apply the correct prepare state and the timestamp to the ref and to any updates in the page
* del update list.
*/
static inline void
-__wt_txn_op_apply_prepare_state(WT_SESSION_IMPL *session, WT_REF *ref, bool commit)
+__wt_txn_op_delete_apply_prepare_state(WT_SESSION_IMPL *session, WT_REF *ref, bool commit)
{
+ WT_PAGE_DELETED *page_del;
WT_TXN *txn;
WT_UPDATE **updp;
wt_timestamp_t ts;
@@ -230,24 +231,41 @@ __wt_txn_op_apply_prepare_state(WT_SESSION_IMPL *session, WT_REF *ref, bool comm
/*
* Timestamps and prepare state are in the page deleted structure for truncates, or in the
- * updates in the case of instantiated pages.
+ * updates in the case of instantiated pages. In the case of instantiated pages we may also need
+ * to update the page deleted structure saved in page->modify.
+ *
+ * Only two cases are possible. First: the state is WT_REF_DELETED. In this case ft_info.del
+ * cannot be NULL yet because an uncommitted operation cannot have reached global visibility.
+ * Otherwise: there is an uncommitted delete operation we're handling, so the page can't be in a
+ * non-deleted state, and the tree can't be readonly. Therefore the page must have been
+ * instantiated, the state must be WT_REF_MEM, and there should be an update list in
+ * ft_info.update. (But just in case, allow the update list to be null. Perhaps the page was
+ * truncated when all items on it were already deleted, so no tombstones were created during
+ * instantiation.)
*/
- if (previous_state == WT_REF_DELETED) {
- ref->ft_info.del->timestamp = ts;
+ if (previous_state == WT_REF_DELETED)
+ page_del = ref->ft_info.del;
+ else {
+ if ((updp = ref->ft_info.update) != NULL)
+ for (; *updp != NULL; ++updp) {
+ (*updp)->start_ts = ts;
+ /*
+ * Holding the ref locked means we have exclusive access, so if we are committing we
+ * don't need to use the prepare locked transition state.
+ */
+ (*updp)->prepare_state = prepare_state;
+ if (commit)
+ (*updp)->durable_ts = txn->durable_timestamp;
+ }
+ WT_ASSERT(session, ref->page != NULL && ref->page->modify != NULL);
+ page_del = ref->page->modify->page_del;
+ }
+ if (page_del != NULL) {
+ page_del->timestamp = ts;
if (commit)
- ref->ft_info.del->durable_timestamp = txn->durable_timestamp;
- WT_PUBLISH(ref->ft_info.del->prepare_state, prepare_state);
- } else if ((updp = ref->ft_info.update) != NULL)
- for (; *updp != NULL; ++updp) {
- (*updp)->start_ts = ts;
- /*
- * Holding the ref locked means we have exclusive access, so if we are committing we
- * don't need to use the prepare locked transition state.
- */
- (*updp)->prepare_state = prepare_state;
- if (commit)
- (*updp)->durable_ts = txn->durable_timestamp;
- }
+ page_del->durable_timestamp = txn->durable_timestamp;
+ WT_PUBLISH(page_del->prepare_state, prepare_state);
+ }
WT_REF_UNLOCK(ref, previous_state);
}
@@ -259,6 +277,7 @@ __wt_txn_op_apply_prepare_state(WT_SESSION_IMPL *session, WT_REF *ref, bool comm
static inline void
__wt_txn_op_delete_commit_apply_timestamps(WT_SESSION_IMPL *session, WT_REF *ref)
{
+ WT_PAGE_DELETED *page_del;
WT_TXN *txn;
WT_UPDATE **updp;
uint8_t previous_state;
@@ -271,17 +290,31 @@ __wt_txn_op_delete_commit_apply_timestamps(WT_SESSION_IMPL *session, WT_REF *ref
/*
* Timestamps are in the page deleted structure for truncates, or in the updates in the case of
* instantiated pages. Both commit and durable timestamps need to be updated.
+ *
+ * Only two cases are possible. First: the state is WT_REF_DELETED. In this case ft_info.del
+ * cannot be NULL yet because an uncommitted operation cannot have reached global visibility.
+ * Otherwise: there is an uncommitted delete operation we're handling, so the page can't be in a
+ * non-deleted state, and the tree can't be readonly. Therefore the page must have been
+ * instantiated, the state must be WT_REF_MEM, and there should be an update list in
+ * ft_info.update. (But just in case, allow the update list to be null. Perhaps the page was
+ * truncated when all items on it were already deleted, so no tombstones were created during
+ * instantiation.)
*/
- if (previous_state == WT_REF_DELETED) {
- if (ref->ft_info.del->timestamp == WT_TS_NONE) {
- ref->ft_info.del->timestamp = txn->commit_timestamp;
- ref->ft_info.del->durable_timestamp = txn->durable_timestamp;
- }
- } else if ((updp = ref->ft_info.update) != NULL)
- for (; *updp != NULL; ++updp) {
- (*updp)->start_ts = txn->commit_timestamp;
- (*updp)->durable_ts = txn->durable_timestamp;
- }
+ if (previous_state == WT_REF_DELETED)
+ page_del = ref->ft_info.del;
+ else {
+ if ((updp = ref->ft_info.update) != NULL)
+ for (; *updp != NULL; ++updp) {
+ (*updp)->start_ts = txn->commit_timestamp;
+ (*updp)->durable_ts = txn->durable_timestamp;
+ }
+ WT_ASSERT(session, ref->page != NULL && ref->page->modify != NULL);
+ page_del = ref->page->modify->page_del;
+ }
+ if (page_del != NULL && page_del->timestamp == WT_TS_NONE) {
+ page_del->timestamp = txn->commit_timestamp;
+ page_del->durable_timestamp = txn->durable_timestamp;
+ }
WT_REF_UNLOCK(ref, previous_state);
}
@@ -317,7 +350,7 @@ __wt_txn_op_set_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op)
* transaction commit call.
*/
if (op->type == WT_TXN_OP_REF_DELETE)
- __wt_txn_op_apply_prepare_state(session, op->u.ref, true);
+ __wt_txn_op_delete_apply_prepare_state(session, op->u.ref, true);
else {
upd = op->u.op_upd;
@@ -400,7 +433,10 @@ __wt_txn_modify_page_delete(WT_SESSION_IMPL *session, WT_REF *ref)
op->type = WT_TXN_OP_REF_DELETE;
op->u.ref = ref;
- /* This access to the WT_PAGE_DELETED structure is safe, caller has the WT_REF locked. */
+ /*
+ * This access to the WT_PAGE_DELETED structure is safe; caller has the WT_REF locked, and in
+ * fact just allocated the structure to fill in.
+ */
ref->ft_info.del->txnid = txn->id;
__wt_txn_op_set_timestamp(session, op);
@@ -519,6 +555,11 @@ __txn_visible_all_id(WT_SESSION_IMPL *session, uint64_t id)
txn = session->txn;
+ /* Make sure that checkpoint cursor transactions only read checkpoints, except for metadata. */
+ WT_ASSERT(session,
+ (session->dhandle != NULL && WT_IS_METADATA(session->dhandle)) ||
+ WT_READING_CHECKPOINT(session) == F_ISSET(session->txn, WT_TXN_IS_CHECKPOINT));
+
/*
* When reading from a checkpoint, all readers use the same snapshot, so a transaction is
* globally visible if it is visible in that snapshot. Note that this can cause things that were
@@ -528,8 +569,17 @@ __txn_visible_all_id(WT_SESSION_IMPL *session, uint64_t id)
* was taken becomes not globally visible in the checkpoint) never happen as this violates basic
* assumptions about visibility. (And, concretely, it can cause stale history store entries to
* come back to life and produce wrong answers.)
+ *
+ * Note: we use the transaction to check this rather than testing WT_READING_CHECKPOINT because
+ * reading the metadata while working with a checkpoint cursor will borrow the transaction; it
+ * then ends up using it to read a non-checkpoint tree. This is believed to be ok because the
+ * metadata is always read-uncommitted, but we want to still use the checkpoint-cursor
+ * visibility logic. Using the regular visibility logic with a checkpoint cursor transaction can
+ * be logically invalid (it is possible that way for something to be globally visible but
+ * specifically invisible) and also can end up comparing transaction ids from different database
+ * opens.
*/
- if (WT_READING_CHECKPOINT(session))
+ if (F_ISSET(session->txn, WT_TXN_IS_CHECKPOINT))
return (__wt_txn_visible_id_snapshot(
id, txn->snap_min, txn->snap_max, txn->snapshot, txn->snapshot_count));
oldest_id = __wt_txn_oldest_id(session);
@@ -568,8 +618,13 @@ __wt_txn_visible_all(WT_SESSION_IMPL *session, uint64_t id, wt_timestamp_t times
if (timestamp == WT_TS_NONE)
return (true);
+ /* Make sure that checkpoint cursor transactions only read checkpoints, except for metadata. */
+ WT_ASSERT(session,
+ (session->dhandle != NULL && WT_IS_METADATA(session->dhandle)) ||
+ WT_READING_CHECKPOINT(session) == F_ISSET(session->txn, WT_TXN_IS_CHECKPOINT));
+
/* When reading a checkpoint, use the checkpoint state instead of the current state. */
- if (WT_READING_CHECKPOINT(session))
+ if (F_ISSET(session->txn, WT_TXN_IS_CHECKPOINT))
return (session->txn->checkpoint_oldest_timestamp != WT_TS_NONE &&
timestamp <= session->txn->checkpoint_oldest_timestamp);
@@ -1139,9 +1194,8 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_session_copy_values(session));
/*
- * Stall here if the cache is completely full. We have allocated a transaction ID which
- * makes it possible for eviction to decide we're contributing to the problem and return
- * WT_ROLLBACK. The WT_SESSION.begin_transaction API can't return rollback, continue on.
+ * Stall here if the cache is completely full. The eviction check can return rollback, but the
+ * WT_SESSION.begin_transaction API can't, so continue on.
*/
WT_RET_ERROR_OK(__wt_cache_eviction_check(session, false, true, NULL), WT_ROLLBACK);
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 702172790f7..1f1bb276ef9 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -741,40 +741,40 @@ struct __wt_cursor {
WT_ITEM lower_bound, upper_bound;
/* AUTOMATIC FLAG VALUE GENERATION START 0 */
-#define WT_CURSTD_APPEND 0x000000001u
-#define WT_CURSTD_BULK 0x000000002u
-#define WT_CURSTD_CACHEABLE 0x000000004u
-#define WT_CURSTD_CACHED 0x000000008u
-#define WT_CURSTD_DEAD 0x000000010u
-#define WT_CURSTD_DEBUG_COPY_KEY 0x000000020u
-#define WT_CURSTD_DEBUG_COPY_VALUE 0x000000040u
-#define WT_CURSTD_DEBUG_RESET_EVICT 0x000000080u
-#define WT_CURSTD_DUMP_HEX 0x000000100u
-#define WT_CURSTD_DUMP_JSON 0x000000200u
-#define WT_CURSTD_DUMP_PRETTY 0x000000400u
-#define WT_CURSTD_DUMP_PRINT 0x000000800u
-#define WT_CURSTD_DUP_NO_VALUE 0x000001000u
-#define WT_CURSTD_EVICT_REPOSITION 0x000002000u
-#define WT_CURSTD_HS_READ_ALL 0x000004000u
-#define WT_CURSTD_HS_READ_COMMITTED 0x000008000u
-#define WT_CURSTD_IGNORE_TOMBSTONE 0x000010000u
-#define WT_CURSTD_JOINED 0x000020000u
-#define WT_CURSTD_KEY_EXT 0x000040000u /* Key points out of tree. */
-#define WT_CURSTD_KEY_INT 0x000080000u /* Key points into tree. */
-#define WT_CURSTD_KEY_ONLY 0x000100000u
-#define WT_CURSTD_META_INUSE 0x000200000u
-#define WT_CURSTD_OPEN 0x000400000u
-#define WT_CURSTD_OVERWRITE 0x000800000u
-#define WT_CURSTD_PREFIX_SEARCH 0x001000000u
-#define WT_CURSTD_RAW 0x002000000u
-#define WT_CURSTD_RAW_SEARCH 0x004000000u
-#define WT_CURSTD_VALUE_EXT 0x008000000u /* Value points out of tree. */
-#define WT_CURSTD_VALUE_INT 0x010000000u /* Value points into tree. */
-#define WT_CURSTD_BOUND_LOWER 0x020000000u /* Lower bound. */
-#define WT_CURSTD_BOUND_LOWER_INCLUSIVE 0x040000000u /* Inclusive lower bound. */
-#define WT_CURSTD_BOUND_UPPER 0x080000000u /* Upper bound. */
-#define WT_CURSTD_BOUND_UPPER_INCLUSIVE 0x100000000u /* Inclusive upper bound. */
-#define WT_CURSTD_VERSION_CURSOR 0x200000000u /* Version cursor. */
+#define WT_CURSTD_APPEND 0x000000001ull
+#define WT_CURSTD_BULK 0x000000002ull
+#define WT_CURSTD_CACHEABLE 0x000000004ull
+#define WT_CURSTD_CACHED 0x000000008ull
+#define WT_CURSTD_DEAD 0x000000010ull
+#define WT_CURSTD_DEBUG_COPY_KEY 0x000000020ull
+#define WT_CURSTD_DEBUG_COPY_VALUE 0x000000040ull
+#define WT_CURSTD_DEBUG_RESET_EVICT 0x000000080ull
+#define WT_CURSTD_DUMP_HEX 0x000000100ull
+#define WT_CURSTD_DUMP_JSON 0x000000200ull
+#define WT_CURSTD_DUMP_PRETTY 0x000000400ull
+#define WT_CURSTD_DUMP_PRINT 0x000000800ull
+#define WT_CURSTD_DUP_NO_VALUE 0x000001000ull
+#define WT_CURSTD_EVICT_REPOSITION 0x000002000ull
+#define WT_CURSTD_HS_READ_ALL 0x000004000ull
+#define WT_CURSTD_HS_READ_COMMITTED 0x000008000ull
+#define WT_CURSTD_IGNORE_TOMBSTONE 0x000010000ull
+#define WT_CURSTD_JOINED 0x000020000ull
+#define WT_CURSTD_KEY_EXT 0x000040000ull /* Key points out of tree. */
+#define WT_CURSTD_KEY_INT 0x000080000ull /* Key points into tree. */
+#define WT_CURSTD_KEY_ONLY 0x000100000ull
+#define WT_CURSTD_META_INUSE 0x000200000ull
+#define WT_CURSTD_OPEN 0x000400000ull
+#define WT_CURSTD_OVERWRITE 0x000800000ull
+#define WT_CURSTD_PREFIX_SEARCH 0x001000000ull
+#define WT_CURSTD_RAW 0x002000000ull
+#define WT_CURSTD_RAW_SEARCH 0x004000000ull
+#define WT_CURSTD_VALUE_EXT 0x008000000ull /* Value points out of tree. */
+#define WT_CURSTD_VALUE_INT 0x010000000ull /* Value points into tree. */
+#define WT_CURSTD_BOUND_LOWER 0x020000000ull /* Lower bound. */
+#define WT_CURSTD_BOUND_LOWER_INCLUSIVE 0x040000000ull /* Inclusive lower bound. */
+#define WT_CURSTD_BOUND_UPPER 0x080000000ull /* Upper bound. */
+#define WT_CURSTD_BOUND_UPPER_INCLUSIVE 0x100000000ull /* Inclusive upper bound. */
+#define WT_CURSTD_VERSION_CURSOR 0x200000000ull /* Version cursor. */
/* AUTOMATIC FLAG VALUE GENERATION STOP 64 */
#define WT_CURSTD_KEY_SET (WT_CURSTD_KEY_EXT | WT_CURSTD_KEY_INT)
#define WT_CURSTD_VALUE_SET (WT_CURSTD_VALUE_EXT | WT_CURSTD_VALUE_INT)
@@ -927,7 +927,8 @@ struct __wt_session {
* Cursors capable of supporting transactional operations operate in the
* context of the current transaction, if any.
*
- * WT_SESSION::rollback_transaction implicitly resets all cursors.
+ * WT_SESSION::rollback_transaction implicitly resets all cursors associated with the
+ * session.
*
* Cursors are relatively light-weight objects but may hold references
* to heavier-weight objects; applications should re-use cursors when
@@ -1220,18 +1221,18 @@ struct __wt_session {
* by any update to a record in the table., a boolean flag; default \c false.}
* @config{import = (, configure import of an existing object into the currently running
* database., a set of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;compare_timestamp, Allow importing files with timestamps
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;compare_timestamp, allow importing files with timestamps
* smaller or equal to the configured global timestamps. Note the history of the files are
* not imported together and thus snapshot read of historical data will not work with the
* option "stable_timestamp". (The \c oldest and \c stable arguments are deprecated
- * short-hand for \c oldest_timestamp and \c stable_timestamp\, respectively.)., a string\,
+ * short-hand for \c oldest_timestamp and \c stable_timestamp\, respectively)., a string\,
* chosen from the following options: \c "oldest"\, \c "oldest_timestamp"\, \c "stable"\, \c
* "stable_timestamp"; default \c oldest_timestamp.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;
* enabled, whether to import the input URI from disk., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;file_metadata, the file configuration extracted from the
* metadata of the export database., a string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;metadata_file, File that contains all the relevant
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;metadata_file, a text file that contains all the relevant
* metadata information for the URI to import. The file is generated by backup:export
* cursor., a string; default empty.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;repair, whether to
@@ -1763,7 +1764,7 @@ struct __wt_session {
* A transaction must be in progress when this method is called.
*
* If WT_SESSION::commit_transaction returns an error, the transaction
- * was rolled back, not committed.
+ * was rolled back, not committed, and all cursors associated with the session are reset.
*
* @requires_transaction
*
@@ -1829,7 +1830,7 @@ struct __wt_session {
*
* A transaction must be in progress when this method is called.
*
- * All cursors are reset.
+ * All cursors associated with the session are reset.
*
* @requires_transaction
*
@@ -2193,6 +2194,11 @@ struct __wt_connection {
* make a copy of any data returned by a cursor operation and return the copy instead. The
* copy is freed on the next cursor operation. This allows memory sanitizers to detect
* inappropriate references to memory owned by cursors., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;cursor_reposition, if true\, for the operations with
+ * snapshot isolation the cursor temporarily releases the page that requires force
+ * eviction\, then repositions back to the page for further operations. A page release
+ * encourages eviction of hot or large pages\, which is more likely to succeed without a
+ * cursor keeping the page pinned., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;eviction, if true\, modify internal algorithms to change
* skew to force history store eviction to happen more aggressively. This includes but is
* not limited to not skewing newest\, not favoring leaf pages\, and modifying the eviction
@@ -2407,9 +2413,6 @@ struct __wt_connection {
* @config{&nbsp;&nbsp;&nbsp;&nbsp;local_retention, time in seconds to retain data
* on tiered storage on the local tier for faster read access., an integer between 0 and
* 10000; default \c 300.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;object_target_size, the
- * approximate size of objects before creating them on the tiered storage tier., an integer
- * between 100K and 10TB; default \c 10M.}
* @config{ ),,}
* @config{verbose, enable messages for various subsystems and operations. Options are
* given as a list\, where each message type can optionally define an associated verbosity
@@ -2918,22 +2921,26 @@ struct __wt_connection {
* next cursor operation. This allows memory sanitizers to detect inappropriate references to
* memory owned by cursors., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * eviction, if true\, modify internal algorithms to change skew to force history store eviction to
- * happen more aggressively. This includes but is not limited to not skewing newest\, not favoring
- * leaf pages\, and modifying the eviction score mechanism., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;flush_checkpoint, if true\, call a system wide checkpoint
- * immediately after a flush_tier completes to force objects out to disk so that a flush_tier can
- * work single-threaded., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * log_retention, adjust log removal to retain at least this number of log files\, ignored if set to
- * 0. (Warning: this option can remove log files required for recovery if no checkpoints have yet
- * been done and the number of log files exceeds the configured value. As WiredTiger cannot detect
- * the difference between a system that has not yet checkpointed and one that will never
- * checkpoint\, it might discard log files before any checkpoint is done.)., an integer between 0
- * and 1024; default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;realloc_exact, if true\, reallocation of
- * memory will only provide the exact amount requested. This will help with spotting memory
- * allocation issues more easily., a boolean flag; default \c false.}
+ * cursor_reposition, if true\, for operations with snapshot isolation the cursor temporarily
+ * releases a page that requires forced eviction\, then repositions back to the page for further
+ * operations. Releasing the page encourages eviction of hot or large pages\, which is more likely
+ * to succeed without a cursor keeping the page pinned., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;eviction, if true\, modify internal algorithms to change skew to
+ * force history store eviction to happen more aggressively. This includes but is not limited to
+ * not skewing newest\, not favoring leaf pages\, and modifying the eviction score mechanism., a
+ * boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;flush_checkpoint, if true\, call
+ * a system wide checkpoint immediately after a flush_tier completes to force objects out to disk so
+ * that a flush_tier can work single-threaded., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;log_retention, adjust log removal to retain at least this number
+ * of log files\, ignored if set to 0. (Warning: this option can remove log files required for
+ * recovery if no checkpoints have yet been done and the number of log files exceeds the configured
+ * value. As WiredTiger cannot detect the difference between a system that has not yet checkpointed
+ * and one that will never checkpoint\, it might discard log files before any checkpoint is done.).,
+ * an integer between 0 and 1024; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;realloc_exact, if
+ * true\, reallocation of memory will only provide the exact amount requested. This will help with
+ * spotting memory allocation issues more easily., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;rollback_error, return a WT_ROLLBACK error from a transaction
* operation about every Nth operation to simulate a collision., an integer between 0 and 10M;
* default \c 0.}
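The debug settings above are passed as part of the connection configuration string. A minimal sketch follows; which options to enable, and toggling cursor_reposition at runtime through WT_CONNECTION::reconfigure, are illustrative choices rather than anything prescribed by this change.

    #include <wiredtiger.h>

    /* Hedged sketch: the particular debug_mode options enabled here are assumptions. */
    static int
    open_with_debug(const char *home, WT_CONNECTION **connp)
    {
        WT_CONNECTION *conn;
        int ret;

        if ((ret = wiredtiger_open(
               home, NULL, "create,debug_mode=(cursor_reposition=true)", &conn)) != 0)
            return (ret);
        /* Debug settings can also be adjusted on a live connection. */
        if ((ret = conn->reconfigure(conn, "debug_mode=(cursor_reposition=false)")) != 0) {
            (void)conn->close(conn, NULL);
            return (ret);
        }
        *connp = conn;
        return (0);
    }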
@@ -5852,650 +5859,652 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CURSOR_SWEEP 1269
/*! cursor: cursor truncate calls */
#define WT_STAT_CONN_CURSOR_TRUNCATE 1270
+/*! cursor: cursor truncates performed on individual keys */
+#define WT_STAT_CONN_CURSOR_TRUNCATE_KEYS_DELETED 1271
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1271
+#define WT_STAT_CONN_CURSOR_UPDATE 1272
/*! cursor: cursor update calls that return an error */
-#define WT_STAT_CONN_CURSOR_UPDATE_ERROR 1272
+#define WT_STAT_CONN_CURSOR_UPDATE_ERROR 1273
/*! cursor: cursor update key and value bytes */
-#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1273
+#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1274
/*! cursor: cursor update value size change */
-#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1274
+#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1275
/*! cursor: cursors reused from cache */
-#define WT_STAT_CONN_CURSOR_REOPEN 1275
+#define WT_STAT_CONN_CURSOR_REOPEN 1276
/*! cursor: open cursor count */
-#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1276
+#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1277
/*! data-handle: connection data handle size */
-#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1277
+#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1278
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1278
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1279
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1279
+#define WT_STAT_CONN_DH_SWEEP_REF 1280
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1280
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1281
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1281
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1282
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1282
+#define WT_STAT_CONN_DH_SWEEP_TOD 1283
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1283
+#define WT_STAT_CONN_DH_SWEEPS 1284
/*!
* data-handle: connection sweeps skipped due to checkpoint gathering
* handles
*/
-#define WT_STAT_CONN_DH_SWEEP_SKIP_CKPT 1284
+#define WT_STAT_CONN_DH_SWEEP_SKIP_CKPT 1285
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1285
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1286
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1286
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1287
/*! lock: checkpoint lock acquisitions */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1287
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1288
/*! lock: checkpoint lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1288
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1289
/*! lock: checkpoint lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1289
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1290
/*! lock: dhandle lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1290
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1291
/*! lock: dhandle lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1291
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1292
/*! lock: dhandle read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1292
+#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1293
/*! lock: dhandle write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1293
+#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1294
/*!
* lock: durable timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_APPLICATION 1294
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_APPLICATION 1295
/*!
* lock: durable timestamp queue lock internal thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_INTERNAL 1295
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_INTERNAL 1296
/*! lock: durable timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_READ_COUNT 1296
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_READ_COUNT 1297
/*! lock: durable timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WRITE_COUNT 1297
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WRITE_COUNT 1298
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1298
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1299
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1299
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1300
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1300
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1301
/*!
* lock: read timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1301
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1302
/*! lock: read timestamp queue lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1302
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1303
/*! lock: read timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1303
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1304
/*! lock: read timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1304
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1305
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1305
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1306
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1306
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1307
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1307
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1308
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1308
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1309
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1309
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1310
/*! lock: table read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1310
+#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1311
/*! lock: table write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1311
+#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1312
/*! lock: txn global lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1312
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1313
/*! lock: txn global lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1313
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1314
/*! lock: txn global read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1314
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1315
/*! lock: txn global write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1315
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1316
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1316
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1317
/*! log: force log remove time sleeping (usecs) */
-#define WT_STAT_CONN_LOG_FORCE_REMOVE_SLEEP 1317
+#define WT_STAT_CONN_LOG_FORCE_REMOVE_SLEEP 1318
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1318
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1319
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1319
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1320
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1320
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1321
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1321
+#define WT_STAT_CONN_LOG_FLUSH 1322
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1322
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1323
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1323
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1324
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1324
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1325
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1325
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1326
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1326
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1327
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1327
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1328
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1328
+#define WT_STAT_CONN_LOG_SCANS 1329
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1329
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1330
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1330
+#define WT_STAT_CONN_LOG_WRITE_LSN 1331
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1331
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1332
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1332
+#define WT_STAT_CONN_LOG_SYNC 1333
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1333
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1334
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1334
+#define WT_STAT_CONN_LOG_SYNC_DIR 1335
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1335
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1336
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1336
+#define WT_STAT_CONN_LOG_WRITES 1337
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1337
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1338
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1338
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1339
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1339
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1340
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1340
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1341
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1341
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1342
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1342
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1343
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1343
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1344
/*! log: slot close lost race */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1344
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1345
/*! log: slot close unbuffered waits */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1345
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1346
/*! log: slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1346
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1347
/*! log: slot join atomic update races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1347
+#define WT_STAT_CONN_LOG_SLOT_RACES 1348
/*! log: slot join calls atomic updates raced */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1348
+#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1349
/*! log: slot join calls did not yield */
-#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1349
+#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1350
/*! log: slot join calls found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1350
+#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1351
/*! log: slot join calls slept */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1351
+#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1352
/*! log: slot join calls yielded */
-#define WT_STAT_CONN_LOG_SLOT_YIELD 1352
+#define WT_STAT_CONN_LOG_SLOT_YIELD 1353
/*! log: slot join found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1353
+#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1354
/*! log: slot joins yield time (usecs) */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1354
+#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1355
/*! log: slot transitions unable to find free slot */
-#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1355
+#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1356
/*! log: slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1356
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1357
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1357
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1358
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1358
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1359
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1359
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1360
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1360
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1361
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1361
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1362
/*! perf: file system read latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1362
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1363
/*! perf: file system read latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1363
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1364
/*! perf: file system read latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1364
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1365
/*! perf: file system read latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1365
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1366
/*! perf: file system read latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1366
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1367
/*! perf: file system read latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1367
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1368
/*! perf: file system write latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1368
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1369
/*! perf: file system write latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1369
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1370
/*! perf: file system write latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1370
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1371
/*! perf: file system write latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1371
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1372
/*! perf: file system write latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1372
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1373
/*! perf: file system write latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1373
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1374
/*! perf: operation read latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1374
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1375
/*! perf: operation read latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1375
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1376
/*! perf: operation read latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1376
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1377
/*! perf: operation read latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1377
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1378
/*! perf: operation read latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1378
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1379
/*! perf: operation write latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1379
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1380
/*! perf: operation write latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1380
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1381
/*! perf: operation write latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1381
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1382
/*! perf: operation write latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1382
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1383
/*! perf: operation write latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1383
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1384
/*! reconciliation: approximate byte size of timestamps in pages written */
-#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1384
+#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1385
/*!
* reconciliation: approximate byte size of transaction IDs in pages
* written
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1385
+#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1386
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1386
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1387
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_CONN_REC_OVERFLOW_KEY_LEAF 1387
+#define WT_STAT_CONN_REC_OVERFLOW_KEY_LEAF 1388
/*! reconciliation: maximum seconds spent in a reconciliation call */
-#define WT_STAT_CONN_REC_MAXIMUM_SECONDS 1388
+#define WT_STAT_CONN_REC_MAXIMUM_SECONDS 1389
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1389
+#define WT_STAT_CONN_REC_PAGES 1390
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1390
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1391
/*!
* reconciliation: page reconciliation calls that resulted in values with
* prepared transaction metadata
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_PREPARE 1391
+#define WT_STAT_CONN_REC_PAGES_WITH_PREPARE 1392
/*!
* reconciliation: page reconciliation calls that resulted in values with
* timestamps
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_TS 1392
+#define WT_STAT_CONN_REC_PAGES_WITH_TS 1393
/*!
* reconciliation: page reconciliation calls that resulted in values with
* transaction ids
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_TXN 1393
+#define WT_STAT_CONN_REC_PAGES_WITH_TXN 1394
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1394
+#define WT_STAT_CONN_REC_PAGE_DELETE 1395
/*!
* reconciliation: pages written including an aggregated newest start
* durable timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1395
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1396
/*!
* reconciliation: pages written including an aggregated newest stop
* durable timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1396
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1397
/*!
* reconciliation: pages written including an aggregated newest stop
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1397
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1398
/*!
* reconciliation: pages written including an aggregated newest stop
* transaction ID
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1398
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1399
/*!
* reconciliation: pages written including an aggregated newest
* transaction ID
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_TXN 1399
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_TXN 1400
/*!
* reconciliation: pages written including an aggregated oldest start
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1400
+#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1401
/*! reconciliation: pages written including an aggregated prepare */
-#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1401
+#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1402
/*! reconciliation: pages written including at least one prepare state */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_PREPARED 1402
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_PREPARED 1403
/*!
* reconciliation: pages written including at least one start durable
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1403
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1404
/*! reconciliation: pages written including at least one start timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TS 1404
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TS 1405
/*!
* reconciliation: pages written including at least one start transaction
* ID
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1405
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1406
/*!
* reconciliation: pages written including at least one stop durable
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1406
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1407
/*! reconciliation: pages written including at least one stop timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1407
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1408
/*!
* reconciliation: pages written including at least one stop transaction
* ID
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1408
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1409
/*! reconciliation: records written including a prepare state */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PREPARED 1409
+#define WT_STAT_CONN_REC_TIME_WINDOW_PREPARED 1410
/*! reconciliation: records written including a start durable timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1410
+#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1411
/*! reconciliation: records written including a start timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1411
+#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1412
/*! reconciliation: records written including a start transaction ID */
-#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1412
+#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1413
/*! reconciliation: records written including a stop durable timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1413
+#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1414
/*! reconciliation: records written including a stop timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1414
+#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1415
/*! reconciliation: records written including a stop transaction ID */
-#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1415
+#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1416
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1416
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1417
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1417
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1418
/*! session: attempts to remove a local object and the object is in use */
-#define WT_STAT_CONN_LOCAL_OBJECTS_INUSE 1418
+#define WT_STAT_CONN_LOCAL_OBJECTS_INUSE 1419
/*! session: flush_tier operation calls */
-#define WT_STAT_CONN_FLUSH_TIER 1419
+#define WT_STAT_CONN_FLUSH_TIER 1420
/*! session: flush_tier tables skipped due to no checkpoint */
-#define WT_STAT_CONN_FLUSH_TIER_SKIPPED 1420
+#define WT_STAT_CONN_FLUSH_TIER_SKIPPED 1421
/*! session: flush_tier tables switched */
-#define WT_STAT_CONN_FLUSH_TIER_SWITCHED 1421
+#define WT_STAT_CONN_FLUSH_TIER_SWITCHED 1422
/*! session: local objects removed */
-#define WT_STAT_CONN_LOCAL_OBJECTS_REMOVED 1422
+#define WT_STAT_CONN_LOCAL_OBJECTS_REMOVED 1423
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1423
+#define WT_STAT_CONN_SESSION_OPEN 1424
/*! session: session query timestamp calls */
-#define WT_STAT_CONN_SESSION_QUERY_TS 1424
+#define WT_STAT_CONN_SESSION_QUERY_TS 1425
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1425
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1426
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1426
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1427
/*! session: table alter triggering checkpoint calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_TRIGGER_CHECKPOINT 1427
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_TRIGGER_CHECKPOINT 1428
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1428
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1429
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1429
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1430
/*! session: table compact failed calls due to cache pressure */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL_CACHE_PRESSURE 1430
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL_CACHE_PRESSURE 1431
/*! session: table compact running */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_RUNNING 1431
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_RUNNING 1432
/*! session: table compact skipped as process would not reduce file size */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SKIPPED 1432
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SKIPPED 1433
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1433
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1434
/*! session: table compact timeout */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_TIMEOUT 1434
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_TIMEOUT 1435
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1435
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1436
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1436
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1437
/*! session: table create with import failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_IMPORT_FAIL 1437
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_IMPORT_FAIL 1438
/*! session: table create with import successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_IMPORT_SUCCESS 1438
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_IMPORT_SUCCESS 1439
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1439
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1440
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1440
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1441
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1441
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1442
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1442
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1443
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1443
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1444
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1444
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1445
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1445
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1446
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1446
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1447
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1447
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1448
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1448
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1449
/*! session: tiered operations dequeued and processed */
-#define WT_STAT_CONN_TIERED_WORK_UNITS_DEQUEUED 1449
+#define WT_STAT_CONN_TIERED_WORK_UNITS_DEQUEUED 1450
/*! session: tiered operations scheduled */
-#define WT_STAT_CONN_TIERED_WORK_UNITS_CREATED 1450
+#define WT_STAT_CONN_TIERED_WORK_UNITS_CREATED 1451
/*! session: tiered storage local retention time (secs) */
-#define WT_STAT_CONN_TIERED_RETENTION 1451
+#define WT_STAT_CONN_TIERED_RETENTION 1452
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1452
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1453
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1453
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1454
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1454
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1455
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1455
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1456
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1456
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1457
/*!
* thread-yield: connection close blocked waiting for transaction state
* stabilization
*/
-#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1457
+#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1458
/*! thread-yield: connection close yielded for lsm manager shutdown */
-#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1458
+#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1459
/*! thread-yield: data handle lock yielded */
-#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1459
+#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1460
/*!
* thread-yield: get reference for page index and slot time sleeping
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1460
+#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1461
/*! thread-yield: page access yielded due to prepare state change */
-#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1461
+#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1462
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1462
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1463
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1463
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1464
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1464
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1465
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1465
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1466
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1466
+#define WT_STAT_CONN_PAGE_SLEEP 1467
/*!
* thread-yield: page delete rollback time sleeping for state change
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1467
+#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1468
/*! thread-yield: page reconciliation yielded due to child modification */
-#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1468
+#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1469
/*! transaction: Number of prepared updates */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES 1469
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES 1470
/*! transaction: Number of prepared updates committed */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COMMITTED 1470
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COMMITTED 1471
/*! transaction: Number of prepared updates repeated on the same key */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_KEY_REPEATED 1471
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_KEY_REPEATED 1472
/*! transaction: Number of prepared updates rolled back */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_ROLLEDBACK 1472
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_ROLLEDBACK 1473
/*! transaction: prepared transactions */
-#define WT_STAT_CONN_TXN_PREPARE 1473
+#define WT_STAT_CONN_TXN_PREPARE 1474
/*! transaction: prepared transactions committed */
-#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1474
+#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1475
/*! transaction: prepared transactions currently active */
-#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1475
+#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1476
/*! transaction: prepared transactions rolled back */
-#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1476
+#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1477
/*! transaction: query timestamp calls */
-#define WT_STAT_CONN_TXN_QUERY_TS 1477
+#define WT_STAT_CONN_TXN_QUERY_TS 1478
/*! transaction: race to read prepared update retry */
-#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1478
+#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1479
/*! transaction: rollback to stable calls */
-#define WT_STAT_CONN_TXN_RTS 1479
+#define WT_STAT_CONN_TXN_RTS 1480
/*!
* transaction: rollback to stable history store records with stop
* timestamps older than newer records
*/
-#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1480
+#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1481
/*! transaction: rollback to stable inconsistent checkpoint */
-#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1481
+#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1482
/*! transaction: rollback to stable keys removed */
-#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1482
+#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1483
/*! transaction: rollback to stable keys restored */
-#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1483
+#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1484
/*! transaction: rollback to stable pages visited */
-#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1484
+#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1485
/*! transaction: rollback to stable restored tombstones from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1485
+#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1486
/*! transaction: rollback to stable restored updates from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1486
+#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1487
/*! transaction: rollback to stable skipping delete rle */
-#define WT_STAT_CONN_TXN_RTS_DELETE_RLE_SKIPPED 1487
+#define WT_STAT_CONN_TXN_RTS_DELETE_RLE_SKIPPED 1488
/*! transaction: rollback to stable skipping stable rle */
-#define WT_STAT_CONN_TXN_RTS_STABLE_RLE_SKIPPED 1488
+#define WT_STAT_CONN_TXN_RTS_STABLE_RLE_SKIPPED 1489
/*! transaction: rollback to stable sweeping history store keys */
-#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1489
+#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1490
/*! transaction: rollback to stable tree walk skipping pages */
-#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1490
+#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1491
/*! transaction: rollback to stable updates aborted */
-#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1491
+#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1492
/*! transaction: rollback to stable updates removed from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1492
+#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1493
/*! transaction: sessions scanned in each walk of concurrent sessions */
-#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1493
+#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1494
/*! transaction: set timestamp calls */
-#define WT_STAT_CONN_TXN_SET_TS 1494
+#define WT_STAT_CONN_TXN_SET_TS 1495
/*! transaction: set timestamp durable calls */
-#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1495
+#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1496
/*! transaction: set timestamp durable updates */
-#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1496
+#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1497
/*! transaction: set timestamp oldest calls */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1497
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1498
/*! transaction: set timestamp oldest updates */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1498
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1499
/*! transaction: set timestamp stable calls */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE 1499
+#define WT_STAT_CONN_TXN_SET_TS_STABLE 1500
/*! transaction: set timestamp stable updates */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1500
+#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1501
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1501
+#define WT_STAT_CONN_TXN_BEGIN 1502
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1502
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1503
/*!
* transaction: transaction checkpoint currently running for history
* store file
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1503
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1504
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1504
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1505
/*!
* transaction: transaction checkpoint history store file duration
* (usecs)
*/
-#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1505
+#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1506
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1506
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1507
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1507
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1508
/*!
* transaction: transaction checkpoint most recent duration for gathering
* all handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1508
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1509
/*!
* transaction: transaction checkpoint most recent duration for gathering
* applied handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1509
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1510
/*!
* transaction: transaction checkpoint most recent duration for gathering
* skipped handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1510
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1511
/*! transaction: transaction checkpoint most recent handles applied */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1511
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1512
/*! transaction: transaction checkpoint most recent handles skipped */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1512
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1513
/*! transaction: transaction checkpoint most recent handles walked */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1513
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1514
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1514
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1515
/*! transaction: transaction checkpoint prepare currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1515
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1516
/*! transaction: transaction checkpoint prepare max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1516
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1517
/*! transaction: transaction checkpoint prepare min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1517
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1518
/*! transaction: transaction checkpoint prepare most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1518
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1519
/*! transaction: transaction checkpoint prepare total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1519
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1520
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1520
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1521
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1521
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1522
/*! transaction: transaction checkpoint stop timing stress active */
-#define WT_STAT_CONN_TXN_CHECKPOINT_STOP_STRESS_ACTIVE 1522
+#define WT_STAT_CONN_TXN_CHECKPOINT_STOP_STRESS_ACTIVE 1523
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1523
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1524
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1524
+#define WT_STAT_CONN_TXN_CHECKPOINT 1525
/*! transaction: transaction checkpoints due to obsolete pages */
-#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1525
+#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1526
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1526
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1527
/*! transaction: transaction failures due to history store */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1527
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1528
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1528
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1529
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1529
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1530
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1530
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1531
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1531
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1532
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1532
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1533
/*! transaction: transaction range of timestamps pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1533
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1534
/*!
* transaction: transaction range of timestamps pinned by the oldest
* active read timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1534
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1535
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1535
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1536
/*! transaction: transaction read timestamp of the oldest active reader */
-#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1536
+#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1537
/*! transaction: transaction rollback to stable currently running */
-#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1537
+#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1538
/*! transaction: transaction walk of concurrent sessions */
-#define WT_STAT_CONN_TXN_WALK_SESSIONS 1538
+#define WT_STAT_CONN_TXN_WALK_SESSIONS 1539
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1539
+#define WT_STAT_CONN_TXN_COMMIT 1540
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1540
+#define WT_STAT_CONN_TXN_ROLLBACK 1541
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1541
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1542
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
index f95a38ad858..60a783e86cd 100644
--- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c
+++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
@@ -1564,7 +1564,10 @@ err:
* __wt_meta_read_checkpoint_snapshot --
* Fetch the snapshot data for a checkpoint from the metadata file. Reads the selected named
* checkpoint's snapshot, or if the checkpoint name passed is null, the most recent checkpoint's
- * snapshot. The snapshot list returned is allocated and must be freed by the caller.
+ * snapshot. The snapshot list returned is allocated and must be freed by the caller. Can be
+ * called with NULL return parameters to avoid (in particular) bothering to allocate the
+ * snapshot data if it's not needed. Note that if you retrieve the snapshot data you must also
+ * retrieve the snapshot count.
*/
int
__wt_meta_read_checkpoint_snapshot(WT_SESSION_IMPL *session, const char *ckpt_name,
@@ -1598,10 +1601,14 @@ __wt_meta_read_checkpoint_snapshot(WT_SESSION_IMPL *session, const char *ckpt_na
/* Initialize to an empty snapshot. */
if (snap_write_gen != NULL)
*snap_write_gen = 0;
- *snap_min = WT_TXN_NONE;
- *snap_max = WT_TXN_NONE;
- *snapshot = NULL;
- *snapshot_count = 0;
+ if (snap_min != NULL)
+ *snap_min = WT_TXN_NONE;
+ if (snap_max != NULL)
+ *snap_max = WT_TXN_NONE;
+ if (snapshot != NULL)
+ *snapshot = NULL;
+ if (snapshot_count != NULL)
+ *snapshot_count = 0;
if (ckpttime != NULL)
*ckpttime = 0;
@@ -1618,20 +1625,25 @@ __wt_meta_read_checkpoint_snapshot(WT_SESSION_IMPL *session, const char *ckpt_na
/* Extract the components of the metadata string. */
if (sys_config != NULL) {
WT_CLEAR(cval);
- if (__wt_config_getones(session, sys_config, WT_SYSTEM_CKPT_SNAPSHOT_MIN, &cval) == 0 &&
+ if (snap_min != NULL &&
+ __wt_config_getones(session, sys_config, WT_SYSTEM_CKPT_SNAPSHOT_MIN, &cval) == 0 &&
cval.len != 0)
*snap_min = (uint64_t)cval.val;
- if (__wt_config_getones(session, sys_config, WT_SYSTEM_CKPT_SNAPSHOT_MAX, &cval) == 0 &&
+ if (snap_max != NULL &&
+ __wt_config_getones(session, sys_config, WT_SYSTEM_CKPT_SNAPSHOT_MAX, &cval) == 0 &&
cval.len != 0)
*snap_max = (uint64_t)cval.val;
- if (__wt_config_getones(session, sys_config, WT_SYSTEM_CKPT_SNAPSHOT_COUNT, &cval) == 0 &&
+ if (snapshot_count != NULL &&
+ __wt_config_getones(session, sys_config, WT_SYSTEM_CKPT_SNAPSHOT_COUNT, &cval) == 0 &&
cval.len != 0)
*snapshot_count = (uint32_t)cval.val;
- if (__wt_config_getones(session, sys_config, WT_SYSTEM_CKPT_SNAPSHOT, &cval) == 0 &&
+ if (snapshot != NULL &&
+ __wt_config_getones(session, sys_config, WT_SYSTEM_CKPT_SNAPSHOT, &cval) == 0 &&
cval.len != 0) {
+ WT_ASSERT(session, snapshot_count != NULL);
__wt_config_subinit(session, &list, &cval);
WT_ERR(__wt_calloc_def(session, *snapshot_count, snapshot));
while (__wt_config_subget_next(&list, &k) == 0)
@@ -1648,17 +1660,14 @@ __wt_meta_read_checkpoint_snapshot(WT_SESSION_IMPL *session, const char *ckpt_na
if (snap_write_gen != NULL)
*snap_write_gen = write_gen;
- if (ckpttime != NULL) {
- /*
- * If the write generation is current, extract the checkpoint time. Otherwise we use 0.
- */
- if (cval.val != 0 && write_gen >= conn->base_write_gen) {
- WT_ERR_NOTFOUND_OK(
- __wt_config_getones(session, sys_config, WT_SYSTEM_CKPT_SNAPSHOT_TIME, &cval),
- false);
- if (cval.val != 0)
- *ckpttime = (uint64_t)cval.val;
- }
+ /*
+ * If the write generation is current, extract the checkpoint time. Otherwise we use 0.
+ */
+ if (ckpttime != NULL && cval.val != 0 && write_gen >= conn->base_write_gen) {
+ WT_ERR_NOTFOUND_OK(
+ __wt_config_getones(session, sys_config, WT_SYSTEM_CKPT_SNAPSHOT_TIME, &cval), false);
+ if (cval.val != 0)
+ *ckpttime = (uint64_t)cval.val;
}
/*
@@ -1666,7 +1675,7 @@ __wt_meta_read_checkpoint_snapshot(WT_SESSION_IMPL *session, const char *ckpt_na
* transaction IDs between min and max.
*/
WT_ASSERT(session,
- *snapshot == NULL ||
+ snapshot == NULL || snap_min == NULL || snap_max == NULL || *snapshot == NULL ||
(*snapshot_count == counter && (*snapshot)[0] == *snap_min &&
(*snapshot)[counter - 1] < *snap_max));
}
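Given the NULL-tolerant out-parameters introduced above, a caller that needs only the snapshot bounds can skip allocating the snapshot list entirely. The fragment below is a hedged sketch: the exact parameter order of __wt_meta_read_checkpoint_snapshot is an assumption inferred from the assignments visible in these hunks, not copied from the header.

    /* Illustrative fragment only; the out-parameter order is assumed, not authoritative. */
    uint64_t ckpttime, snap_max, snap_min, *snapshot, write_gen;
    uint32_t snapshot_count;

    /* Bounds only, for the most recent checkpoint: pass NULL for the list, count and time. */
    WT_RET(__wt_meta_read_checkpoint_snapshot(
      session, NULL, &write_gen, &snap_min, &snap_max, NULL, NULL, NULL));

    /* Retrieving the snapshot list requires retrieving its count as well. */
    WT_RET(__wt_meta_read_checkpoint_snapshot(
      session, NULL, &write_gen, &snap_min, &snap_max, &snapshot, &snapshot_count, &ckpttime));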
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_child.c b/src/third_party/wiredtiger/src/reconcile/rec_child.c
index 265c76447b1..afdf3f5ad78 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_child.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_child.c
@@ -13,39 +13,46 @@
* Handle pages with leaf pages in the WT_REF_DELETED state.
*/
static int
-__rec_child_deleted(
- WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref, WT_CHILD_MODIFY_STATE *cmsp)
+__rec_child_deleted(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref,
+ WT_PAGE_DELETED *page_del, WT_CHILD_MODIFY_STATE *cmsp)
{
- WT_PAGE_DELETED *page_del;
- WT_TXN *txn;
uint8_t prepare_state;
cmsp->state = WT_CHILD_IGNORE;
- txn = session->txn;
-
/*
- * The complicated case is a fast-delete which may not be visible or stable. Otherwise, discard
- * any underlying disk blocks and don't write anything.
+ * If there's no page-delete structure, the truncate must be globally visible. Discard any
+ * underlying disk blocks and don't write anything in the internal page.
*/
- page_del = ref->ft_info.del;
if (page_del == NULL)
- return (ref->addr == NULL ? 0 : __wt_ref_block_free(session, ref));
+ return (__wt_ref_block_free(session, ref));
/*
- * The fast-delete may not yet be visible to us. In that case, we proceed as with any change not
+ * The truncate may not yet be visible to us. In that case, we proceed as with any change not
* visible during reconciliation by ignoring the change for the purposes of writing the internal
* page.
*
* We expect the page to be clean after reconciliation. If there are invisible updates, abort
* eviction.
+ *
+ * We must have reconciliation leave the page dirty in this case, because the truncation hasn't
+ * been written to disk yet; if the page gets marked clean it might be discarded and then the
+ * truncation is lost.
*/
- if (__wt_page_del_active(session, ref, !F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))) {
+ if (!__wt_page_del_visible(session, page_del, !F_ISSET(session->txn, WT_TXN_HAS_SNAPSHOT))) {
if (F_ISSET(r, WT_REC_VISIBILITY_ERR))
WT_RET_PANIC(session, EINVAL, "reconciliation illegally skipped an update");
- if (F_ISSET(r, WT_REC_CLEAN_AFTER_REC))
+ /*
+ * In addition to the WT_REC_CLEAN_AFTER_REC case, fail if we're trying to evict an internal
+ * page and we can't see the update to it. There's not much point continuing; unlike with a
+ * leaf page, rewriting the page image and keeping the modification doesn't accomplish a
+ * great deal. Also currently code elsewhere assumes that evicting (vs. checkpointing)
+ * internal pages shouldn't leave them dirty.
+ */
+ if (F_ISSET(r, WT_REC_CLEAN_AFTER_REC | WT_REC_EVICT))
return (__wt_set_return(session, EBUSY));
cmsp->state = WT_CHILD_ORIGINAL;
+ r->leave_dirty = true;
return (0);
}
@@ -57,12 +64,18 @@ __rec_child_deleted(
* We should never see an in-progress prepare in eviction: when we check to see if an internal
* page can be evicted, we check for an unresolved fast-truncate, which includes a fast-truncate
* in a prepared state, so it's an error to see that during eviction.
+ *
+ * As in the previous case, leave the page dirty. This is not strictly necessary as the prepared
+ * truncation will also prevent eviction; but if we don't do it and someone adds the ability to
+ * evict prepared truncates, the page apparently being clean might lead to truncations being
+ * lost in hard-to-debug ways.
*/
WT_ORDERED_READ(prepare_state, page_del->prepare_state);
if (prepare_state == WT_PREPARE_INPROGRESS || prepare_state == WT_PREPARE_LOCKED) {
WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
cmsp->state = WT_CHILD_ORIGINAL;
+ r->leave_dirty = true;
return (0);
}
@@ -74,22 +87,30 @@ __rec_child_deleted(
* We have the WT_REF locked, but that lock is released before returning to the function writing
* cells to the page. Copy out the current fast-truncate information for that function.
*/
- if (__wt_page_del_active(session, ref, true)) {
- cmsp->del = *ref->ft_info.del;
+ if (!__wt_page_del_visible(session, page_del, true)) {
+ cmsp->del = *page_del;
cmsp->state = WT_CHILD_PROXY;
return (0);
}
/*
- * Otherwise, we can discard the leaf page to the block manager and no cell needs to be written.
- * Done outside of the underlying tracking routines because this action is permanent and
- * irrevocable. (Clearing the address means we've lost track of the disk address in a permanent
- * way. This is safe because there's no path to reading the leaf page again: if there's ever a
- * read into this part of the name space again, the cache read function instantiates an entirely
- * new page.)
+ * The truncate is globally visible: discard the leaf page to the block manager; no cell needs to
+ * be written. Done outside of the underlying tracking routines because this action is permanent
+ * and irrevocable. (Clearing the address means we've lost track of the disk address in a
+ * permanent way. This is safe because there's no path to reading the leaf page again: if there
+ * is ever a read into this part of the name space again, the cache read function instantiates
+ * an entirely new page.)
*/
WT_RET(__wt_ref_block_free(session, ref));
- __wt_overwrite_and_free(session, ref->ft_info.del);
+
+ /*
+     * Globally visible fast-truncate information is never used again, so a NULL value is
+     * equivalent. Fast-truncate information in the page-modify structure, however, can be used
+     * more than once if this reconciliation of the internal page fails.
+ */
+ if (page_del == ref->ft_info.del)
+ __wt_overwrite_and_free(session, ref->ft_info.del);
+
return (0);
}
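
For readers skimming the hunk above, the overall decision can be condensed into a small standalone model. This is only a sketch under assumed names (fake_page_del, child_deleted and the two boolean inputs are hypothetical, not WiredTiger's types), and it omits the eviction/clean-after-rec failure path that returns EBUSY:

#include <stdbool.h>
#include <stdio.h>

/*
 * Hypothetical model of the deleted-child decision above. "visible_to_us" stands in for the
 * __wt_page_del_visible check with the reconciliation's snapshot, and "globally_visible" for
 * the stronger all-readers check; the enum and struct names are illustrative only.
 */
enum child_state { CHILD_KEEP_ORIGINAL, CHILD_WRITE_PROXY, CHILD_DISCARD_LEAF };

struct decision {
    enum child_state state;
    bool leave_dirty;   /* Keep the internal page dirty so the truncation is not lost. */
    bool copy_page_del; /* Copy the fast-truncate info out for the proxy cell. */
};

static struct decision
child_deleted(bool visible_to_us, bool globally_visible)
{
    struct decision d = {CHILD_KEEP_ORIGINAL, false, false};

    if (!visible_to_us) {
        /* Not yet visible: write the original cell and keep the page dirty. */
        d.leave_dirty = true;
        return (d);
    }
    if (!globally_visible) {
        /* Visible, but not to everyone: write a proxy (deleted-address) cell. */
        d.state = CHILD_WRITE_PROXY;
        d.copy_page_del = true;
        return (d);
    }
    /* Globally visible: the leaf page can be discarded and no cell is written. */
    d.state = CHILD_DISCARD_LEAF;
    return (d);
}

int
main(void)
{
    struct decision d = child_deleted(true, false);

    printf("state=%d leave_dirty=%d copy=%d\n", d.state, d.leave_dirty, d.copy_page_del);
    return (0);
}

The property the comments above insist on shows up in the first branch: a truncation that is not yet visible keeps the internal page dirty, so it cannot be lost if the page image is later discarded.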
@@ -125,6 +146,8 @@ __wt_rec_child_modify(
case WT_REF_DISK:
/* On disk, not modified by definition. */
WT_ASSERT(session, ref->addr != NULL);
+ /* DISK pages do not have fast-truncate info. */
+ WT_ASSERT(session, ref->ft_info.del == NULL);
goto done;
case WT_REF_DELETED:
@@ -137,7 +160,7 @@ __wt_rec_child_modify(
*/
if (!WT_REF_CAS_STATE(session, ref, WT_REF_DELETED, WT_REF_LOCKED))
break;
- ret = __rec_child_deleted(session, r, ref, cmsp);
+ ret = __rec_child_deleted(session, r, ref, ref->ft_info.del, cmsp);
WT_REF_SET_STATE(ref, WT_REF_DELETED);
goto done;
@@ -193,7 +216,56 @@ __wt_rec_child_modify(
}
WT_RET(ret);
cmsp->hazard = true;
- goto in_memory;
+
+ /*
+ * The child is potentially modified if the page's modify structure has been created. If
+ * the modify structure exists and the page has been reconciled, set that state.
+ */
+ mod = ref->page->modify;
+ if (mod != NULL && mod->rec_result != 0) {
+ cmsp->state = WT_CHILD_MODIFIED;
+ goto done;
+ }
+
+ /*
+ * Deleted page instantiation can happen at any time during a checkpoint. If we found
+ * the instantiated page in the first checkpoint pass, it will have been reconciled and
+ * dealt with normally. However, if that didn't happen, we get here with a page that has
+ * been modified and never reconciled.
+ *
+ * Ordinarily in that situation we'd write a reference to the original child page, and
+ * in the ordinary case where the modifications were applied after the checkpoint
+ * started that would be fine. However, for a deleted page it's possible that the
+ * deletion predates the checkpoint and is visible, and only the instantiation happened
+ * after the checkpoint started. In that case we need the modifications to appear in the
+ * checkpoint, but if we didn't already reconcile the page it's too late to do it now.
+ * Depending on visibility, we may need to write the original page, or write a proxy
+ * (deleted-address) cell with the pre-instantiation page-delete information, or we may
+ * be able to ignore the page entirely. We keep the original fast-truncate information
+ * in the modify structure after instantiation to make the visibility check possible.
+ *
+ * The key is the page-modify.instantiated flag, removed during page reconciliation. If
+ * it's set, instantiation happened after checkpoint passed the leaf page and we treat
+ * this page like a WT_REF_DELETED page, evaluating it as it was before instantiation.
+ *
+ * We do not need additional locking: with a hazard pointer the page can't be evicted,
+ * and reconciliation is the only thing that can clear the page-modify info.
+ */
+ if (mod != NULL && mod->instantiated) {
+ WT_RET(__rec_child_deleted(session, r, ref, mod->page_del, cmsp));
+ goto done;
+ }
+
+ /*
+             * Insert splits are permitted during checkpoint. Checkpoints first walk the internal
+             * page's page-index and write out any dirty pages they find, then write out the
+             * internal page in post-order traversal. If we found the split page in the first step,
+ * it will have an address; if we didn't find the split page in the first step, it won't
+ * have an address and we ignore it, it's not part of the checkpoint.
+ */
+ if (ref->addr == NULL)
+ cmsp->state = WT_CHILD_IGNORE;
+ goto done;
case WT_REF_SPLIT:
/*
@@ -217,31 +289,6 @@ __wt_rec_child_modify(
WT_STAT_CONN_INCR(session, child_modify_blocked_page);
}
-in_memory:
- /*
- * In-memory states: the child is potentially modified if the page's modify structure has been
- * instantiated. If the modify structure exists and the page has actually been modified, set
- * that state. If that's not the case, we would normally use the original cell's disk address as
- * our reference, however there are two special cases, both flagged by a missing block address.
- *
- * First, if forced to instantiate a deleted child page and it's never modified, we end up here
- * with a page that has a modify structure, no modifications, and no disk address. Ignore those
- * pages, they're not modified and there is no reason to write the cell.
- *
- * Second, insert splits are permitted during checkpoint. When doing the final checkpoint pass,
- * we first walk the internal page's page-index and write out any dirty pages we find, then we
- * write out the internal page in post-order traversal. If we found the split page in the first
- * step, it will have an address; if we didn't find the split page in the first step, it won't
- * have an address and we ignore it, it's not part of the checkpoint.
- */
- mod = ref->page->modify;
- if (mod != NULL && mod->rec_result != 0)
- cmsp->state = WT_CHILD_MODIFIED;
- else if (ref->addr == NULL) {
- cmsp->state = WT_CHILD_IGNORE;
- WT_CHILD_RELEASE(session, cmsp->hazard, ref);
- }
-
done:
WT_DIAGNOSTIC_YIELD;
return (ret);
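
The block comments above spell out an ordering of checks for an in-memory child during checkpoint. A hedged, self-contained model of that ordering, using hypothetical structures rather than the real WT_REF and page-modify layouts, might look like this:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Hypothetical model of the in-memory child checks above. The order is the point: a
 * reconciled-modified page first, then an instantiated-deleted page (evaluated as it was
 * before instantiation), then an address-less insert split, and finally the unmodified
 * original cell.
 */
enum child { CHILD_MODIFIED, CHILD_TREAT_AS_DELETED, CHILD_IGNORE, CHILD_ORIGINAL };

struct fake_modify {
    int rec_result;    /* Non-zero once the page has been reconciled. */
    bool instantiated; /* Deleted page instantiated after the checkpoint passed the leaf. */
};

struct fake_ref {
    struct fake_modify *modify;
    const void *addr; /* NULL for an insert split not yet written by this checkpoint. */
};

static enum child
classify_in_memory_child(const struct fake_ref *ref)
{
    const struct fake_modify *mod = ref->modify;

    if (mod != NULL && mod->rec_result != 0)
        return (CHILD_MODIFIED);
    if (mod != NULL && mod->instantiated)
        return (CHILD_TREAT_AS_DELETED);
    if (ref->addr == NULL)
        return (CHILD_IGNORE);
    /* Nothing special: write a reference to the original child page. */
    return (CHILD_ORIGINAL);
}

int
main(void)
{
    struct fake_modify mod = {0, true};
    struct fake_ref split = {NULL, NULL}, instantiated = {&mod, "addr"};

    printf("split -> %d, instantiated -> %d\n", classify_in_memory_child(&split),
      classify_in_memory_child(&instantiated));
    return (0);
}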
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 27c5e2d0e00..9c40e3e5af1 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -421,61 +421,30 @@ __rec_calc_upd_memsize(WT_UPDATE *onpage_upd, WT_UPDATE *tombstone, size_t upd_m
}
/*
- * __wt_rec_upd_select --
- * Return the update in a list that should be written (or NULL if none can be written).
+ * __rec_upd_select --
+ *     Select the update to write to the disk image.
*/
-int
-__wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, WT_ROW *rip,
- WT_CELL_UNPACK_KV *vpack, WT_UPDATE_SELECT *upd_select)
+static int
+__rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE *first_upd,
+ WT_UPDATE_SELECT *upd_select, WT_UPDATE **first_txn_updp, bool *has_newer_updatesp,
+ size_t *upd_memsizep)
{
- WT_PAGE *page;
- WT_TIME_WINDOW *select_tw;
- WT_UPDATE *first_txn_upd, *first_upd, *onpage_upd, *upd, *last_upd, *tombstone;
+ WT_UPDATE *upd;
wt_timestamp_t max_ts;
- size_t upd_memsize;
uint64_t max_txn, session_txnid, txnid;
- bool has_newer_updates, is_hs_page, supd_restore, upd_saved;
+ bool is_hs_page;
#ifdef HAVE_DIAGNOSTIC
bool seen_prepare;
#endif
- /*
- * The "saved updates" return value is used independently of returning an update we can write,
- * both must be initialized.
- */
- upd_select->upd = NULL;
- upd_select->upd_saved = false;
- upd_select->no_ts_tombstone = false;
- select_tw = &upd_select->tw;
- WT_TIME_WINDOW_INIT(select_tw);
-
- page = r->page;
- first_txn_upd = onpage_upd = upd = last_upd = tombstone = NULL;
- upd_memsize = 0;
max_ts = WT_TS_NONE;
max_txn = WT_TXN_NONE;
- has_newer_updates = supd_restore = upd_saved = false;
is_hs_page = F_ISSET(session->dhandle, WT_DHANDLE_HS);
session_txnid = WT_SESSION_TXN_SHARED(session)->id;
-
#ifdef HAVE_DIAGNOSTIC
seen_prepare = false;
#endif
- /*
- * If called with a WT_INSERT item, use its WT_UPDATE list (which must exist), otherwise check
- * for an on-page row-store WT_UPDATE list (which may not exist). Return immediately if the item
- * has no updates.
- */
- if (ins != NULL)
- first_upd = ins->upd;
- else {
- /* Note: ins is never null for columns. */
- WT_ASSERT(session, rip != NULL && page->type == WT_PAGE_ROW_LEAF);
- if ((first_upd = WT_ROW_UPDATE(page, rip)) == NULL)
- return (0);
- }
-
for (upd = first_upd; upd != NULL; upd = upd->next) {
if ((txnid = upd->txnid) == WT_TXN_ABORTED)
continue;
@@ -483,8 +452,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
/*
* Track the first update in the chain that is not aborted and the maximum transaction ID.
*/
- if (first_txn_upd == NULL)
- first_txn_upd = upd;
+ if (*first_txn_updp == NULL)
+ *first_txn_updp = upd;
if (WT_TXNID_LT(max_txn, txnid))
max_txn = txnid;
@@ -492,8 +461,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
* Special handling for application threads evicting their own updates.
*/
if (!is_hs_page && F_ISSET(r, WT_REC_APP_EVICTION_SNAPSHOT) && txnid == session_txnid) {
- upd_memsize += WT_UPDATE_MEMSIZE(upd);
- has_newer_updates = true;
+ *upd_memsizep += WT_UPDATE_MEMSIZE(upd);
+ *has_newer_updatesp = true;
continue;
}
/*
@@ -530,8 +499,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
return (__wt_set_return(session, EBUSY));
}
- upd_memsize += WT_UPDATE_MEMSIZE(upd);
- has_newer_updates = true;
+ *upd_memsizep += WT_UPDATE_MEMSIZE(upd);
+ *has_newer_updatesp = true;
continue;
}
@@ -540,11 +509,11 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
upd->prepare_state == WT_PREPARE_INPROGRESS) {
WT_ASSERT(session, upd_select->upd == NULL || upd_select->upd->txnid == upd->txnid);
if (F_ISSET(r, WT_REC_CHECKPOINT)) {
- upd_memsize += WT_UPDATE_MEMSIZE(upd);
- has_newer_updates = true;
+ *upd_memsizep += WT_UPDATE_MEMSIZE(upd);
+ *has_newer_updatesp = true;
if (upd->start_ts > max_ts)
max_ts = upd->start_ts;
-#ifdef HAVE_DIADNOSTIC
+#ifdef HAVE_DIAGNOSTIC
seen_prepare = true;
#endif
continue;
@@ -580,6 +549,199 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
break;
}
+ /*
+ * Track the most recent transaction in the page. We store this in the tree at the end of
+     * reconciliation in the service of checkpoints; it is used to avoid discarding trees from
+ * memory when they have changes required to satisfy a snapshot read.
+ */
+ if (WT_TXNID_LT(r->max_txn, max_txn))
+ r->max_txn = max_txn;
+
+ /* Update the maximum timestamp. */
+ if (max_ts > r->max_ts)
+ r->max_ts = max_ts;
+
+ /*
+     * We should never select an update that has been written to the history store, except when
+     * checkpoint writes an update that is older than a prepared update, or when we first need to
+     * delete the update from the history store.
+ */
+ WT_ASSERT(session,
+ upd_select->upd == NULL || !F_ISSET(upd_select->upd, WT_UPDATE_HS) ||
+ F_ISSET(upd_select->upd, WT_UPDATE_TO_DELETE_FROM_HS) ||
+ (!F_ISSET(r, WT_REC_EVICT) && seen_prepare));
+
+ return (0);
+}
+
+/*
+ * __rec_fill_tw_from_upd_select --
+ * Fill the time window information and the selected update.
+ */
+static int
+__rec_fill_tw_from_upd_select(
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK_KV *vpack, WT_UPDATE_SELECT *upd_select)
+{
+ WT_TIME_WINDOW *select_tw;
+ WT_UPDATE *last_upd, *upd, *tombstone;
+
+ upd = upd_select->upd;
+ last_upd = tombstone = NULL;
+ select_tw = &upd_select->tw;
+
+ /*
+ * The start timestamp is determined by the commit timestamp when the key is first inserted (or
+ * last updated). The end timestamp is set when a key/value pair becomes invalid, either because
+ * of a remove or a modify/update operation on the same key.
+ */
+
+ /*
+ * Mark the prepare flag if the selected update is an uncommitted prepare. As tombstone updates
+ * are never returned to write, set this flag before we move into the previous update to write.
+ */
+ if (upd->prepare_state == WT_PREPARE_INPROGRESS)
+ select_tw->prepare = 1;
+
+ /*
+ * If the newest is a tombstone then select the update before it and set the end of the
+ * visibility window to its time point as appropriate to indicate that we should return "not
+ * found" for reads after this point.
+ *
+ * Otherwise, leave the end of the visibility window at the maximum possible value to indicate
+ * that the value is visible to any timestamp/transaction id ahead of it.
+ */
+ if (upd->type == WT_UPDATE_TOMBSTONE) {
+ WT_TIME_WINDOW_SET_STOP(select_tw, upd);
+ tombstone = upd_select->tombstone = upd;
+
+ /* Find the update this tombstone applies to. */
+ if (!__wt_txn_upd_visible_all(session, upd)) {
+ while (upd->next != NULL && upd->next->txnid == WT_TXN_ABORTED)
+ upd = upd->next;
+
+ WT_ASSERT(session, upd->next == NULL || upd->next->txnid != WT_TXN_ABORTED);
+ upd_select->upd = upd = upd->next;
+ /* We should not see multiple consecutive tombstones. */
+ WT_ASSERT(session, upd == NULL || upd->type != WT_UPDATE_TOMBSTONE);
+ }
+ }
+
+ if (upd != NULL)
+ /* The beginning of the validity window is the selected update's time point. */
+ WT_TIME_WINDOW_SET_START(select_tw, upd);
+ else if (select_tw->stop_ts != WT_TS_NONE || select_tw->stop_txn != WT_TXN_NONE) {
+ /* We only have a tombstone on the update list. */
+ WT_ASSERT(session, tombstone != NULL);
+
+ /* We must have an ondisk value and it can't be a prepared update. */
+ WT_ASSERT(session, vpack != NULL && vpack->type != WT_CELL_DEL && !vpack->tw.prepare);
+
+ /* Move the pointer to the last update on the update chain. */
+ for (last_upd = tombstone; last_upd->next != NULL; last_upd = last_upd->next)
+ /* Tombstone is the only non-aborted update on the update chain. */
+ WT_ASSERT(session, last_upd->next->txnid == WT_TXN_ABORTED);
+
+ /*
+ * It's possible to have a tombstone as the only update in the update list. If we reconciled
+ * before with only a single update and then read the page back into cache, we'll have an
+ * empty update list. And applying a delete on top of that will result in ONLY a tombstone
+ * in the update list.
+ *
+ * In this case, we should leave the selected update unset to indicate that we want to keep
+ * the same on-disk value but set the stop time point to indicate that the validity window
+ * ends when this tombstone started. (Note: this may have been true at one point, but
+ * currently we either append the onpage value and return that, or return the tombstone
+ * itself; there is no case that returns no update but sets the time window.)
+ *
+ * FIXME-WT-6557: no need to check this after WT-6557 is done as the tombstone will be freed
+ * when it is written to the disk image in the previous eviction.
+ */
+ if (!F_ISSET(tombstone, WT_UPDATE_RESTORED_FROM_DS | WT_UPDATE_RESTORED_FROM_HS)) {
+ WT_RET(__rec_append_orig_value(session, page, tombstone, vpack));
+
+ /*
+ * We may have updated the global transaction concurrently and the tombstone is now
+ * globally visible. In this case, the on page value is not appended. Verify that.
+ */
+ if (last_upd->next != NULL) {
+ WT_ASSERT(session,
+ last_upd->next->txnid == vpack->tw.start_txn &&
+ last_upd->next->start_ts == vpack->tw.start_ts &&
+ last_upd->next->type == WT_UPDATE_STANDARD && last_upd->next->next == NULL);
+ upd_select->upd = last_upd->next;
+ WT_TIME_WINDOW_SET_START(select_tw, last_upd->next);
+ } else {
+ /*
+                 * It's possible that the onpage value is not appended if the tombstone becomes globally
+ * visible because the oldest transaction id or the oldest timestamp is moved
+ * concurrently.
+ *
+ * If the tombstone is aborted concurrently, we should still have appended the
+ * onpage value.
+ */
+ WT_ASSERT(session,
+ tombstone->txnid != WT_TXN_ABORTED &&
+ __wt_txn_upd_visible_all(session, tombstone) && upd_select->upd == NULL);
+ upd_select->upd = tombstone;
+ }
+ } else
+ /*
+ * If the tombstone is restored from the disk or history store, it must have already
+ * been written to the disk image in the previous eviction.
+ */
+ WT_ASSERT(session,
+ upd_select->upd == NULL && vpack->tw.durable_stop_ts == tombstone->durable_ts &&
+ vpack->tw.stop_txn == tombstone->txnid);
+ }
+
+ return (0);
+}
+
+/*
+ * __wt_rec_upd_select --
+ * Return the update in a list that should be written (or NULL if none can be written).
+ */
+int
+__wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, WT_ROW *rip,
+ WT_CELL_UNPACK_KV *vpack, WT_UPDATE_SELECT *upd_select)
+{
+ WT_PAGE *page;
+ WT_UPDATE *first_txn_upd, *first_upd, *onpage_upd, *upd;
+ size_t upd_memsize;
+ bool has_newer_updates, supd_restore, upd_saved;
+
+ /*
+     * The "saved updates" return value is used independently of returning an update we can write;
+     * both must be initialized.
+ */
+ upd_select->upd = NULL;
+ upd_select->tombstone = NULL;
+ upd_select->upd_saved = false;
+ upd_select->no_ts_tombstone = false;
+ WT_TIME_WINDOW_INIT(&upd_select->tw);
+
+ page = r->page;
+ first_txn_upd = onpage_upd = upd = NULL;
+ upd_memsize = 0;
+ has_newer_updates = supd_restore = upd_saved = false;
+
+ /*
+     * If called with a WT_INSERT item, use its WT_UPDATE list (which must exist); otherwise check
+ * for an on-page row-store WT_UPDATE list (which may not exist). Return immediately if the item
+ * has no updates.
+ */
+ if (ins != NULL)
+ first_upd = ins->upd;
+ else {
+ /* Note: ins is never null for columns. */
+ WT_ASSERT(session, rip != NULL && page->type == WT_PAGE_ROW_LEAF);
+ if ((first_upd = WT_ROW_UPDATE(page, rip)) == NULL)
+ return (0);
+ }
+
+ WT_RET(__rec_upd_select(
+ session, r, first_upd, upd_select, &first_txn_upd, &has_newer_updates, &upd_memsize));
+
/* Keep track of the selected update. */
upd = upd_select->upd;
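
The tombstone handling now isolated in __rec_fill_tw_from_upd_select can be illustrated with a small standalone sketch. This is a simplified model under assumed types (a newest-first update chain with timestamps only; transaction visibility and the on-disk-value cases are omitted), not the actual WiredTiger code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TXN_ABORTED UINT64_MAX
#define TS_MAX UINT64_MAX

enum upd_type { UPD_STANDARD, UPD_TOMBSTONE };

struct upd {
    enum upd_type type;
    uint64_t txnid;
    uint64_t start_ts;
    struct upd *next; /* Next-older update in the chain. */
};

struct time_window {
    uint64_t start_ts;
    uint64_t stop_ts;
};

/* Return the update whose value should be written, filling in the validity window. */
static struct upd *
select_update(struct upd *first, struct time_window *tw)
{
    struct upd *upd;

    tw->start_ts = 0;
    tw->stop_ts = TS_MAX;

    /* Skip aborted updates to find the newest live one. */
    for (upd = first; upd != NULL && upd->txnid == TXN_ABORTED; upd = upd->next)
        ;
    if (upd == NULL)
        return (NULL);

    if (upd->type == UPD_TOMBSTONE) {
        /* Reads after this point should see "not found". */
        tw->stop_ts = upd->start_ts;
        do
            upd = upd->next;
        while (upd != NULL && upd->txnid == TXN_ABORTED);
    }
    if (upd != NULL)
        tw->start_ts = upd->start_ts;
    return (upd);
}

int
main(void)
{
    struct upd value = {UPD_STANDARD, 10, 100, NULL};
    struct upd tombstone = {UPD_TOMBSTONE, 20, 200, &value};
    struct time_window tw;
    struct upd *selected = select_update(&tombstone, &tw);

    printf("selected start_ts=%llu, window [%llu, %llu)\n",
      (unsigned long long)(selected == NULL ? 0 : selected->start_ts),
      (unsigned long long)tw.start_ts, (unsigned long long)tw.stop_ts);
    return (0);
}

Running the example yields a validity window of [100, 200): the tombstone closes the window at its timestamp and the standard update beneath it opens the window, matching the description above.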
@@ -616,129 +778,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
if (upd != NULL)
r->update_used = true;
- /*
- * The start timestamp is determined by the commit timestamp when the key is first inserted (or
- * last updated). The end timestamp is set when a key/value pair becomes invalid, either because
- * of a remove or a modify/update operation on the same key.
- *
- * In the case of a tombstone where the previous update is the ondisk value, we'll allocate an
- * update here to represent the ondisk value. Keep a pointer to the original update (the
- * tombstone) since we do some pointer comparisons below to check whether or not all updates are
- * stable.
- */
- if (upd != NULL) {
- /*
- * Mark the prepare flag if the selected update is an uncommitted prepare. As tombstone
- * updates are never returned to write, set this flag before we move into the previous
- * update to write.
- */
- if (upd->prepare_state == WT_PREPARE_INPROGRESS)
- select_tw->prepare = 1;
-
- /*
- * If the newest is a tombstone then select the update before it and set the end of the
- * visibility window to its time point as appropriate to indicate that we should return "not
- * found" for reads after this point.
- *
- * Otherwise, leave the end of the visibility window at the maximum possible value to
- * indicate that the value is visible to any timestamp/transaction id ahead of it.
- */
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- WT_TIME_WINDOW_SET_STOP(select_tw, upd);
- tombstone = upd;
-
- /* Find the update this tombstone applies to. */
- if (!__wt_txn_upd_visible_all(session, upd)) {
- while (upd->next != NULL && upd->next->txnid == WT_TXN_ABORTED)
- upd = upd->next;
-
- WT_ASSERT(session, upd->next == NULL || upd->next->txnid != WT_TXN_ABORTED);
- upd_select->upd = upd = upd->next;
- /* We should not see multiple consecutive tombstones. */
- WT_ASSERT(session, upd == NULL || upd->type != WT_UPDATE_TOMBSTONE);
- }
- }
-
- if (upd != NULL)
- /* The beginning of the validity window is the selected update's time point. */
- WT_TIME_WINDOW_SET_START(select_tw, upd);
- else if (select_tw->stop_ts != WT_TS_NONE || select_tw->stop_txn != WT_TXN_NONE) {
- /* We only have a tombstone on the update list. */
- WT_ASSERT(session, tombstone != NULL);
-
- /* We must have an ondisk value and it can't be a prepared update. */
- WT_ASSERT(session, vpack != NULL && vpack->type != WT_CELL_DEL && !vpack->tw.prepare);
-
- /* Move the pointer to the last update on the update chain. */
- for (last_upd = tombstone; last_upd->next != NULL; last_upd = last_upd->next)
- /* Tombstone is the only non-aborted update on the update chain. */
- WT_ASSERT(session, last_upd->next->txnid == WT_TXN_ABORTED);
-
- /*
- * It's possible to have a tombstone as the only update in the update list. If we
- * reconciled before with only a single update and then read the page back into cache,
- * we'll have an empty update list. And applying a delete on top of that will result in
- * ONLY a tombstone in the update list.
- *
- * In this case, we should leave the selected update unset to indicate that we want to
- * keep the same on-disk value but set the stop time point to indicate that the validity
- * window ends when this tombstone started. (Note: this may have been true at one point,
- * but currently we either append the onpage value and return that, or return the
- * tombstone itself; there is no case that returns no update but sets the time window.)
- *
- * FIXME-WT-6557: no need to check this after WT-6557 is done as the tombstone will be
- * freed when it is written to the disk image in the previous eviction.
- */
- if (!F_ISSET(tombstone, WT_UPDATE_RESTORED_FROM_DS | WT_UPDATE_RESTORED_FROM_HS)) {
- WT_RET(__rec_append_orig_value(session, page, tombstone, vpack));
-
- /*
- * We may have updated the global transaction concurrently and the tombstone is now
- * globally visible. In this case, the on page value is not appended. Verify that.
- */
- if (last_upd->next != NULL) {
- WT_ASSERT(session,
- last_upd->next->txnid == vpack->tw.start_txn &&
- last_upd->next->start_ts == vpack->tw.start_ts &&
- last_upd->next->type == WT_UPDATE_STANDARD && last_upd->next->next == NULL);
- upd_select->upd = last_upd->next;
- WT_TIME_WINDOW_SET_START(select_tw, last_upd->next);
- } else {
- /*
- * It's possible that onpage value is not appended if the tombstone becomes
- * globally visible because the oldest transaction id or the oldest timestamp is
- * moved concurrently.
- *
- * If the tombstone is aborted concurrently, we should still have appended the
- * onpage value.
- */
- WT_ASSERT(session,
- tombstone->txnid != WT_TXN_ABORTED &&
- __wt_txn_upd_visible_all(session, tombstone) && upd_select->upd == NULL);
- upd_select->upd = tombstone;
- }
- } else
- /*
- * If the tombstone is restored from the disk or history store, it must have already
- * been written to the disk image in the previous eviction.
- */
- WT_ASSERT(session,
- upd_select->upd == NULL && vpack->tw.durable_stop_ts == tombstone->durable_ts &&
- vpack->tw.stop_txn == tombstone->txnid);
- }
- }
-
- /*
- * Track the most recent transaction in the page. We store this in the tree at the end of
- * reconciliation in the service of checkpoints, it is used to avoid discarding trees from
- * memory when they have changes required to satisfy a snapshot read.
- */
- if (WT_TXNID_LT(r->max_txn, max_txn))
- r->max_txn = max_txn;
-
- /* Update the maximum timestamp. */
- if (max_ts > r->max_ts)
- r->max_ts = max_ts;
+ if (upd != NULL)
+ WT_RET(__rec_fill_tw_from_upd_select(session, page, vpack, upd_select));
/* Mark the page dirty after reconciliation. */
if (has_newer_updates)
@@ -749,14 +790,14 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
upd_select->upd;
/* Check the update chain for conditions that could prevent its eviction. */
- WT_RET(__rec_validate_upd_chain(session, r, onpage_upd, select_tw, vpack));
+ WT_RET(__rec_validate_upd_chain(session, r, onpage_upd, &upd_select->tw, vpack));
/*
* Set the flag if the selected tombstone has no timestamp. Based on this flag, the caller
* functions perform the history store truncation for this key.
*/
- if (!is_hs_page && tombstone != NULL &&
- !F_ISSET(tombstone, WT_UPDATE_RESTORED_FROM_DS | WT_UPDATE_RESTORED_FROM_HS)) {
+ if (!F_ISSET(session->dhandle, WT_DHANDLE_HS) && upd_select->tombstone != NULL &&
+ !F_ISSET(upd_select->tombstone, WT_UPDATE_RESTORED_FROM_DS | WT_UPDATE_RESTORED_FROM_HS)) {
upd = upd_select->upd;
/*
@@ -764,7 +805,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
* Compare the tombstone's timestamp with either the next update in the update list or the
* on-disk cell timestamp to determine if the tombstone is discarding a timestamp.
*/
- if (tombstone == upd) {
+ if (upd_select->tombstone == upd) {
upd = upd->next;
/* Loop until a valid update is found. */
@@ -772,8 +813,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
upd = upd->next;
}
- if ((upd != NULL && upd->start_ts > tombstone->start_ts) ||
- (vpack != NULL && vpack->tw.start_ts > tombstone->start_ts))
+ if ((upd != NULL && upd->start_ts > upd_select->tombstone->start_ts) ||
+ (vpack != NULL && vpack->tw.start_ts > upd_select->tombstone->start_ts))
upd_select->no_ts_tombstone = true;
}
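
The rewritten check above flags a tombstone that discards a timestamp. As a minimal illustration (plain integers stand in for the update chain and the on-disk cell; zero means "no timestamp" or "not present"):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Hypothetical model of the check above: a tombstone "discards a timestamp" when the value it
 * removes (either the next update in the chain or the on-disk cell) carries a newer start
 * timestamp than the tombstone itself.
 */
static bool
tombstone_discards_timestamp(
  uint64_t tombstone_start_ts, uint64_t next_upd_start_ts, uint64_t ondisk_start_ts)
{
    return (next_upd_start_ts > tombstone_start_ts || ondisk_start_ts > tombstone_start_ts);
}

int
main(void)
{
    /* A tombstone without a timestamp (0) removing a value committed at timestamp 42. */
    printf("no-ts tombstone: %s\n", tombstone_discards_timestamp(0, 42, 0) ? "yes" : "no");
    /* A tombstone at timestamp 50 removing a value committed at 42: nothing is discarded. */
    printf("no-ts tombstone: %s\n", tombstone_discards_timestamp(50, 0, 42) ? "yes" : "no");
    return (0);
}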
@@ -784,7 +825,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
* Returning EBUSY here is okay as the previous call to validate the update chain wouldn't have
* caught the situation where only a tombstone is selected.
*/
- if (__timestamp_no_ts_fix(session, select_tw) && F_ISSET(r, WT_REC_HS) &&
+ if (__timestamp_no_ts_fix(session, &upd_select->tw) && F_ISSET(r, WT_REC_HS) &&
F_ISSET(r, WT_REC_CHECKPOINT_RUNNING)) {
/* Catch this case in diagnostic builds. */
WT_STAT_CONN_DATA_INCR(session, cache_eviction_blocked_no_ts_checkpoint_race_3);
@@ -807,9 +848,9 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
supd_restore = F_ISSET(r, WT_REC_EVICT) &&
(has_newer_updates || F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
- upd_memsize = __rec_calc_upd_memsize(onpage_upd, tombstone, upd_memsize);
+ upd_memsize = __rec_calc_upd_memsize(onpage_upd, upd_select->tombstone, upd_memsize);
WT_RET(__rec_update_save(
- session, r, ins, rip, onpage_upd, tombstone, supd_restore, upd_memsize));
+ session, r, ins, rip, onpage_upd, upd_select->tombstone, supd_restore, upd_memsize));
/*
* Mark the selected update (and potentially the tombstone preceding it) as being destined
@@ -818,8 +859,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
*/
if (upd_select->upd != NULL)
F_SET(upd_select->upd, WT_UPDATE_DS);
- if (tombstone != NULL)
- F_SET(tombstone, WT_UPDATE_DS);
+ if (upd_select->tombstone != NULL)
+ F_SET(upd_select->tombstone, WT_UPDATE_DS);
upd_saved = upd_select->upd_saved = true;
}
@@ -834,16 +875,6 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
* Paranoia: check that we didn't choose an update that has since been rolled back.
*/
WT_ASSERT(session, upd_select->upd == NULL || upd_select->upd->txnid != WT_TXN_ABORTED);
- /*
- * We should never select an update that has been written to the history store except checkpoint
- * writes the update that is older than a prepared update.
- */
- WT_ASSERT(session,
- upd_select->upd == NULL || !F_ISSET(upd_select->upd, WT_UPDATE_HS) ||
- (!F_ISSET(r, WT_REC_EVICT) && seen_prepare));
- WT_ASSERT(session,
- tombstone == NULL || !F_ISSET(tombstone, WT_UPDATE_HS) ||
- (!F_ISSET(r, WT_REC_EVICT) && seen_prepare));
/*
* Returning an update means the original on-page value might be lost, and that's a problem if
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index fad9e8fd3ad..65a7412e03a 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -2554,6 +2554,12 @@ split:
break;
}
+ /* If the page has post-instantiation delete information, we don't need it any more. */
+ if (mod->instantiated) {
+ mod->instantiated = false;
+ __wt_free(session, mod->page_del);
+ }
+
return (0);
}
diff --git a/src/third_party/wiredtiger/src/schema/schema_truncate.c b/src/third_party/wiredtiger/src/schema/schema_truncate.c
index 6dbeb264d80..f19eed9df17 100644
--- a/src/third_party/wiredtiger/src/schema/schema_truncate.c
+++ b/src/third_party/wiredtiger/src/schema/schema_truncate.c
@@ -156,7 +156,8 @@ __wt_range_truncate(WT_CURSOR *start, WT_CURSOR *stop)
* WT_SESSION::truncate with a range.
*/
int
-__wt_schema_range_truncate(WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop)
+__wt_schema_range_truncate(
+ WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop, bool *is_col_fix)
{
WT_DATA_SOURCE *dsrc;
WT_DECL_RET;
@@ -169,7 +170,8 @@ __wt_schema_range_truncate(WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR
if (stop != NULL)
WT_ERR(__cursor_needkey(stop));
WT_WITH_BTREE(session, CUR2BT(start),
- ret = __wt_btcur_range_truncate((WT_CURSOR_BTREE *)start, (WT_CURSOR_BTREE *)stop));
+ ret = __wt_btcur_range_truncate(
+ (WT_CURSOR_BTREE *)start, (WT_CURSOR_BTREE *)stop, is_col_fix));
} else if (WT_PREFIX_MATCH(uri, "table:"))
ret = __wt_table_range_truncate((WT_CURSOR_TABLE *)start, (WT_CURSOR_TABLE *)stop);
else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL && dsrc->range_truncate != NULL)
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index 979713e0c59..e6d6529393c 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -1358,9 +1358,16 @@ __wt_session_range_truncate(
{
WT_DECL_RET;
int cmp;
- bool local_start;
+ bool is_col_fix, local_start;
- local_start = false;
+#ifdef HAVE_DIAGNOSTIC
+ WT_CURSOR *debug_start, *debug_stop;
+ WT_ITEM col_value;
+
+ debug_start = debug_stop = NULL;
+#endif
+
+ is_col_fix = local_start = false;
if (uri != NULL) {
WT_ASSERT(session, WT_BTREE_PREFIX(uri));
/*
@@ -1449,7 +1456,43 @@ __wt_session_range_truncate(
goto done;
}
- WT_ERR(__wt_schema_range_truncate(session, start, stop));
+ /*
+ * Create a copy of the start and stop cursors to maintain the original start and stop positions
+ * for error-checking purposes.
+ */
+#ifdef HAVE_DIAGNOSTIC
+ if (start != NULL)
+ WT_ERR(__session_open_cursor((WT_SESSION *)session, NULL, start, NULL, &debug_start));
+ if (stop != NULL)
+ WT_ERR(__session_open_cursor((WT_SESSION *)session, NULL, stop, NULL, &debug_stop));
+#endif
+
+ WT_ERR(__wt_schema_range_truncate(session, start, stop, &is_col_fix));
+
+#ifdef HAVE_DIAGNOSTIC
+ /*
+     * The debug cursors will be positioned at the start and stop keys of the range, if set. For
+     * row-store and variable-length column store, we expect a WT_NOTFOUND return when searching
+     * for a record that has been truncated. Fixed-length column store works a little differently:
+     * we should instead check that the corresponding value of the truncated record is zero.
+ */
+ if (!is_col_fix) {
+ if (start != NULL)
+ WT_ASSERT(session, debug_start->search(debug_start) == WT_NOTFOUND);
+ if (stop != NULL)
+ WT_ASSERT(session, debug_stop->search(debug_stop) == WT_NOTFOUND);
+ } else {
+ if (start != NULL) {
+ WT_ERR(debug_start->search(debug_start));
+ WT_ASSERT(session, debug_start->get_value(debug_start, &col_value) == 0);
+ }
+ if (stop != NULL) {
+ WT_ERR(debug_stop->search(debug_stop));
+ WT_ASSERT(session, debug_stop->get_value(debug_stop, &col_value) == 0);
+ }
+ }
+#endif
done:
err:
@@ -1465,6 +1508,14 @@ err:
WT_TRET(start->reset(start));
if (stop != NULL)
WT_TRET(stop->reset(stop));
+
+#ifdef HAVE_DIAGNOSTIC
+ if (debug_start != NULL)
+ WT_TRET(debug_start->close(debug_start));
+ if (debug_stop != NULL)
+ WT_TRET(debug_stop->close(debug_stop));
+#endif
+
return (ret);
}
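
The new diagnostic block verifies from inside the library what an application can also observe through the public API. As a rough illustration only, with error handling elided and an arbitrary table name and key set, a range truncate followed by a search of a truncated key looks roughly like this:

#include <stdio.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;
    WT_SESSION *session;
    WT_CURSOR *cursor, *start, *stop;
    char key[16];
    int i, ret;

    /* A real program checks every return value; this sketch uses the current directory as home. */
    wiredtiger_open(".", NULL, "create", &conn);
    conn->open_session(conn, NULL, NULL, &session);
    session->create(session, "table:trunc", "key_format=S,value_format=S");

    session->open_cursor(session, "table:trunc", NULL, NULL, &cursor);
    for (i = 0; i < 10; ++i) {
        snprintf(key, sizeof(key), "key%02d", i);
        cursor->set_key(cursor, key);
        cursor->set_value(cursor, "value");
        cursor->insert(cursor);
    }

    /* Truncate the range [key02, key07] using start/stop cursors with their keys set. */
    session->open_cursor(session, "table:trunc", NULL, NULL, &start);
    session->open_cursor(session, "table:trunc", NULL, NULL, &stop);
    start->set_key(start, "key02");
    stop->set_key(stop, "key07");
    session->truncate(session, NULL, start, stop, NULL);

    /* A truncated key should no longer be found (row-store / VLCS behavior). */
    cursor->set_key(cursor, "key05");
    ret = cursor->search(cursor);
    printf("search(key05) -> %s\n", ret == WT_NOTFOUND ? "WT_NOTFOUND" : "found");

    conn->close(conn, NULL);
    return (0);
}

For fixed-length column store the expectation differs, as the comment above notes: the truncated key still exists and its value reads back as zero, which is why the diagnostic code branches on is_col_fix.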
diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c
index 106d4aebb4a..c8f39c28273 100644
--- a/src/third_party/wiredtiger/src/session/session_dhandle.c
+++ b/src/third_party/wiredtiger/src/session/session_dhandle.c
@@ -292,13 +292,16 @@ __session_fetch_checkpoint_meta(WT_SESSION_IMPL *session, const char *ckpt_name,
WT_CKPT_SNAPSHOT *info_ret, uint64_t *snapshot_time_ret, uint64_t *stable_time_ret,
uint64_t *oldest_time_ret)
{
- WT_DECL_RET;
- uint64_t *snapshot_txns;
+ /* Get the timestamps. */
+ WT_RET(__wt_meta_read_checkpoint_timestamp(
+ session, ckpt_name, &info_ret->stable_ts, stable_time_ret));
+ WT_RET(
+ __wt_meta_read_checkpoint_oldest(session, ckpt_name, &info_ret->oldest_ts, oldest_time_ret));
- /* Get the snapshot first; it's written last as the checkpoint completes. */
+ /* Get the snapshot. */
WT_RET(__wt_meta_read_checkpoint_snapshot(session, ckpt_name, &info_ret->snapshot_write_gen,
- &info_ret->snapshot_min, &info_ret->snapshot_max, &snapshot_txns, &info_ret->snapshot_count,
- snapshot_time_ret));
+ &info_ret->snapshot_min, &info_ret->snapshot_max, &info_ret->snapshot_txns,
+ &info_ret->snapshot_count, snapshot_time_ret));
/*
* If we successfully read a null snapshot, set the min and max to WT_TXN_MAX so everything is
@@ -309,22 +312,22 @@ __session_fetch_checkpoint_meta(WT_SESSION_IMPL *session, const char *ckpt_name,
*/
if (info_ret->snapshot_min == WT_TXN_NONE && info_ret->snapshot_max == WT_TXN_NONE) {
info_ret->snapshot_min = info_ret->snapshot_max = WT_TXN_MAX;
- WT_ASSERT(session, snapshot_txns == NULL && info_ret->snapshot_count == 0);
+ WT_ASSERT(session, info_ret->snapshot_txns == NULL && info_ret->snapshot_count == 0);
}
- /* Get the timestamps. */
- WT_ERR(__wt_meta_read_checkpoint_timestamp(
- session, ckpt_name, &info_ret->stable_ts, stable_time_ret));
- WT_ERR(
- __wt_meta_read_checkpoint_oldest(session, ckpt_name, &info_ret->oldest_ts, oldest_time_ret));
-
- /* Wait until we succeed to assign this, to be sure it can't be cleaned up twice. */
- info_ret->snapshot_txns = snapshot_txns;
return (0);
+}
-err:
- __wt_free(session, snapshot_txns);
- return (ret);
+/*
+ * __session_fetch_checkpoint_snapshot_wall_time --
+ * Like __session_fetch_checkpoint_meta, but retrieves just the wall clock time of the snapshot.
+ */
+static int
+__session_fetch_checkpoint_snapshot_wall_time(
+ WT_SESSION_IMPL *session, const char *ckpt_name, uint64_t *walltime)
+{
+ return (__wt_meta_read_checkpoint_snapshot(
+ session, ckpt_name, NULL, NULL, NULL, NULL, NULL, walltime));
}
/*
@@ -365,12 +368,12 @@ __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const cha
{
WT_CONFIG_ITEM cval;
WT_DECL_RET;
- uint64_t ds_time, hs_time, oldest_time, snapshot_time, stable_time;
+ uint64_t ds_time, first_snapshot_time, hs_time, oldest_time, snapshot_time, stable_time;
int64_t ds_order, hs_order;
const char *checkpoint, *hs_checkpoint;
bool is_unnamed_ckpt, must_resolve;
- ds_time = hs_time = oldest_time = snapshot_time = stable_time = 0;
+ ds_time = first_snapshot_time = hs_time = oldest_time = snapshot_time = stable_time = 0;
ds_order = hs_order = 0;
checkpoint = NULL;
hs_checkpoint = NULL;
@@ -434,28 +437,29 @@ __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const cha
* because unnamed checkpoints are never replaced, but for named checkpoints it's possible for
* the open to race with regeneration of the checkpoint.)
*
- * Because the snapshot and timestamp information is always written by every checkpoint, and is
- * written last, it always gives the wall clock time of the most recent completed global
- * checkpoint. If either the data store or history store checkpoint has a newer wall clock time,
- * it must be from a currently running checkpoint and does not match the snapshot; therefore we
- * must retry or fail. If both have the same or an older wall clock time, they are from the same
- * or an older checkpoint and can be presumed to match.
+ * Because the snapshot information is always written by every checkpoint, and is written last,
+ * we use its wall clock time as the reference. This is always the wall clock time of the most
+ * recent completed global checkpoint of the same name, or the most recent completed unnamed
+ * checkpoint, as appropriate. We read this time twice, once at the very beginning and again
+     * checkpoint, as appropriate. We read this time twice: once at the very beginning, and again
+     * at the end along with the snapshot information itself, after the other items. If these two
+ * cannot tell for sure if we read one of the trees' metadata before the checkpoint updated it;
+ * if the tree's wall clock time is older than the snapshot's, it might be because that tree was
+ * skipped, but it might also be because there was an update but we read before the update
+ * happened. Therefore, we need to retry.
*
- * A slight complication is that the snapshot and timestamp information is three separate pieces
- * of metadata; we read the time from all three and if they don't agree, it must be because a
- * checkpoint is finishing at this very moment, so we retry.
+ * If the two copies of the snapshot time match, we check the other wall clock times against the
+ * snapshot time. If any of the items are newer, they were written by a currently running
+ * checkpoint that hasn't finished yet, and we need to retry.
*
- * (It is actually slightly more complicated: either timestamp might not be present, in which
+ * (For the timestamps it is slightly easier; either timestamp might not be present, in which
* case both the timestamp and its associated time will read back as zero. We take advantage of
* the knowledge that for both these timestamps the system cannot transition from a state with
* the timestamp set to one where it is not, and therefore once any checkpoint includes either
- * timestamp, every subsequent checkpoint will too. Since the snapshot is written after both
- * timestamps, we read it first. Then for each timestamp, if we read it and find it present, it
- * must be from the same checkpoint as the snapshot or the next. If it isn't present, its
- * absence might technically be associated with the next checkpoint, but if so it cannot have
- * been present in the snapshot's checkpoint either and we are ok to proceed. So we retry if
- * either timestamp's wall time is newer than the snapshot's. Then, to partially crosscheck this
- * logic we assert that the wall time is either the same as the snapshot's or zero.)
+ * timestamp, every subsequent checkpoint will too. Therefore, the timestamps' wall times should
+ * either match the snapshot or be zero; and if they're zero, it doesn't matter if they were
+ * actually zero in a newer, currently running checkpoint, because then they must have always
+ * been zero.)
*
* This scheme relies on the fact we take steps to make sure that the checkpoint wall clock time
* does not run backward, and that successive checkpoints are never given the same wall clock
@@ -482,7 +486,10 @@ __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const cha
/* We're opening the history store directly, so don't open it twice. */
hs_dhandlep = NULL;
- /* Test for the internal checkpoint name (WiredTigerCheckpoint). */
+ /*
+ * Test for the internal checkpoint name (WiredTigerCheckpoint). Note: must_resolve is true in a
+ * subset of the cases where is_unnamed_ckpt is true.
+ */
must_resolve = WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len);
is_unnamed_ckpt = cval.len >= strlen(WT_CHECKPOINT) && WT_PREFIX_MATCH(cval.str, WT_CHECKPOINT);
@@ -490,20 +497,28 @@ __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const cha
do {
ret = 0;
- if (ckpt_snapshot != NULL)
+ if (!must_resolve)
+ /* Copy the checkpoint name first because we may need it to get the first wall time. */
+ WT_RET(__wt_strndup(session, cval.str, cval.len, &checkpoint));
+
+ if (ckpt_snapshot != NULL) {
/* We're about to re-fetch this; discard the prior version. No effect the first time. */
__wt_free(session, ckpt_snapshot->snapshot_txns);
- /* Look up the data store checkpoint. */
+ /*
+ * Now, as the first step of the retrieval process, get the wall-clock time of the
+ * snapshot metadata (only). If we need the name, we'll have copied it already.
+ */
+ WT_RET(__session_fetch_checkpoint_snapshot_wall_time(
+ session, is_unnamed_ckpt ? NULL : checkpoint, &first_snapshot_time));
+ }
+
if (must_resolve)
+ /* Look up the most recent data store checkpoint. This fetches the exact name to use. */
WT_RET(__wt_meta_checkpoint_last_name(session, uri, &checkpoint, &ds_order, &ds_time));
- else {
- /* Copy the checkpoint name. */
- WT_RET(__wt_strndup(session, cval.str, cval.len, &checkpoint));
-
- /* Look up the checkpoint and get its time and order information. */
+ else
+ /* Look up the checkpoint by name and get its time and order information. */
WT_RET(__wt_meta_checkpoint_by_name(session, uri, checkpoint, &ds_order, &ds_time));
- }
/* Look up the history store checkpoint. */
if (hs_dhandlep != NULL) {
@@ -532,10 +547,9 @@ __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const cha
/*
* Check if we raced with a running checkpoint.
*
- * If either timestamp metadata time is newer than the snapshot, we read in the middle
- * of that material being updated and we need to retry. If that didn't happen, then
- * check if either the data store or history store checkpoint time is newer than the
- * metadata time. In either case we need to retry.
+             * If the two copies of the snapshot time don't match, or if any of the other metadata
+ * time is newer than the snapshot, we read in the middle of that material being updated
+ * and we need to retry.
*
* Otherwise we have successfully gotten a matching set, as described above.
*
@@ -548,8 +562,8 @@ __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const cha
* forever.
*/
- if (ds_time > snapshot_time || hs_time > snapshot_time || stable_time > snapshot_time ||
- oldest_time > snapshot_time)
+ if (first_snapshot_time != snapshot_time || ds_time > snapshot_time ||
+ hs_time > snapshot_time || stable_time > snapshot_time || oldest_time > snapshot_time)
ret = __wt_set_return(session, EBUSY);
else {
/* Crosscheck that we didn't somehow get an older timestamp. */
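
The retry scheme described above boils down to: read the snapshot wall time, read the other metadata items, read the snapshot (and its time) again, and retry if the two snapshot times differ or any item is newer than the snapshot. A minimal single-threaded model of that check, using a plain struct in place of the metadata tables (all names hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the checkpoint metadata; each field carries a wall clock time. */
struct meta {
    uint64_t snapshot_time; /* Written last by every checkpoint. */
    uint64_t ds_time;       /* Data store checkpoint wall time. */
    uint64_t hs_time;       /* History store checkpoint wall time. */
    uint64_t stable_time;   /* Stable timestamp wall time (0 if never set). */
    uint64_t oldest_time;   /* Oldest timestamp wall time (0 if never set). */
};

/* Return true if the set of items read belongs to a single completed checkpoint. */
static bool
read_consistent(const struct meta *m, struct meta *out)
{
    uint64_t first_snapshot_time;

    /* First read: just the snapshot wall time. */
    first_snapshot_time = m->snapshot_time;

    /* Read the individual items; each could in principle come from a different checkpoint. */
    out->ds_time = m->ds_time;
    out->hs_time = m->hs_time;
    out->stable_time = m->stable_time;
    out->oldest_time = m->oldest_time;

    /* Last read: the snapshot again, along with its wall time. */
    out->snapshot_time = m->snapshot_time;

    /*
     * Retry if a checkpoint completed between the two snapshot reads, or if any item was written
     * by a checkpoint newer than the snapshot we read.
     */
    if (first_snapshot_time != out->snapshot_time || out->ds_time > out->snapshot_time ||
      out->hs_time > out->snapshot_time || out->stable_time > out->snapshot_time ||
      out->oldest_time > out->snapshot_time)
        return (false);
    return (true);
}

int
main(void)
{
    struct meta stored = {100, 100, 100, 0, 100}, copy;

    printf("consistent: %s\n", read_consistent(&stored, &copy) ? "yes" : "retry");
    stored.hs_time = 120; /* Simulate racing with a newer, still-running checkpoint. */
    printf("consistent: %s\n", read_consistent(&stored, &copy) ? "yes" : "retry");
    return (0);
}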
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 6e53950cf56..eb64c3d9237 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -1430,6 +1430,7 @@ static const char *const __stats_connection_desc[] = {
"cursor: cursor sweep cursors examined",
"cursor: cursor sweeps",
"cursor: cursor truncate calls",
+ "cursor: cursor truncates performed on individual keys",
"cursor: cursor update calls",
"cursor: cursor update calls that return an error",
"cursor: cursor update key and value bytes",
@@ -2015,6 +2016,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cursor_sweep_examined = 0;
stats->cursor_sweep = 0;
stats->cursor_truncate = 0;
+ stats->cursor_truncate_keys_deleted = 0;
stats->cursor_update = 0;
stats->cursor_update_error = 0;
stats->cursor_update_bytes = 0;
@@ -2604,6 +2606,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cursor_sweep_examined += WT_STAT_READ(from, cursor_sweep_examined);
to->cursor_sweep += WT_STAT_READ(from, cursor_sweep);
to->cursor_truncate += WT_STAT_READ(from, cursor_truncate);
+ to->cursor_truncate_keys_deleted += WT_STAT_READ(from, cursor_truncate_keys_deleted);
to->cursor_update += WT_STAT_READ(from, cursor_update);
to->cursor_update_error += WT_STAT_READ(from, cursor_update_error);
to->cursor_update_bytes += WT_STAT_READ(from, cursor_update_bytes);
diff --git a/src/third_party/wiredtiger/src/tiered/tiered_handle.c b/src/third_party/wiredtiger/src/tiered/tiered_handle.c
index 35db12d817a..216b40e2eb7 100644
--- a/src/third_party/wiredtiger/src/tiered/tiered_handle.c
+++ b/src/third_party/wiredtiger/src/tiered/tiered_handle.c
@@ -215,10 +215,13 @@ __tiered_create_local(WT_SESSION_IMPL *session, WT_TIERED *tiered)
WT_ERR(__wt_scr_alloc(session, 1024, &build));
__wt_config_init(session, &cparser, config);
while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) {
- if (!WT_STRING_MATCH("checkpoint", ck.str, ck.len))
+ if (!WT_STRING_MATCH("checkpoint", ck.str, ck.len)) {
+ /* Preserve any quotation marks during the copy. */
+ WT_CONFIG_PRESERVE_QUOTES(session, &cv);
/* Append the entry to the new buffer. */
WT_ERR(__wt_buf_catfmt(
session, build, "%.*s=%.*s,", (int)ck.len, ck.str, (int)cv.len, cv.str));
+ }
}
WT_ERR_NOTFOUND_OK(ret, false);
__wt_free(session, config);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index a9379b44b1b..48b58b20c71 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -1640,6 +1640,15 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
if (op->type == WT_TXN_OP_REF_DELETE) {
WT_REF_LOCK(session, op->u.ref, &previous_state);
+
+ /*
+ * Only two cases are possible. First: the state is WT_REF_DELETED. In this case
+ * ft_info.del cannot be NULL yet because an uncommitted operation cannot have reached
+ * global visibility. Otherwise: there is an uncommitted delete operation we're
+ * handling, so the page can't be in a non-deleted state, and the tree can't be
+ * readonly. Therefore the page must have been instantiated, the state must be
+ * WT_REF_MEM, and there should be an update list in ft_info.update.
+ */
if (previous_state == WT_REF_DELETED)
op->u.ref->ft_info.del->committed = true;
else
@@ -1852,7 +1861,7 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
}
break;
case WT_TXN_OP_REF_DELETE:
- __wt_txn_op_apply_prepare_state(session, op->u.ref, false);
+ __wt_txn_op_delete_apply_prepare_state(session, op->u.ref, false);
break;
case WT_TXN_OP_TRUNCATE_COL:
case WT_TXN_OP_TRUNCATE_ROW:
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index 418588b9eaa..775332a5568 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -997,7 +997,7 @@ done:
* connection level base write generation number is updated at the end of the recovery
* checkpoint.
*/
- __wt_dhandle_update_write_gens(session);
+ WT_ERR(__wt_dhandle_update_write_gens(session));
/*
* If we're downgrading and have newer log files, force log removal, no matter what the remove
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index ee71daae59c..f1db05d68a2 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -1240,16 +1240,21 @@ __rollback_to_stable_page_skip(
*skipp = false; /* Default to reading */
/*
- * Skip fast-truncate operations durable at or before the RTS timestamp (reading the page will
- * delete it). A page without fast-truncate timestamp information is an old format page: skip
- * them as there's no way to get correct behavior, and skipping them matches historic behavior.
+ * Skip pages truncated at or before the RTS timestamp. (We could read the page, but that would
+     * Skip pages truncated at or before the RTS timestamp. (We could read the page, but that
+     * would unnecessarily instantiate it.) If the page has no fast-delete information, either it
+     * was discarded because the delete is globally visible, or the internal page holding the
+     * cell was an old-format page so none was loaded. In the latter case we should skip the page,
+     * as there's no way to get correct behavior and skipping matches the historic behavior. Note
+     * that eviction is running; we must lock the WT_REF before examining the fast-delete information.
- if (ref->state == WT_REF_DELETED) {
+ if (ref->state == WT_REF_DELETED &&
+ WT_REF_CAS_STATE(session, ref, WT_REF_DELETED, WT_REF_LOCKED)) {
page_del = ref->ft_info.del;
if (page_del == NULL ||
(__rollback_txn_visible_id(session, page_del->txnid) &&
page_del->durable_timestamp <= rollback_timestamp))
*skipp = true;
+ WT_REF_SET_STATE(ref, WT_REF_DELETED);
return (0);
}
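
The requirement called out in the comment, locking the WT_REF before examining the fast-delete information, follows a standard compare-and-swap pattern. A hedged sketch using C11 atomics in place of WiredTiger's WT_REF state machine (the names and the visibility check are simplified placeholders):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { REF_DELETED, REF_LOCKED };

struct fake_page_del {
    uint64_t durable_timestamp;
};

struct fake_ref {
    _Atomic int state;
    struct fake_page_del *del;
};

/* Decide whether a deleted page can be skipped, taking the ref lock while we look. */
static bool
page_skip(struct fake_ref *ref, uint64_t rollback_timestamp)
{
    bool skip;
    int expected = REF_DELETED;

    /* Only examine the fast-delete info if we win the race to lock the ref. */
    if (!atomic_compare_exchange_strong(&ref->state, &expected, REF_LOCKED))
        return (false);

    skip = ref->del == NULL || ref->del->durable_timestamp <= rollback_timestamp;

    /* Publish the state back so other threads (e.g. eviction) can proceed. */
    atomic_store(&ref->state, REF_DELETED);
    return (skip);
}

int
main(void)
{
    struct fake_page_del del = {50};
    struct fake_ref ref;

    atomic_init(&ref.state, REF_DELETED);
    ref.del = &del;

    printf("skip at rollback ts 100: %s\n", page_skip(&ref, 100) ? "yes" : "no");
    printf("skip at rollback ts 10: %s\n", page_skip(&ref, 10) ? "yes" : "no");
    return (0);
}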
@@ -1257,7 +1262,12 @@ __rollback_to_stable_page_skip(
if (ref->state != WT_REF_DISK)
return (0);
- /* Check whether this on-disk page has any updates to be aborted. */
+ /*
+     * Check whether this on-disk page has any updates to be aborted. We are not holding a hazard
+     * pointer on the page, so we rely on there being no other threads of control in the tree;
+     * that is, eviction ignores WT_REF_DISK pages and no other thread is reading pages, so this
+     * page cannot change state from on-disk to something else.
+ */
if (!__rollback_page_needs_abort(session, ref, rollback_timestamp)) {
*skipp = true;
__wt_verbose_multi(
diff --git a/src/third_party/wiredtiger/test/cppsuite/configs/burst_inserts_stress.txt b/src/third_party/wiredtiger/test/cppsuite/configs/burst_inserts_stress.txt
index 9bdfd13fff2..1fbd288e2f0 100644
--- a/src/third_party/wiredtiger/test/cppsuite/configs/burst_inserts_stress.txt
+++ b/src/third_party/wiredtiger/test/cppsuite/configs/burst_inserts_stress.txt
@@ -1,31 +1,44 @@
-# Used as a stress test for the framework.
+# The burst inserts test attempts to replicate the conditions seen in WT-7798.
+# It is a highly stressful insert and read workload that performs insertions rapidly for
+# burst_duration and then sleeps for the op_rate defined in the insert config.
+# It intentionally performs relatively infrequent checkpoints.
duration_seconds=14400,
-cache_size_mb=2000,
+burst_duration=90,
+cache_size_mb=2048,
+# Compression is required; otherwise the host's disk fills up.
+compression_enabled=true,
timestamp_manager=
(
enabled=true,
- oldest_lag=10,
- stable_lag=10
+ oldest_lag=30,
+ stable_lag=30
),
workload_manager=
(
populate_config=
(
- collection_count=160,
- key_count_per_collection=500,
- key_size=200,
+ collection_count=1000,
+ key_count_per_collection=1,
+ key_size=50,
thread_count=40,
- value_size=20000
+ value_size=10
),
insert_config=
(
- key_size=200,
+ key_size=50,
op_rate=10s,
- ops_per_transaction=(max=2000,min=1000),
- thread_count=40,
- value_size=20000
+ # Any more than this and our insert transactions get rolled back.
+ ops_per_transaction=(max=30,min=0),
+ thread_count=100,
+ value_size=1000000
+ ),
+ checkpoint_config=
+ (
+ op_rate=120s,
)
),
+# The operation tracker is disabled because it would create a very large table that cannot be
+# cleaned up, since this test only inserts keys.
operation_tracker=
(
enabled=false,
diff --git a/src/third_party/wiredtiger/test/cppsuite/configs/cache_resize_default.txt b/src/third_party/wiredtiger/test/cppsuite/configs/cache_resize_default.txt
index 9b4093238e5..ceadb982b2c 100644
--- a/src/third_party/wiredtiger/test/cppsuite/configs/cache_resize_default.txt
+++ b/src/third_party/wiredtiger/test/cppsuite/configs/cache_resize_default.txt
@@ -22,7 +22,7 @@ workload_manager=
insert_config=
(
key_size=1000000,
- op_rate=4s,
+ op_rate=3s,
ops_per_transaction=(min=2,max=2),
thread_count=5,
),
@@ -37,5 +37,5 @@ operation_tracker=
# Timestamp, transaction id,
tracking_key_format=QQ,
# Operation type, cache size
- tracking_value_format=iS
+ tracking_value_format=iQ
) \ No newline at end of file
diff --git a/src/third_party/wiredtiger/test/cppsuite/configs/cursor_bound_01_default.txt b/src/third_party/wiredtiger/test/cppsuite/configs/cursor_bound_01_default.txt
index 46570f2713e..30f72acf0af 100644
--- a/src/third_party/wiredtiger/test/cppsuite/configs/cursor_bound_01_default.txt
+++ b/src/third_party/wiredtiger/test/cppsuite/configs/cursor_bound_01_default.txt
@@ -28,28 +28,28 @@ workload_manager=
(
key_size=10,
op_rate=10ms,
- thread_count=5,
+ thread_count=0,
value_size=20
),
read_config=
(
op_rate=3ms,
- thread_count=10
+ thread_count=0
),
remove_config=
(
op_rate=500ms,
- thread_count=1
+ thread_count=0
),
update_config=
(
op_rate=15ms,
- thread_count=10,
+ thread_count=0,
value_size=20
),
custom_config=
(
- thread_count=5
+ thread_count=0
)
),
operation_tracker=
diff --git a/src/third_party/wiredtiger/test/cppsuite/configs/hs_cleanup_stress.txt b/src/third_party/wiredtiger/test/cppsuite/configs/hs_cleanup_stress.txt
index 1c281906be4..db281368160 100644
--- a/src/third_party/wiredtiger/test/cppsuite/configs/hs_cleanup_stress.txt
+++ b/src/third_party/wiredtiger/test/cppsuite/configs/hs_cleanup_stress.txt
@@ -1,8 +1,9 @@
-# Run for half an hour.
-duration_seconds=1800,
+# Run for an hour.
+duration_seconds=3600,
# The configuration can produce a large number of updates at once, therefore use a large cache
# size to hold these values.
-cache_size_mb=500,
+# 1.5 GB
+cache_size_mb=1536,
compression_enabled=true,
statistics_config=
(
@@ -11,40 +12,31 @@ statistics_config=
),
metrics_monitor=
(
- # Seems to insert around 477K records. Give it +-20K margin.
cache_hs_insert=
(
- max=497000,
- min=457000,
- postrun=false,
save=true,
),
- # Seems to remove 160K records. Give it a similar margin.
cc_pages_removed=
(
- max=170000,
- min=150000,
- postrun=false,
save=true,
),
stat_cache_size=
(
- max=110,
+ # FIXME-WT-9339
+ # one day maybe we'll stop blowing out the cache.
+ max=200,
runtime=true,
),
- # The data files compress to around 25MB per table at the end of a run so 250MB total.
- # +1.4GB for the history store. With an additional 150MB margin.
stat_db_size=
(
- max=1900000000,
save=true,
),
),
timestamp_manager=
(
enabled=true,
- oldest_lag=2,
- stable_lag=5
+ oldest_lag=1,
+ stable_lag=10
),
workload_manager=
(
@@ -54,30 +46,29 @@ workload_manager=
),
populate_config=
(
- collection_count=10,
+ collection_count=100,
key_count_per_collection=1000,
- key_size=5,
- thread_count=10,
- value_size=100000
+ key_size=50,
+ thread_count=100,
+ value_size=1000000
),
read_config=
(
op_rate=5ms,
- ops_per_transaction=(max=100,min=1),
- thread_count=8
+ ops_per_transaction=(max=1000,min=1),
+ thread_count=20
),
update_config=
(
- op_rate=10ms,
+ op_rate=1ms,
# Be careful to not aim too high with this config, if we fill the dirty cache and
# all threads are trying to update, they'll get pulled into eviction and will get stuck.
- ops_per_transaction=(max=20,min=0),
- thread_count=10,
- value_size=100000
+ ops_per_transaction=(max=10,min=0),
+ thread_count=100,
+ value_size=1000000
)
),
operation_tracker=
(
- enabled=true,
- op_rate=20s
+ enabled=false,
)
diff --git a/src/third_party/wiredtiger/test/cppsuite/configs/operations_test_stress.txt b/src/third_party/wiredtiger/test/cppsuite/configs/operations_test_stress.txt
index 5d4b900dd17..f320cefe0b6 100644
--- a/src/third_party/wiredtiger/test/cppsuite/configs/operations_test_stress.txt
+++ b/src/third_party/wiredtiger/test/cppsuite/configs/operations_test_stress.txt
@@ -1,14 +1,11 @@
# Used as a stress test for the framework.
-duration_seconds=1500,
-cache_size_mb=1000,
+duration_seconds=600,
+cache_size_mb=2000,
compression_enabled=true,
metrics_monitor=
(
stat_db_size=
(
- #At the end of the run the data files are approximately 2.3MB each. Which is a total of:
- #1.15GB, the history store isn't significant. Give the workload an extra 200MB of margin.
- max=1350000000,
# FIXME-WT-8886 - This check has been disabled to remove noisy failures in evergreen and
# will be properly corrected in WT-8886.
runtime=false,
@@ -18,58 +15,56 @@ metrics_monitor=
timestamp_manager=
(
enabled=true,
- oldest_lag=5,
+ oldest_lag=30,
op_rate=1s,
- stable_lag=10
+ stable_lag=15
),
workload_manager=
(
checkpoint_config=
(
- op_rate=30s,
+ op_rate=120s,
),
populate_config=
(
- collection_count=500,
- #5GB of data
- key_count_per_collection=1000,
+ collection_count=200,
+ #200GB of data
+ key_count_per_collection=10000,
key_size=100,
- thread_count=20,
- value_size=10000
+ thread_count=200,
+ value_size=100000
),
insert_config=
(
key_size=100,
op_rate=10ms,
- ops_per_transaction=(max=20,min=10),
- thread_count=25,
- value_size=10000
+ ops_per_transaction=(max=20,min=0),
+ thread_count=40,
+ value_size=1000000
),
read_config=
(
- op_rate=3ms,
- ops_per_transaction=(max=100,min=50),
- thread_count=20
+ op_rate=1ms,
+ ops_per_transaction=(max=1000,min=50),
+ thread_count=40
),
remove_config=
(
- op_rate=100ms,
- ops_per_transaction=(max=100,min=50),
- thread_count=5
+ op_rate=1ms,
+ ops_per_transaction=(max=50,min=0),
+ thread_count=20
),
update_config=
(
op_rate=10ms,
- ops_per_transaction=(max=10,min=5),
- thread_count=25,
- value_size=10000
+ ops_per_transaction=(max=10,min=0),
+ thread_count=40,
+ value_size=1000000
)
),
operation_tracker=
(
- # FIXME-WT-8640 Enable the tracker again once we have moved to a different host or modified the
- # workload.
- enabled=false,
+ enabled=true,
# Run after checkpoint most of the time.
op_rate=40s
)
diff --git a/src/third_party/wiredtiger/test/cppsuite/src/component/metrics_writer.cpp b/src/third_party/wiredtiger/test/cppsuite/src/component/metrics_writer.cpp
index 5c632ab169d..f8c8bb2e242 100644
--- a/src/third_party/wiredtiger/test/cppsuite/src/component/metrics_writer.cpp
+++ b/src/third_party/wiredtiger/test/cppsuite/src/component/metrics_writer.cpp
@@ -39,11 +39,9 @@ metrics_writer::add_stat(const std::string &stat_string)
void
metrics_writer::output_perf_file(const std::string &test_name)
{
- std::ofstream perf_file;
+ std::ofstream perf_file(test_name + ".json");
std::string stat_info = "[{\"info\":{\"test_name\": \"" + test_name + "\"},\"metrics\": [";
- perf_file.open(test_name + ".json");
-
for (const auto &stat : _stats)
stat_info += stat + ",";
diff --git a/src/third_party/wiredtiger/test/cppsuite/src/component/operation_tracker.cpp b/src/third_party/wiredtiger/test/cppsuite/src/component/operation_tracker.cpp
index aa997875dab..ade74bf7b1c 100644
--- a/src/third_party/wiredtiger/test/cppsuite/src/component/operation_tracker.cpp
+++ b/src/third_party/wiredtiger/test/cppsuite/src/component/operation_tracker.cpp
@@ -193,7 +193,7 @@ operation_tracker::save_schema_operation(
}
int
-operation_tracker::save_operation(const uint64_t txn_id, const tracking_operation &operation,
+operation_tracker::save_operation(WT_SESSION *session, const tracking_operation &operation,
const uint64_t &collection_id, const std::string &key, const std::string &value,
wt_timestamp_t ts, scoped_cursor &op_track_cursor)
{
@@ -210,15 +210,15 @@ operation_tracker::save_operation(const uint64_t txn_id, const tracking_operatio
"save_operation: invalid operation " + std::to_string(static_cast<int>(operation));
testutil_die(EINVAL, error_message.c_str());
} else {
- set_tracking_cursor(txn_id, operation, collection_id, key, value, ts, op_track_cursor);
+ set_tracking_cursor(session, operation, collection_id, key, value, ts, op_track_cursor);
ret = op_track_cursor->insert(op_track_cursor.get());
}
return (ret);
}
-/* Note that the transaction id is not used in the default implementation of the tracking table. */
+/* Note that session is not used in the default implementation of the tracking table. */
void
-operation_tracker::set_tracking_cursor(const uint64_t txn_id, const tracking_operation &operation,
+operation_tracker::set_tracking_cursor(WT_SESSION *session, const tracking_operation &operation,
const uint64_t &collection_id, const std::string &key, const std::string &value,
wt_timestamp_t ts, scoped_cursor &op_track_cursor)
{
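The signature change above (a WT_SESSION * instead of a pre-computed transaction id) keeps the default tracker independent of transaction ids while still letting overrides that need one derive it from the session, as the cache_resize test does further down. A sketch of such an override, assuming the cppsuite headers shown in this patch and a tracking table keyed on (timestamp, txn_id) like cache_resize's; the class itself is illustrative and not part of the patch:

/* Assumed includes from the cppsuite; WT_SESSION_IMPL comes from WiredTiger's internal headers. */
#include "src/component/operation_tracker.h"
#include "src/main/test.h"

class operation_tracker_with_txn_id : public operation_tracker {
public:
    using operation_tracker::operation_tracker;

    void
    set_tracking_cursor(WT_SESSION *session, const tracking_operation &operation,
      const uint64_t &, const std::string &, const std::string &value, wt_timestamp_t ts,
      scoped_cursor &op_track_cursor) override final
    {
        /* Recover the transaction id from the session instead of receiving it as a parameter. */
        uint64_t txn_id = ((WT_SESSION_IMPL *)session)->txn->id;
        op_track_cursor->set_key(op_track_cursor.get(), ts, txn_id);
        op_track_cursor->set_value(op_track_cursor.get(), operation, value.c_str());
    }
};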
diff --git a/src/third_party/wiredtiger/test/cppsuite/src/component/operation_tracker.h b/src/third_party/wiredtiger/test/cppsuite/src/component/operation_tracker.h
index 07e1a711a03..c4331501fe1 100644
--- a/src/third_party/wiredtiger/test/cppsuite/src/component/operation_tracker.h
+++ b/src/third_party/wiredtiger/test/cppsuite/src/component/operation_tracker.h
@@ -75,11 +75,11 @@ class operation_tracker : public component {
void save_schema_operation(
const tracking_operation &operation, const uint64_t &collection_id, wt_timestamp_t ts);
- virtual void set_tracking_cursor(const uint64_t txn_id, const tracking_operation &operation,
+ virtual void set_tracking_cursor(WT_SESSION *session, const tracking_operation &operation,
const uint64_t &collection_id, const std::string &key, const std::string &value,
wt_timestamp_t ts, scoped_cursor &op_track_cursor);
- int save_operation(const uint64_t txn_id, const tracking_operation &operation,
+ int save_operation(WT_SESSION *session, const tracking_operation &operation,
const uint64_t &collection_id, const std::string &key, const std::string &value,
wt_timestamp_t ts, scoped_cursor &op_track_cursor);
diff --git a/src/third_party/wiredtiger/test/cppsuite/src/main/test.cpp b/src/third_party/wiredtiger/test/cppsuite/src/main/test.cpp
index e00f676f17d..526486b9d1d 100644
--- a/src/third_party/wiredtiger/test/cppsuite/src/main/test.cpp
+++ b/src/third_party/wiredtiger/test/cppsuite/src/main/test.cpp
@@ -94,9 +94,8 @@ test::run()
bool enable_logging, statistics_logging;
configuration *statistics_config;
std::string statistics_type;
- /* Build the database creation config string. */
- std::string db_create_config = CONNECTION_CREATE;
-
+ /* Build the database creation config string. Allow for a maximum of 1024 sessions. */
+ std::string db_create_config = CONNECTION_CREATE + ",session_max=1024";
/* Enable snappy compression or reverse collator if required. */
if (_config->get_bool(COMPRESSION_ENABLED) || _config->get_bool(REVERSE_COLLATOR)) {
db_create_config += ",extensions=[";
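session_max is a standard wiredtiger_open configuration option; the hunk above simply folds it into the connection-create string so the larger thread counts in the new configs don't exhaust sessions. A tiny standalone sketch of opening a connection with a raised session cap (the home path is a placeholder and must already exist; only the 1024 value is taken from the change):

#include <wiredtiger.h>

#include <cstdlib>

int main()
{
    WT_CONNECTION *conn;
    WT_SESSION *session;

    /* "create" makes the database if needed; session_max raises the default session limit. */
    if (wiredtiger_open("WT_HOME", nullptr, "create,session_max=1024", &conn) != 0)
        return EXIT_FAILURE;
    if (conn->open_session(conn, nullptr, nullptr, &session) != 0)
        return EXIT_FAILURE;

    /* ... open cursors and run the workload here ... */

    return conn->close(conn, nullptr) == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}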
diff --git a/src/third_party/wiredtiger/test/cppsuite/src/main/thread_worker.cpp b/src/third_party/wiredtiger/test/cppsuite/src/main/thread_worker.cpp
index cacaa0f509a..5cf57d73944 100644
--- a/src/third_party/wiredtiger/test/cppsuite/src/main/thread_worker.cpp
+++ b/src/third_party/wiredtiger/test/cppsuite/src/main/thread_worker.cpp
@@ -120,9 +120,8 @@ thread_worker::update(
testutil_die(ret, "unhandled error while trying to update a key");
}
- uint64_t txn_id = ((WT_SESSION_IMPL *)session.get())->txn->id;
ret = op_tracker->save_operation(
- txn_id, tracking_operation::INSERT, collection_id, key, value, ts, op_track_cursor);
+ session.get(), tracking_operation::INSERT, collection_id, key, value, ts, op_track_cursor);
if (ret == 0)
txn.add_op();
@@ -162,9 +161,8 @@ thread_worker::insert(
testutil_die(ret, "unhandled error while trying to insert a key");
}
- uint64_t txn_id = ((WT_SESSION_IMPL *)session.get())->txn->id;
ret = op_tracker->save_operation(
- txn_id, tracking_operation::INSERT, collection_id, key, value, ts, op_track_cursor);
+ session.get(), tracking_operation::INSERT, collection_id, key, value, ts, op_track_cursor);
if (ret == 0)
txn.add_op();
@@ -200,9 +198,8 @@ thread_worker::remove(scoped_cursor &cursor, uint64_t collection_id, const std::
testutil_die(ret, "unhandled error while trying to remove a key");
}
- uint64_t txn_id = ((WT_SESSION_IMPL *)session.get())->txn->id;
ret = op_tracker->save_operation(
- txn_id, tracking_operation::DELETE_KEY, collection_id, key, "", ts, op_track_cursor);
+ session.get(), tracking_operation::DELETE_KEY, collection_id, key, "", ts, op_track_cursor);
if (ret == 0)
txn.add_op();
diff --git a/src/third_party/wiredtiger/test/cppsuite/tests/bounded_cursor_perf.cpp b/src/third_party/wiredtiger/test/cppsuite/tests/bounded_cursor_perf.cpp
index 1dda1cc5b78..07a48267633 100644
--- a/src/third_party/wiredtiger/test/cppsuite/tests/bounded_cursor_perf.cpp
+++ b/src/third_party/wiredtiger/test/cppsuite/tests/bounded_cursor_perf.cpp
@@ -63,7 +63,6 @@ class bounded_cursor_perf : public test {
* Each read operation performs next() and prev() calls with both normal cursors and bounded
* cursors.
*/
- int range_ret_next, range_ret_prev, ret_next, ret_prev;
/* Initialize the different timers for each function. */
execution_timer bounded_next("bounded_next", test::_args.test_name);
@@ -89,14 +88,15 @@ class bounded_cursor_perf : public test {
set_bounds(prev_range_cursor);
while (tc->running()) {
+ int ret_next = 0, ret_prev = 0;
while (ret_next != WT_NOTFOUND && ret_prev != WT_NOTFOUND && tc->running()) {
- range_ret_next = bounded_next.track([&next_range_cursor]() -> int {
+ auto range_ret_next = bounded_next.track([&next_range_cursor]() -> int {
return next_range_cursor->next(next_range_cursor.get());
});
ret_next = default_next.track(
[&next_cursor]() -> int { return next_cursor->next(next_cursor.get()); });
- range_ret_prev = bounded_prev.track([&prev_range_cursor]() -> int {
+ auto range_ret_prev = bounded_prev.track([&prev_range_cursor]() -> int {
return prev_range_cursor->prev(prev_range_cursor.get());
});
ret_prev = default_prev.track(
diff --git a/src/third_party/wiredtiger/test/cppsuite/tests/burst_inserts.cpp b/src/third_party/wiredtiger/test/cppsuite/tests/burst_inserts.cpp
index 57da2344b0f..422982f2eec 100644
--- a/src/third_party/wiredtiger/test/cppsuite/tests/burst_inserts.cpp
+++ b/src/third_party/wiredtiger/test/cppsuite/tests/burst_inserts.cpp
@@ -96,9 +96,6 @@ class burst_inserts : public test {
std::chrono::seconds(_burst_duration)) {
tc->txn.try_begin();
auto key = tc->pad_string(std::to_string(start_key + added_count), tc->key_size);
- cc.write_cursor->set_key(cc.write_cursor.get(), key.c_str());
- testutil_assert(cc.write_cursor->search(cc.write_cursor.get()) == WT_NOTFOUND);
-
/* A return value of true implies the insert was successful. */
auto value =
random_generator::instance().generate_pseudo_random_string(tc->value_size);
@@ -130,9 +127,6 @@ class burst_inserts : public test {
}
added_count = 0;
}
-
- /* Sleep as currently this loop is too fast. */
- std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
/* Close out our current txn. */
if (tc->txn.active()) {
diff --git a/src/third_party/wiredtiger/test/cppsuite/tests/cache_resize.cpp b/src/third_party/wiredtiger/test/cppsuite/tests/cache_resize.cpp
index 5aa8d76ad62..68744f554f4 100644
--- a/src/third_party/wiredtiger/test/cppsuite/tests/cache_resize.cpp
+++ b/src/third_party/wiredtiger/test/cppsuite/tests/cache_resize.cpp
@@ -27,6 +27,7 @@
*/
#include "src/common/constants.h"
+#include "src/common/logger.h"
#include "src/common/random_generator.h"
#include "src/component/operation_tracker.h"
#include "src/main/test.h"
@@ -44,12 +45,21 @@ class operation_tracker_cache_resize : public operation_tracker {
}
void
- set_tracking_cursor(const uint64_t txn_id, const tracking_operation &operation,
- const uint64_t &, const std::string &, const std::string &value, wt_timestamp_t ts,
+ set_tracking_cursor(WT_SESSION *session, const tracking_operation &operation, const uint64_t &,
+ const std::string &, const std::string &value, wt_timestamp_t ts,
scoped_cursor &op_track_cursor) override final
{
+ uint64_t txn_id = ((WT_SESSION_IMPL *)session)->txn->id;
+ /*
+ * The cache_size may have changed between the time we make an insert into the DB and the
+ * time we write the details to the tracking table, so we can't take cache_size from the
+ * connection. Instead, write the cache size as part of the atomic insert into the DB and
+ * read it from there when populating the tracking table.
+ */
+ uint64_t cache_size = std::stoull(value);
+
op_track_cursor->set_key(op_track_cursor.get(), ts, txn_id);
- op_track_cursor->set_value(op_track_cursor.get(), operation, value.c_str());
+ op_track_cursor->set_value(op_track_cursor.get(), operation, cache_size);
}
};
@@ -102,12 +112,9 @@ class cache_resize : public test {
const std::string key;
const std::string value = std::to_string(new_cache_size);
- /* Retrieve the current transaction id. */
- uint64_t txn_id = ((WT_SESSION_IMPL *)tc->session.get())->txn->id;
-
/* Save the change of cache size in the tracking table. */
tc->txn.begin();
- int ret = tc->op_tracker->save_operation(txn_id, tracking_operation::CUSTOM,
+ int ret = tc->op_tracker->save_operation(tc->session.get(), tracking_operation::CUSTOM,
collection_id, key, value, tc->tsm->get_next_ts(), tc->op_track_cursor);
if (ret == 0)
@@ -140,7 +147,6 @@ class cache_resize : public test {
random_generator::instance().generate_pseudo_random_string(tc->key_size);
const uint64_t cache_size =
((WT_CONNECTION_IMPL *)connection_manager::instance().get_connection())->cache_size;
- /* Take into account the value size given in the test configuration file. */
const std::string value = std::to_string(cache_size);
tc->txn.try_begin();
@@ -165,7 +171,7 @@ class cache_resize : public test {
validate(const std::string &operation_table_name, const std::string &,
const std::vector<uint64_t> &) override final
{
- bool first_record = false;
+ bool first_record = true;
int ret;
uint64_t cache_size, num_records = 0, prev_txn_id;
const uint64_t cache_size_500mb = 500000000;
@@ -188,7 +194,7 @@ class cache_resize : public test {
uint64_t tracked_ts, tracked_txn_id;
int tracked_op_type;
- const char *tracked_cache_size;
+ uint64_t tracked_cache_size;
testutil_check(cursor->get_key(cursor.get(), &tracked_ts, &tracked_txn_id));
testutil_check(cursor->get_value(cursor.get(), &tracked_op_type, &tracked_cache_size));
@@ -196,7 +202,7 @@ class cache_resize : public test {
logger::log_msg(LOG_TRACE,
"Timestamp: " + std::to_string(tracked_ts) +
", transaction id: " + std::to_string(tracked_txn_id) +
- ", cache size: " + std::to_string(std::stoull(tracked_cache_size)));
+ ", cache size: " + std::to_string(tracked_cache_size));
tracking_operation op_type = static_cast<tracking_operation>(tracked_op_type);
/* There are only two types of operation tracked. */
@@ -223,8 +229,11 @@ class cache_resize : public test {
*/
}
prev_txn_id = tracked_txn_id;
- /* Save the last cache size seen by the transaction. */
- cache_size = std::stoull(tracked_cache_size);
+ /*
+ * FIXME-WT-9339 - Save the last cache size seen by the transaction.
+ *
+ * cache_size = tracked_cache_size;
+ */
++num_records;
}
/* All records have been parsed, the last one still needs the be checked. */
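The cache_resize changes above route the cache size through the value written by the atomic insert: the worker stores std::to_string(cache_size) and the tracker override recovers it with std::stoull before writing a numeric column. A self-contained sketch of just that round trip (the names are illustrative; 500000000 is one of the sizes the test checks against):

#include <cassert>
#include <cstdint>
#include <string>

int main()
{
    /* Worker side: encode the cache size into the value inserted into the DB. */
    const uint64_t new_cache_size = 500000000;
    const std::string value = std::to_string(new_cache_size);

    /* Tracker side: decode it again when populating the tracking table. */
    const uint64_t tracked_cache_size = std::stoull(value);
    assert(tracked_cache_size == new_cache_size);
    return 0;
}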
diff --git a/src/third_party/wiredtiger/test/cppsuite/tests/cursor_bound_01.cpp b/src/third_party/wiredtiger/test/cppsuite/tests/cursor_bound_01.cpp
index 5482c4f899b..27eb855f10f 100644
--- a/src/third_party/wiredtiger/test/cppsuite/tests/cursor_bound_01.cpp
+++ b/src/third_party/wiredtiger/test/cppsuite/tests/cursor_bound_01.cpp
@@ -49,19 +49,18 @@ class cursor_bound_01 : public test {
/* Class helper to represent the lower and uppers bounds for the range cursor. */
class bound {
public:
- bound() = default;
- bound(uint64_t key_size_max, bool lower_bound, std::string key)
- : _lower_bound(lower_bound), _key(key)
+ bound()
{
- bool set_inclusive = random_generator::instance().generate_integer(0, 1);
- // FIXME: Use random strings, once bounds are implemented properly.
- // auto key_size =
- // random_generator::instance().generate_integer(static_cast<uint64_t>(1),
- // key_size_max);
- // auto random_key = random_generator::instance().generate_random_string(
- // key_size, characters_type::ALPHABET);
- // _key = random_key;
- _inclusive = set_inclusive;
+ clear();
+ };
+
+ bound(uint64_t key_size_max, bool lower_bound) : _lower_bound(lower_bound)
+ {
+ auto key_size =
+ random_generator::instance().generate_integer(static_cast<uint64_t>(1), key_size_max);
+ _key = random_generator::instance().generate_random_string(
+ key_size, characters_type::ALPHABET);
+ _inclusive = random_generator::instance().generate_integer(0, 1);
}
std::string
@@ -71,7 +70,7 @@ class cursor_bound_01 : public test {
",inclusive=" + std::string(_inclusive ? "true" : "false");
}
- std::string
+ const std::string &
get_key() const
{
return _key;
@@ -83,6 +82,14 @@ class cursor_bound_01 : public test {
return _inclusive;
}
+ void
+ clear()
+ {
+ _key.clear();
+ _inclusive = false;
+ _lower_bound = false;
+ }
+
private:
std::string _key;
bool _inclusive;
@@ -165,10 +172,27 @@ class cursor_bound_01 : public test {
if (normal_ret == WT_NOTFOUND)
return;
+
+ const char *normal_key;
+ testutil_check(normal_cursor->get_key(normal_cursor.get(), &normal_key));
+ /*
+ * It is possible that there are no keys within the range. Therefore make sure that normal
+ * cursor returns a key that is outside of the range.
+ */
+ if (range_ret == WT_NOTFOUND) {
+ if (next) {
+ testutil_assert(!upper_key.empty());
+ testutil_assert(!custom_lexicographical_compare(normal_key, upper_key, true));
+ } else {
+ testutil_assert(!lower_key.empty());
+ testutil_assert(custom_lexicographical_compare(normal_key, lower_key, false));
+ }
+ return;
+ }
testutil_assert(range_ret == 0 && normal_ret == 0);
/* Retrieve the key the cursor is pointing at. */
- const char *normal_key, *range_key;
+ const char *range_key;
testutil_check(normal_cursor->get_key(normal_cursor.get(), &normal_key));
testutil_check(range_cursor->get_key(range_cursor.get(), &range_key));
testutil_assert(std::string(normal_key).compare(range_key) == 0);
@@ -229,29 +253,40 @@ class cursor_bound_01 : public test {
testutil_check(range_cursor->bound(range_cursor.get(), "action=clear"));
if (set_random_bounds == LOWER_BOUND_SET || set_random_bounds == ALL_BOUNDS_SET) {
- /* Reverse case. */
- if (_reverse_collator_enabled)
- lower_bound = bound(tc->key_size, true, std::string(tc->key_size, 'z'));
- /* Normal case. */
- else
- lower_bound = bound(tc->key_size, true, "0");
+ lower_bound = bound(tc->key_size, true);
range_cursor->set_key(range_cursor.get(), lower_bound.get_key().c_str());
ret = range_cursor->bound(range_cursor.get(), lower_bound.get_config().c_str());
testutil_assert(ret == 0 || ret == EINVAL);
+
+ /*
+ * It is possible that the new lower bound overlaps with the upper bound. In that case,
+ * just clear the lower bound and continue with the test.
+ */
+ if (ret == EINVAL)
+ lower_bound.clear();
}
if (set_random_bounds == UPPER_BOUND_SET || set_random_bounds == ALL_BOUNDS_SET) {
- /* Reverse case. */
- if (_reverse_collator_enabled)
- upper_bound = bound(tc->key_size, false, "0");
- /* Normal case. */
- else
- upper_bound = bound(tc->key_size, false, std::string(tc->key_size, 'z'));
+ upper_bound = bound(tc->key_size, false);
range_cursor->set_key(range_cursor.get(), upper_bound.get_key().c_str());
ret = range_cursor->bound(range_cursor.get(), upper_bound.get_config().c_str());
testutil_assert(ret == 0 || ret == EINVAL);
+
+ /*
+ * It is possible that the new upper bound overlaps with the lower bound. In that case,
+ * just clear the upper bound and continue with the test.
+ */
+ if (ret == EINVAL)
+ upper_bound.clear();
}
+ /*
+ * It is possible that both the upper and lower bound get EINVAL; in that case, clear all
+ * bounds.
+ */
+ if (upper_bound.get_key().empty() && lower_bound.get_key().empty())
+ testutil_check(range_cursor->bound(range_cursor.get(), "action=clear"));
+
return std::make_pair(lower_bound, upper_bound);
}
@@ -357,7 +392,7 @@ class cursor_bound_01 : public test {
* When exact < 0, the returned key should be less than the search key and performing a
* next() should be greater than the search key.
*/
- } else if (range_exact < 0) {
+ } else {
testutil_assert(custom_lexicographical_compare(key, search_key, false));
/* Check that the next key is greater than the search key. */
@@ -410,7 +445,8 @@ class cursor_bound_01 : public test {
validate_search_near_not_found(
scoped_cursor &normal_cursor, const bound &lower_bound, const bound &upper_bound)
{
- int ret, exact;
+ int ret = 0, exact = 0;
+
auto lower_key = lower_bound.get_key();
auto upper_key = upper_bound.get_key();
logger::log_msg(LOG_TRACE,
@@ -578,30 +614,34 @@ class cursor_bound_01 : public test {
logger::log_msg(
LOG_INFO, type_string(tc->type) + " thread {" + std::to_string(tc->id) + "} commencing.");
- bound lower_bound, upper_bound;
std::map<uint64_t, scoped_cursor> cursors;
-
+ /* Maintain the lower and upper bound for each cursor held in the cursors map. */
+ std::map<uint64_t, std::pair<bound, bound>> bounds;
while (tc->running()) {
/* Get a random collection to work on. */
collection &coll = tc->db.get_random_collection();
/* Find a cached cursor or create one if none exists. */
- if (cursors.find(coll.id) == cursors.end())
+ if (cursors.find(coll.id) == cursors.end()) {
+ bound lower_bound, upper_bound;
cursors.emplace(coll.id, std::move(tc->session.open_scoped_cursor(coll.name)));
+ bounds.emplace(coll.id, std::move(std::make_pair(lower_bound, upper_bound)));
+ }
/* Set random bounds on cached range cursor. */
auto &range_cursor = cursors[coll.id];
- auto bound_pair = set_random_bounds(tc, range_cursor);
+ auto &bound_pair = bounds[coll.id];
+ auto new_bound_pair = set_random_bounds(tc, range_cursor);
/* Only update the bounds when the bounds have a key. */
- if (!bound_pair.first.get_key().empty())
- lower_bound = bound_pair.first;
- if (!bound_pair.second.get_key().empty())
- upper_bound = bound_pair.second;
+ if (!new_bound_pair.first.get_key().empty())
+ bound_pair.first = new_bound_pair.first;
+ if (!new_bound_pair.second.get_key().empty())
+ bound_pair.second = new_bound_pair.second;
/* Clear all bounds if both bounds don't have a key. */
- if (bound_pair.first.get_key().empty() && bound_pair.second.get_key().empty()) {
- lower_bound = bound_pair.first;
- upper_bound = bound_pair.second;
+ if (new_bound_pair.first.get_key().empty() && new_bound_pair.second.get_key().empty()) {
+ bound_pair.first.clear();
+ bound_pair.second.clear();
}
scoped_cursor normal_cursor = tc->session.open_scoped_cursor(coll.name);
@@ -620,20 +660,19 @@ class cursor_bound_01 : public test {
auto srch_key = random_generator::instance().generate_random_string(
key_size, characters_type::ALPHABET);
- int exact;
+ int exact = 0;
range_cursor->set_key(range_cursor.get(), srch_key.c_str());
auto ret = range_cursor->search_near(range_cursor.get(), &exact);
testutil_assert(ret == 0 || ret == WT_NOTFOUND);
/* Verify the bound search_near result using the normal cursor. */
- validate_bound_search_near(
- ret, exact, range_cursor, normal_cursor, srch_key, lower_bound, upper_bound);
+ validate_bound_search_near(ret, exact, range_cursor, normal_cursor, srch_key,
+ bound_pair.first, bound_pair.second);
tc->txn.add_op();
tc->txn.try_rollback();
tc->sleep();
}
- testutil_check(range_cursor->reset(range_cursor.get()));
}
/* Roll back the last transaction if still active now the work is finished. */
if (tc->txn.active())
@@ -652,28 +691,33 @@ class cursor_bound_01 : public test {
LOG_INFO, type_string(tc->type) + " thread {" + std::to_string(tc->id) + "} commencing.");
std::map<uint64_t, scoped_cursor> cursors;
- bound lower_bound, upper_bound;
+ /* Maintain the lower and upper bound for each cursor held in the cursors map. */
+ std::map<uint64_t, std::pair<bound, bound>> bounds;
while (tc->running()) {
/* Get a random collection to work on. */
collection &coll = tc->db.get_random_collection();
/* Find a cached cursor or create one if none exists. */
- if (cursors.find(coll.id) == cursors.end())
+ if (cursors.find(coll.id) == cursors.end()) {
+ bound lower_bound, upper_bound;
cursors.emplace(coll.id, std::move(tc->session.open_scoped_cursor(coll.name)));
+ bounds.emplace(coll.id, std::move(std::make_pair(lower_bound, upper_bound)));
+ }
/* Set random bounds on cached range cursor. */
auto &range_cursor = cursors[coll.id];
- auto bound_pair = set_random_bounds(tc, range_cursor);
+ auto &bound_pair = bounds[coll.id];
+ auto new_bound_pair = set_random_bounds(tc, range_cursor);
/* Only update the bounds when the bounds have a key. */
- if (!bound_pair.first.get_key().empty())
- lower_bound = bound_pair.first;
- if (!bound_pair.second.get_key().empty())
- upper_bound = bound_pair.second;
-
- /* Clear all bounds if both bounds doesn't have a key. */
- if (bound_pair.first.get_key().empty() && bound_pair.second.get_key().empty()) {
- lower_bound = bound_pair.first;
- upper_bound = bound_pair.second;
+ if (!new_bound_pair.first.get_key().empty())
+ bound_pair.first = new_bound_pair.first;
+ if (!new_bound_pair.second.get_key().empty())
+ bound_pair.second = new_bound_pair.second;
+
+ /* Clear all bounds if both bounds don't have a key. */
+ if (new_bound_pair.first.get_key().empty() && new_bound_pair.second.get_key().empty()) {
+ bound_pair.first.clear();
+ bound_pair.second.clear();
}
scoped_cursor normal_cursor = tc->session.open_scoped_cursor(coll.name);
@@ -685,14 +729,14 @@ class cursor_bound_01 : public test {
tc->txn.begin(
"roundup_timestamps=(read=true),read_timestamp=" + tc->tsm->decimal_to_hex(ts));
while (tc->txn.active() && tc->running()) {
-
- cursor_traversal(range_cursor, normal_cursor, lower_bound, upper_bound, true);
- cursor_traversal(range_cursor, normal_cursor, lower_bound, upper_bound, false);
+ cursor_traversal(
+ range_cursor, normal_cursor, bound_pair.first, bound_pair.second, true);
+ cursor_traversal(
+ range_cursor, normal_cursor, bound_pair.first, bound_pair.second, false);
tc->txn.add_op();
tc->txn.try_rollback();
tc->sleep();
}
- testutil_check(range_cursor->reset(range_cursor.get()));
}
/* Roll back the last transaction if still active now the work is finished. */
if (tc->txn.active())
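The cursor_bound_01 rework above caches a (lower, upper) bound pair per collection alongside the cached cursor, instead of sharing one pair across all collections. A minimal sketch of that per-key caching pattern with plain standard-library types (the ids and bound strings are made up; the real test stores bound objects and scoped cursors):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>

int main()
{
    /* One (lower, upper) bound pair per collection id, created lazily on first use. */
    std::map<uint64_t, std::pair<std::string, std::string>> bounds;

    const uint64_t collection_ids[] = {3, 7, 3};
    for (uint64_t id : collection_ids) {
        if (bounds.find(id) == bounds.end())
            bounds.emplace(id, std::make_pair(std::string(), std::string()));

        /* Work against this collection's own cached bounds, not a shared pair. */
        auto &bound_pair = bounds[id];
        if (bound_pair.first.empty())
            bound_pair.first = "lower-" + std::to_string(id);
        std::cout << "collection " << id << " lower bound: " << bound_pair.first << "\n";
    }
    return 0;
}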
diff --git a/src/third_party/wiredtiger/test/cppsuite/tests/test_template.cpp b/src/third_party/wiredtiger/test/cppsuite/tests/test_template.cpp
index 217e90c3aae..ee75735152d 100644
--- a/src/third_party/wiredtiger/test/cppsuite/tests/test_template.cpp
+++ b/src/third_party/wiredtiger/test/cppsuite/tests/test_template.cpp
@@ -42,13 +42,13 @@ class operation_tracker_template : public operation_tracker {
}
void
- set_tracking_cursor(const uint64_t txn_id, const tracking_operation &operation,
+ set_tracking_cursor(WT_SESSION *session, const tracking_operation &operation,
const uint64_t &collection_id, const std::string &key, const std::string &value,
wt_timestamp_t ts, scoped_cursor &op_track_cursor) override final
{
/* You can replace this call to define your own tracking table contents. */
operation_tracker::set_tracking_cursor(
- txn_id, operation, collection_id, key, value, ts, op_track_cursor);
+ session, operation, collection_id, key, value, ts, op_track_cursor);
}
};
diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/main.c b/src/third_party/wiredtiger/test/csuite/random_directio/main.c
index f1e08c5ea4a..d05a5227cdf 100644
--- a/src/third_party/wiredtiger/test/csuite/random_directio/main.c
+++ b/src/third_party/wiredtiger/test/csuite/random_directio/main.c
@@ -94,11 +94,19 @@ static const char *const uri_rev = "table:rev";
#define DEFAULT_CYCLES 5
#define DEFAULT_INTERVAL 3
+#define MAX_CKPT_INVL 6 /* Maximum interval between checkpoints */
+#define MAX_FLUSH_INVL 4 /* Maximum interval between flush_tier calls */
+
#define KEY_SEP "_" /* Must be one char string */
#define ENV_CONFIG \
"create,log=(file_max=10M,enabled)," \
"transaction_sync=(enabled,method=%s)"
+#define ENV_CONFIG_TIER \
+ ",tiered_storage=(bucket=./bucket,bucket_prefix=pfx-,local_retention=2,name=dir_store)"
+#define ENV_CONFIG_TIER_EXT \
+ ",extensions=(%s../../../ext/storage_sources/dir_store/" \
+ "libwiredtiger_dir_store.so=(early_load=true))"
#define ENV_CONFIG_REC "log=(recover=on)"
/* 64 spaces */
@@ -156,6 +164,12 @@ static const char *const uri_rev = "table:rev";
#define SCHEMA_FREQUENCY_DEFAULT 100
static uint64_t schema_frequency;
+/*
+ * TODO: WT-7833 Lock to coordinate inserts and flush_tier. This lock should be removed when that
+ * ticket is fixed. Flush_tier should be able to run with ongoing operations.
+ */
+static pthread_rwlock_t flush_lock;
+
#define TEST_STREQ(expect, got, message) \
do { \
if (!WT_STREQ(expect, got)) { \
@@ -167,17 +181,20 @@ static uint64_t schema_frequency;
/*
* Values for flags used in various places.
*/
-#define SCHEMA_CREATE 0x0001
-#define SCHEMA_CREATE_CHECK 0x0002
-#define SCHEMA_DATA_CHECK 0x0004
-#define SCHEMA_DROP 0x0008
-#define SCHEMA_DROP_CHECK 0x0010
-#define SCHEMA_INTEGRATED 0x0020
-#define SCHEMA_RENAME 0x0040
-#define SCHEMA_VERBOSE 0x0080
+#define SCHEMA_CREATE 0x0001u
+#define SCHEMA_CREATE_CHECK 0x0002u
+#define SCHEMA_DATA_CHECK 0x0004u
+#define SCHEMA_DROP 0x0008u
+#define SCHEMA_DROP_CHECK 0x0010u
+#define SCHEMA_INTEGRATED 0x0020u
+#define SCHEMA_RENAME 0x0040u
+#define SCHEMA_VERBOSE 0x0080u
#define SCHEMA_ALL \
(SCHEMA_CREATE | SCHEMA_CREATE_CHECK | SCHEMA_DATA_CHECK | SCHEMA_DROP | SCHEMA_DROP_CHECK | \
SCHEMA_INTEGRATED | SCHEMA_RENAME)
+#define SCHEMA_MASK 0xffffu
+#define TEST_CKPT 0x10000u
+#define TEST_TIERED 0x20000u
extern int __wt_optind;
extern char *__wt_optarg;
@@ -204,6 +221,8 @@ usage(void)
{
fprintf(stderr, "usage: %s [options]\n", progname);
fprintf(stderr, "options:\n");
+ fprintf(stderr, " %-20s%s\n", "-B", "use tiered storage, requires -C checkpoint [false]");
+ fprintf(stderr, " %-20s%s\n", "-C", "use checkpoint [false]");
fprintf(stderr, " %-20s%s\n", "-d data_size", "approximate size of keys and values [1000]");
fprintf(stderr, " %-20s%s\n", "-f schema frequency",
"restart schema sequence every frequency period [100]");
@@ -376,9 +395,11 @@ schema_operation(WT_SESSION *session, uint32_t threadid, uint64_t id, uint32_t o
testutil_check(session->open_cursor(session, uri1, NULL, NULL, &cursor));
cursor->set_key(cursor, uri1);
cursor->set_value(cursor, uri1);
+ testutil_check(pthread_rwlock_rdlock(&flush_lock));
testutil_check(session->log_printf(session, "INSERT: %s", uri1));
testutil_check(cursor->insert(cursor));
testutil_check(session->log_printf(session, "INSERT: DONE %s", uri1));
+ testutil_check(pthread_rwlock_unlock(&flush_lock));
testutil_check(cursor->close(cursor));
break;
case 2:
@@ -405,9 +426,11 @@ schema_operation(WT_SESSION *session, uint32_t threadid, uint64_t id, uint32_t o
/*
fprintf(stderr, "UPDATE: %s\n", uri2);
*/
+ testutil_check(pthread_rwlock_rdlock(&flush_lock));
testutil_check(session->log_printf(session, "UPDATE: %s", uri2));
testutil_check(cursor->update(cursor));
testutil_check(session->log_printf(session, "UPDATE: DONE %s", uri2));
+ testutil_check(pthread_rwlock_unlock(&flush_lock));
testutil_check(cursor->close(cursor));
break;
case 4:
@@ -438,6 +461,67 @@ schema_operation(WT_SESSION *session, uint32_t threadid, uint64_t id, uint32_t o
return (ret);
}
+/*
+ * thread_ckpt_run --
+ * Runner function for the checkpoint thread.
+ */
+static WT_THREAD_RET
+thread_ckpt_run(void *arg)
+{
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ WT_THREAD_DATA *td;
+ uint32_t sleep_time;
+ int i;
+
+ __wt_random_init(&rnd);
+
+ td = (WT_THREAD_DATA *)arg;
+ /*
+ * Keep a separate file with the records we wrote for checking.
+ */
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ for (i = 1;; ++i) {
+ sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL;
+ sleep(sleep_time);
+ testutil_check(session->checkpoint(session, NULL));
+ printf("Checkpoint %d complete.\n", i);
+ fflush(stdout);
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * thread_flush_run --
+ * Runner function for the flush_tier thread.
+ */
+static WT_THREAD_RET
+thread_flush_run(void *arg)
+{
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ WT_THREAD_DATA *td;
+ uint32_t i, sleep_time;
+
+ __wt_random_init(&rnd);
+
+ td = (WT_THREAD_DATA *)arg;
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ for (i = 1;; ++i) {
+ sleep_time = __wt_random(&rnd) % MAX_FLUSH_INVL;
+ sleep(sleep_time);
+ /*
+ * We are currently not testing any flush_tier configuration strings other than the
+ * defaults, which we expect are what MongoDB wants for now.
+ */
+ testutil_check(pthread_rwlock_wrlock(&flush_lock));
+ testutil_check(session->flush_tier(session, NULL));
+ testutil_check(pthread_rwlock_unlock(&flush_lock));
+ printf("Flush tier %" PRIu32 " completed.\n", i);
+ fflush(stdout);
+ }
+ /* NOTREACHED */
+}
static WT_THREAD_RET thread_run(void *) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
/*
@@ -492,6 +576,7 @@ again:
gen_kv(buf1, kvsize, i, td->id, large, true);
gen_kv(buf2, kvsize, i, td->id, large, false);
+ testutil_check(pthread_rwlock_rdlock(&flush_lock));
testutil_check(session->begin_transaction(session, NULL));
cursor->set_key(cursor, buf1);
/*
@@ -517,8 +602,10 @@ again:
* operations are not part of the transaction operations for the main table. If we are
* running 'integrated' then we'll first do the schema operations and commit later.
*/
- if (!F_ISSET(td, SCHEMA_INTEGRATED))
+ if (!F_ISSET(td, SCHEMA_INTEGRATED)) {
testutil_check(session->commit_transaction(session, NULL));
+ testutil_check(pthread_rwlock_unlock(&flush_lock));
+ }
/*
* If we are doing a schema test, generate operations for additional tables. Each table has
* a 'lifetime' of 4 values of the id.
@@ -538,8 +625,10 @@ again:
/*
* Only rollback if integrated and we have an active transaction.
*/
- if (F_ISSET(td, SCHEMA_INTEGRATED))
+ if (F_ISSET(td, SCHEMA_INTEGRATED)) {
testutil_check(session->rollback_transaction(session, NULL));
+ testutil_check(pthread_rwlock_unlock(&flush_lock));
+ }
sleep(1);
goto again;
}
@@ -547,8 +636,10 @@ again:
/*
* If schema operations are integrated, commit the transaction now that they're complete.
*/
- if (F_ISSET(td, SCHEMA_INTEGRATED))
+ if (F_ISSET(td, SCHEMA_INTEGRATED)) {
testutil_check(session->commit_transaction(session, NULL));
+ testutil_check(pthread_rwlock_unlock(&flush_lock));
+ }
}
/* NOTREACHED */
}
@@ -559,14 +650,20 @@ again:
* threads, and copied/recovered.
*/
static void
-create_db(const char *method)
+create_db(const char *method, uint32_t flags)
{
WT_CONNECTION *conn;
WT_SESSION *session;
- char envconf[512];
+ char envconf[512], tierconf[128];
testutil_check(__wt_snprintf(envconf, sizeof(envconf), ENV_CONFIG, method));
+ if (LF_ISSET(TEST_TIERED)) {
+ testutil_check(__wt_snprintf(tierconf, sizeof(tierconf), ENV_CONFIG_TIER_EXT, ""));
+ strcat(envconf, tierconf);
+ strcat(envconf, ENV_CONFIG_TIER);
+ }
+ printf("create_db: wiredtiger_open configuration: %s\n", envconf);
testutil_check(wiredtiger_open(home, NULL, envconf, &conn));
testutil_check(conn->open_session(conn, NULL, NULL, &session));
testutil_check(session->create(session, uri_main, "key_format=S,value_format=S"));
@@ -593,15 +690,22 @@ fill_db(uint32_t nth, uint32_t datasize, const char *method, uint32_t flags)
WT_CONNECTION *conn;
WT_THREAD_DATA *td;
wt_thread_t *thr;
- uint32_t i;
- char envconf[512];
+ uint32_t ckpt_id, flush_id, i;
+ char envconf[512], tierconf[128];
- thr = dcalloc(nth, sizeof(*thr));
- td = dcalloc(nth, sizeof(WT_THREAD_DATA));
+ /* Allocate number of threads plus two more for checkpoint and flush. */
+ thr = dcalloc(nth + 2, sizeof(*thr));
+ td = dcalloc(nth + 2, sizeof(WT_THREAD_DATA));
if (chdir(home) != 0)
testutil_die(errno, "Child chdir: %s", home);
testutil_check(__wt_snprintf(envconf, sizeof(envconf), ENV_CONFIG, method));
+ if (LF_ISSET(TEST_TIERED)) {
+ testutil_check(__wt_snprintf(tierconf, sizeof(tierconf), ENV_CONFIG_TIER_EXT, "../"));
+ strcat(envconf, tierconf);
+ strcat(envconf, ENV_CONFIG_TIER);
+ }
+ printf("fill_db: wiredtiger_open configuration: %s\n", envconf);
testutil_check(wiredtiger_open(".", NULL, envconf, &conn));
datasize += 1; /* Add an extra byte for string termination */
@@ -617,6 +721,18 @@ fill_db(uint32_t nth, uint32_t datasize, const char *method, uint32_t flags)
}
printf("Spawned %" PRIu32 " writer threads\n", nth);
fflush(stdout);
+ if (LF_ISSET(TEST_CKPT)) {
+ ckpt_id = nth;
+ td[ckpt_id].conn = conn;
+ td[ckpt_id].id = ckpt_id;
+ testutil_check(__wt_thread_create(NULL, &thr[ckpt_id], thread_ckpt_run, &td[ckpt_id]));
+ }
+ if (LF_ISSET(TEST_TIERED)) {
+ flush_id = nth + 1;
+ td[flush_id].conn = conn;
+ td[flush_id].id = flush_id;
+ testutil_check(__wt_thread_create(NULL, &thr[flush_id], thread_flush_run, &td[flush_id]));
+ }
/*
* The threads never exit, so the child will just wait here until it is killed.
*/
@@ -805,7 +921,7 @@ check_db(uint32_t nth, uint32_t datasize, pid_t pid, bool directio, uint32_t fla
uint64_t gotid, id;
uint64_t *lastid;
uint32_t gotth, kvsize, th, threadmap;
- char checkdir[4096], dbgdir[4096], savedir[4096];
+ char checkdir[4096], dbgdir[4096], envconf[512], savedir[4096], tierconf[128];
char *gotkey, *gotvalue, *keybuf, *p;
char **large_arr;
@@ -837,7 +953,13 @@ check_db(uint32_t nth, uint32_t datasize, pid_t pid, bool directio, uint32_t fla
copy_directory(checkdir, savedir, false);
printf("Open database, run recovery and verify content\n");
- ret = wiredtiger_open(checkdir, NULL, ENV_CONFIG_REC, &conn);
+ testutil_check(__wt_snprintf(envconf, sizeof(envconf), ENV_CONFIG_REC));
+ if (LF_ISSET(TEST_TIERED)) {
+ testutil_check(__wt_snprintf(tierconf, sizeof(tierconf), ENV_CONFIG_TIER_EXT, ""));
+ strcat(envconf, tierconf);
+ strcat(envconf, ENV_CONFIG_TIER);
+ }
+ ret = wiredtiger_open(checkdir, NULL, envconf, &conn);
/* If this fails, abort the child process before we die so we can see what it was doing. */
if (ret != 0) {
if (pid != 0)
@@ -1091,8 +1213,14 @@ main(int argc, char *argv[])
__wt_snprintf_len_set(p, sizeof(args) - (size_t)(p - args), &size, " %s", argv[i]));
p += size;
}
- while ((ch = __wt_getopt(progname, argc, argv, "d:f:h:i:m:n:pS:T:t:v")) != EOF)
+ while ((ch = __wt_getopt(progname, argc, argv, "BCd:f:h:i:m:n:PpS:T:t:v")) != EOF)
switch (ch) {
+ case 'B':
+ LF_SET(TEST_TIERED);
+ break;
+ case 'C':
+ LF_SET(TEST_CKPT);
+ break;
case 'd':
datasize = (uint32_t)atoi(__wt_optarg);
if (datasize > LARGE_WRITE_SIZE || datasize < MIN_DATA_SIZE) {
@@ -1144,7 +1272,7 @@ main(int argc, char *argv[])
else if (WT_STREQ(arg, "integrated"))
LF_SET(SCHEMA_INTEGRATED);
else if (WT_STREQ(arg, "none"))
- flags = 0;
+ flags = flags & ~SCHEMA_MASK;
else if (WT_STREQ(arg, "rename"))
LF_SET(SCHEMA_RENAME);
else if (WT_STREQ(arg, "verbose"))
@@ -1173,6 +1301,10 @@ main(int argc, char *argv[])
if (argc != 0)
usage();
+ if (LF_ISSET(TEST_TIERED) && !LF_ISSET(TEST_CKPT))
+ usage();
+
+ testutil_check(pthread_rwlock_init(&flush_lock, NULL));
testutil_work_dir_from_path(home, sizeof(home), working_dir);
/*
* If the user wants to verify they need to tell us how many threads there were so we know what
@@ -1199,6 +1331,10 @@ main(int argc, char *argv[])
if ((status = system(buf)) < 0)
testutil_die(status, "system: %s", buf);
testutil_make_work_dir(home);
+ if (LF_ISSET(TEST_TIERED)) {
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s/bucket", home));
+ testutil_make_work_dir(buf);
+ }
__wt_random_init_seed(NULL, &rnd);
if (rand_time) {
@@ -1213,7 +1349,7 @@ main(int argc, char *argv[])
}
printf("Parent: Create %" PRIu32 " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
- create_db(method);
+ create_db(method, flags);
if (!populate_only) {
/*
* Fork a child to insert as many items. We will then randomly suspend the child, run
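The random_directio changes above serialize flush_tier against in-flight operations with a pthread_rwlock: the insert/update paths take the lock shared and thread_flush_run takes it exclusively around session->flush_tier. A small standalone sketch of that reader/writer pattern using std::shared_mutex instead of pthreads (the worker and flusher bodies are placeholders):

#include <atomic>
#include <chrono>
#include <iostream>
#include <mutex>
#include <shared_mutex>
#include <thread>
#include <vector>

std::shared_mutex flush_lock;
std::atomic<bool> running{true};

void
worker()
{
    while (running) {
        /* Inserts run concurrently with each other: shared ("read") lock. */
        std::shared_lock<std::shared_mutex> guard(flush_lock);
        /* ... begin transaction, insert, commit ... */
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
}

void
flusher()
{
    while (running) {
        std::this_thread::sleep_for(std::chrono::milliseconds(20));
        /* flush_tier excludes all inserts: exclusive ("write") lock. */
        std::unique_lock<std::shared_mutex> guard(flush_lock);
        std::cout << "flush_tier\n";
    }
}

int main()
{
    std::vector<std::thread> threads;
    for (int i = 0; i < 4; ++i)
        threads.emplace_back(worker);
    threads.emplace_back(flusher);

    std::this_thread::sleep_for(std::chrono::milliseconds(200));
    running = false;
    for (auto &t : threads)
        t.join();
    return 0;
}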
diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/util.c b/src/third_party/wiredtiger/test/csuite/random_directio/util.c
index 4b91501a2d8..0806ba878f4 100644
--- a/src/third_party/wiredtiger/test/csuite/random_directio/util.c
+++ b/src/third_party/wiredtiger/test/csuite/random_directio/util.c
@@ -40,11 +40,12 @@
#define COPY_BUF_SIZE ((size_t)(64 * 1024))
/*
- * copy_directory --
- * Copy a directory, using direct IO if indicated.
+ * copy_directory_int --
+ * Copy a directory, using direct IO if indicated. Recursive internal function. Assumes all
+ * cleanup has already happened at the destination.
*/
-void
-copy_directory(const char *fromdir, const char *todir, bool directio)
+static void
+copy_directory_int(const char *fromdir, const char *todir, bool directio)
{
struct dirent *dp;
struct stat sb;
@@ -66,21 +67,6 @@ copy_directory(const char *fromdir, const char *todir, bool directio)
buf = NULL;
blksize = bufsize = 0;
- dirp = opendir(todir);
- if (dirp != NULL) {
- while ((dp = readdir(dirp)) != NULL) {
- /*
- * Skip . and ..
- */
- if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
- continue;
- testutil_check(__wt_snprintf(tofile, sizeof(tofile), "%s/%s", todir, dp->d_name));
- testutil_check(unlink(tofile));
- }
- testutil_check(closedir(dirp));
- testutil_check(rmdir(todir));
- }
-
testutil_check(mkdir(todir, 0777));
dirp = opendir(fromdir);
testutil_assert(dirp != NULL);
@@ -91,9 +77,13 @@ copy_directory(const char *fromdir, const char *todir, bool directio)
*/
if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
continue;
-
testutil_check(__wt_snprintf(fromfile, sizeof(fromfile), "%s/%s", fromdir, dp->d_name));
testutil_check(__wt_snprintf(tofile, sizeof(tofile), "%s/%s", todir, dp->d_name));
+ if (dp->d_type == DT_DIR) {
+ copy_directory_int(fromfile, tofile, directio);
+ continue;
+ }
+
rfd = open(fromfile, O_RDONLY | openflags, 0);
/*
* The child process may have been stopped during a drop and WiredTiger drop will do an
@@ -155,3 +145,30 @@ copy_directory(const char *fromdir, const char *todir, bool directio)
testutil_check(closedir(dirp));
free(orig_buf);
}
+
+/*
+ * clean_directory --
+ * Clean up a directory; use system() so sub-directories are removed too.
+ */
+static void
+clean_directory(const char *todir)
+{
+ int status;
+ char buf[512];
+
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "rm -rf %s", todir));
+ if ((status = system(buf)) < 0)
+ testutil_die(status, "system: %s", buf);
+}
+
+/*
+ * copy_directory --
+ * Copy a directory, using direct IO if indicated. Wrapper around the internal function, which
+ * recurses when sub-directories are present.
+ */
+void
+copy_directory(const char *fromdir, const char *todir, bool directio)
+{
+ clean_directory(todir);
+ copy_directory_int(fromdir, todir, directio);
+}
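copy_directory above now recurses into sub-directories (needed once tiered storage adds a bucket directory under the database home) and clean_directory shells out to rm -rf before the copy. A hedged sketch of the same clean-then-copy-recursively flow using std::filesystem, ignoring the direct-IO handling the test needs:

#include <filesystem>
#include <iostream>

namespace fs = std::filesystem;

/* Remove the destination, then copy the source tree recursively. */
void
copy_tree(const fs::path &fromdir, const fs::path &todir)
{
    fs::remove_all(todir);                                 /* Plays the role of clean_directory. */
    fs::copy(fromdir, todir, fs::copy_options::recursive); /* Recurses like copy_directory_int. */
}

int main(int argc, char *argv[])
{
    if (argc != 3) {
        std::cerr << "usage: copy_tree <from> <to>\n";
        return 1;
    }
    copy_tree(argv[1], argv[2]);
    return 0;
}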
diff --git a/src/third_party/wiredtiger/test/csuite/schema_abort/main.c b/src/third_party/wiredtiger/test/csuite/schema_abort/main.c
index 3f93ec25a02..456087b3e04 100644
--- a/src/third_party/wiredtiger/test/csuite/schema_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/schema_abort/main.c
@@ -55,7 +55,8 @@ static char home[1024]; /* Program working dir */
* inserted and it records the timestamp that was used for that insertion.
*/
#define INVALID_KEY UINT64_MAX
-#define MAX_CKPT_INVL 2 /* Maximum interval between checkpoints */
+#define MAX_CKPT_INVL 6 /* Maximum interval between checkpoints */
+#define MAX_FLUSH_INVL 4 /* Maximum interval between flush_tier calls */
/* Set large, some slow I/O systems take tens of seconds to fsync. */
#define MAX_STARTUP 30 /* Seconds to start up and set stable */
#define MAX_TH 12
@@ -75,7 +76,7 @@ static const char *const uri_collection = "table:collection";
static const char *const ckpt_file = "checkpoint_done";
-static bool compat, inmem, use_columns, use_ts, use_txn;
+static bool compat, inmem, tiered, use_columns, use_ts, use_txn;
static volatile bool stable_set;
static volatile uint64_t global_ts = 1;
static volatile uint64_t uid = 1;
@@ -85,11 +86,22 @@ typedef struct {
} THREAD_TS;
static volatile THREAD_TS th_ts[MAX_TH];
+/*
+ * TODO: WT-7833 Lock to coordinate inserts and flush_tier. This lock should be removed when that
+ * ticket is fixed. Flush_tier should be able to run with ongoing operations.
+ */
+static pthread_rwlock_t flush_lock;
+
#define ENV_CONFIG_COMPAT ",compatibility=(release=\"2.9\")"
#define ENV_CONFIG_DEF \
"create," \
"eviction_updates_trigger=95,eviction_updates_target=80," \
"log=(enabled,file_max=10M,remove=false)"
+#define ENV_CONFIG_TIER \
+ ",tiered_storage=(bucket=./bucket,bucket_prefix=pfx-,local_retention=2,name=dir_store)"
+#define ENV_CONFIG_TIER_EXT \
+ ",extensions=(../../../../ext/storage_sources/dir_store/" \
+ "libwiredtiger_dir_store.so=(early_load=true))"
#define ENV_CONFIG_TXNSYNC \
ENV_CONFIG_DEF \
",transaction_sync=(enabled,method=none)"
@@ -157,7 +169,7 @@ subtest_error_handler(
/* Filter out errors about bulk load usage - they are annoying */
if (strstr(message, "bulk-load is only supported on newly") == NULL)
- fprintf(stderr, "%s", message);
+ fprintf(stderr, "%s\n", message);
return (0);
}
@@ -245,7 +257,7 @@ test_bulk_unique(THREAD_DATA *td, int force)
WT_DECL_RET;
WT_SESSION *session;
uint64_t my_uid;
- char new_uri[64];
+ char dropconf[128], new_uri[64];
testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
@@ -267,9 +279,13 @@ test_bulk_unique(THREAD_DATA *td, int force)
else if (ret != EINVAL)
testutil_die(ret, "session.open_cursor bulk unique: %s, new_uri");
- while ((ret = session->drop(session, new_uri, force ? "force" : NULL)) != 0)
+ testutil_check(__wt_snprintf(dropconf, sizeof(dropconf), "force=%s", force ? "true" : "false"));
+ /* For testing we want to remove objects too. */
+ if (tiered)
+ strcat(dropconf, ",remove_shared=true");
+ while ((ret = session->drop(session, new_uri, dropconf)) != 0)
if (ret != EBUSY)
- testutil_die(ret, "session.drop: %s", new_uri);
+ testutil_die(ret, "session.drop: %s %s", new_uri, dropconf);
if (use_txn && (ret = session->commit_transaction(session, NULL)) != 0 && ret != EINVAL)
testutil_die(ret, "session.commit bulk unique");
@@ -407,6 +423,9 @@ test_upgrade(THREAD_DATA *td)
WT_DECL_RET;
WT_SESSION *session;
+ /* FIXME-WT-9423 Remove this return when tiered storage supports upgrade. */
+ if (tiered)
+ return;
testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
if ((ret = session->upgrade(session, uri, NULL)) != 0)
@@ -426,6 +445,9 @@ test_verify(THREAD_DATA *td)
WT_DECL_RET;
WT_SESSION *session;
+ /* FIXME-WT-9423 Remove this return when tiered storage supports verify. */
+ if (tiered)
+ return;
testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
if ((ret = session->verify(session, uri, NULL)) != 0)
@@ -442,8 +464,8 @@ test_verify(THREAD_DATA *td)
static WT_THREAD_RET
thread_ts_run(void *arg)
{
- WT_SESSION *session;
THREAD_DATA *td;
+ WT_SESSION *session;
uint64_t i, last_ts, oldest_ts, this_ts;
char tscfg[64];
@@ -501,9 +523,9 @@ thread_ckpt_run(void *arg)
{
struct timespec now, start;
FILE *fp;
+ THREAD_DATA *td;
WT_RAND_STATE rnd;
WT_SESSION *session;
- THREAD_DATA *td;
uint64_t ts;
uint32_t sleep_time;
int i;
@@ -523,7 +545,7 @@ thread_ckpt_run(void *arg)
* Keep writing checkpoints until killed by parent.
*/
__wt_epoch(NULL, &start);
- for (i = 0;;) {
+ for (i = 1;; ++i) {
sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL;
sleep(sleep_time);
if (use_ts) {
@@ -551,7 +573,7 @@ thread_ckpt_run(void *arg)
* Since this is the default, send in this string even if running without timestamps.
*/
testutil_check(session->checkpoint(session, "use_timestamp=true"));
- printf("Checkpoint %d complete. Minimum ts %" PRIu64 "\n", ++i, ts);
+ printf("Checkpoint %d complete. Minimum ts %" PRIu64 "\n", i, ts);
fflush(stdout);
/*
* Create the checkpoint file so that the parent process knows at least one checkpoint has
@@ -569,6 +591,42 @@ thread_ckpt_run(void *arg)
}
/*
+ * thread_flush_run --
+ * Runner function for the flush_tier thread.
+ */
+static WT_THREAD_RET
+thread_flush_run(void *arg)
+{
+ THREAD_DATA *td;
+ WT_DECL_RET;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ uint32_t i, sleep_time;
+
+ __wt_random_init(&rnd);
+
+ td = (THREAD_DATA *)arg;
+ testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
+ for (i = 1;; ++i) {
+ sleep_time = __wt_random(&rnd) % MAX_FLUSH_INVL;
+ sleep(sleep_time);
+ /*
+ * We are currently not testing any flush_tier configuration strings other than the
+ * defaults, which we expect are what MongoDB wants for now.
+ */
+ testutil_check(pthread_rwlock_wrlock(&flush_lock));
+ if ((ret = session->flush_tier(session, NULL)) != 0) {
+ if (ret != EBUSY)
+ testutil_die(ret, "session.flush_tier");
+ } else
+ printf("Flush tier %" PRIu32 " completed.\n", i);
+ testutil_check(pthread_rwlock_unlock(&flush_lock));
+ fflush(stdout);
+ }
+ /* NOTREACHED */
+}
+
+/*
* thread_run --
* Runner function for the worker threads.
*/
@@ -576,11 +634,11 @@ static WT_THREAD_RET
thread_run(void *arg)
{
FILE *fp;
+ THREAD_DATA *td;
WT_CURSOR *cur_coll, *cur_local, *cur_oplog;
WT_ITEM data;
WT_RAND_STATE rnd;
WT_SESSION *oplog_session, *session;
- THREAD_DATA *td;
uint64_t i, stable_ts;
char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL];
char kname[64], tscfg[64];
@@ -682,6 +740,7 @@ thread_run(void *arg)
if (use_ts)
stable_ts = __wt_atomic_addv64(&global_ts, 1);
+ testutil_check(pthread_rwlock_rdlock(&flush_lock));
testutil_check(session->begin_transaction(session, NULL));
if (use_prep)
testutil_check(oplog_session->begin_transaction(oplog_session, NULL));
@@ -758,6 +817,7 @@ thread_run(void *arg)
data.data = lbuf;
cur_local->set_value(cur_local, &data);
testutil_check(cur_local->insert(cur_local));
+ testutil_check(pthread_rwlock_unlock(&flush_lock));
/*
* Save the timestamp and key separately for checking later.
@@ -782,11 +842,11 @@ run_workload(uint32_t nth)
WT_SESSION *session;
THREAD_DATA *td;
wt_thread_t *thr;
- uint32_t ckpt_id, i, ts_id;
- char envconf[512], tableconf[128];
+ uint32_t ckpt_id, flush_id, i, ts_id;
+ char envconf[1024], tableconf[128];
- thr = dcalloc(nth + 2, sizeof(*thr));
- td = dcalloc(nth + 2, sizeof(THREAD_DATA));
+ thr = dcalloc(nth + 3, sizeof(*thr));
+ td = dcalloc(nth + 3, sizeof(THREAD_DATA));
stable_set = false;
if (chdir(home) != 0)
testutil_die(errno, "Child chdir: %s", home);
@@ -796,6 +856,10 @@ run_workload(uint32_t nth)
strcpy(envconf, ENV_CONFIG_TXNSYNC);
if (compat)
strcat(envconf, ENV_CONFIG_COMPAT);
+ if (tiered) {
+ strcat(envconf, ENV_CONFIG_TIER_EXT);
+ strcat(envconf, ENV_CONFIG_TIER);
+ }
testutil_check(wiredtiger_open(NULL, &event_handler, envconf, &conn));
testutil_check(conn->open_session(conn, NULL, NULL, &session));
@@ -830,6 +894,13 @@ run_workload(uint32_t nth)
printf("Create timestamp thread\n");
testutil_check(__wt_thread_create(NULL, &thr[ts_id], thread_ts_run, &td[ts_id]));
}
+ flush_id = nth + 2;
+ if (tiered) {
+ td[flush_id].conn = conn;
+ td[flush_id].info = nth;
+ printf("Create flush_tier thread\n");
+ testutil_check(__wt_thread_create(NULL, &thr[flush_id], thread_flush_run, &td[flush_id]));
+ }
printf("Create %" PRIu32 " writer threads\n", nth);
for (i = 0; i < nth; ++i) {
td[i].conn = conn;
@@ -918,14 +989,14 @@ main(int argc, char *argv[])
uint64_t stable_fp, stable_val;
uint32_t i, nth, timeout;
int ch, status;
- char buf[512], statname[1024];
+ char buf[1024], statname[1024];
char fname[64], kname[64];
const char *working_dir;
bool fatal, preserve, rand_th, rand_time, verify_only;
(void)testutil_set_progname(argv);
- compat = inmem = false;
+ compat = inmem = tiered = false;
use_ts = true;
/*
* Setting this to false forces us to use internal library code. Allow an override but default
@@ -939,8 +1010,11 @@ main(int argc, char *argv[])
verify_only = false;
working_dir = "WT_TEST.schema-abort";
- while ((ch = __wt_getopt(progname, argc, argv, "Cch:mpT:t:vxz")) != EOF)
+ while ((ch = __wt_getopt(progname, argc, argv, "BCch:mpT:t:vxz")) != EOF)
switch (ch) {
+ case 'B':
+ tiered = true;
+ break;
case 'C':
compat = true;
break;
@@ -981,6 +1055,7 @@ main(int argc, char *argv[])
if (argc != 0)
usage();
+ testutil_check(pthread_rwlock_init(&flush_lock, NULL));
testutil_work_dir_from_path(home, sizeof(home), working_dir);
/*
* If the user wants to verify they need to tell us how many threads there were so we can find
@@ -992,6 +1067,10 @@ main(int argc, char *argv[])
}
if (!verify_only) {
testutil_make_work_dir(home);
+ if (tiered) {
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s/bucket", home));
+ testutil_make_work_dir(buf);
+ }
__wt_random_init_seed(NULL, &rnd);
if (rand_time) {
@@ -1008,8 +1087,9 @@ main(int argc, char *argv[])
printf("Parent: compatibility: %s, in-mem log sync: %s, timestamp in use: %s\n",
compat ? "true" : "false", inmem ? "true" : "false", use_ts ? "true" : "false");
printf("Parent: Create %" PRIu32 " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
- printf("CONFIG: %s%s%s%s -h %s -T %" PRIu32 " -t %" PRIu32 "\n", progname,
- compat ? " -C" : "", inmem ? " -m" : "", !use_ts ? " -z" : "", working_dir, nth, timeout);
+ printf("CONFIG: %s%s%s%s%s -h %s -T %" PRIu32 " -t %" PRIu32 "\n", progname,
+ compat ? " -C" : "", inmem ? " -m" : "", tiered ? " -B" : "", !use_ts ? " -z" : "",
+ working_dir, nth, timeout);
/*
* Fork a child to insert as many items. We will then randomly kill the child, run recovery
* and make sure all items we wrote exist after recovery runs.
@@ -1059,10 +1139,15 @@ main(int argc, char *argv[])
testutil_copy_data(home);
printf("Open database, run recovery and verify content\n");
+ strcpy(buf, ENV_CONFIG_REC);
+ if (tiered) {
+ strcat(buf, ENV_CONFIG_TIER_EXT);
+ strcat(buf, ENV_CONFIG_TIER);
+ }
/*
* Open the connection which forces recovery to be run.
*/
- testutil_check(wiredtiger_open(NULL, &event_handler, ENV_CONFIG_REC, &conn));
+ testutil_check(wiredtiger_open(NULL, &event_handler, buf, &conn));
testutil_check(conn->open_session(conn, NULL, NULL, &session));
/*
* Open a cursor on all the tables.
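The schema_abort changes above assemble the wiredtiger_open configuration conditionally: the base/txnsync string, optionally the compatibility clause, and for -B the dir_store extension plus the tiered_storage settings, both when running the workload and again when reopening for recovery. A sketch of that conditional assembly using std::string instead of strcat into fixed buffers (the literal fragments are copied from the macros in this file; the helper itself is illustrative):

#include <iostream>
#include <string>

std::string
build_env_config(bool compat, bool tiered)
{
    std::string conf =
      "create,eviction_updates_trigger=95,eviction_updates_target=80,"
      "log=(enabled,file_max=10M,remove=false),transaction_sync=(enabled,method=none)";
    if (compat)
        conf += ",compatibility=(release=\"2.9\")";
    if (tiered) {
        conf += ",extensions=(../../../../ext/storage_sources/dir_store/"
                "libwiredtiger_dir_store.so=(early_load=true))";
        conf +=
          ",tiered_storage=(bucket=./bucket,bucket_prefix=pfx-,local_retention=2,name=dir_store)";
    }
    return conf;
}

int main()
{
    std::cout << build_env_config(false, true) << "\n";
    return 0;
}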
diff --git a/src/third_party/wiredtiger/test/csuite/tiered_abort/main.c b/src/third_party/wiredtiger/test/csuite/tiered_abort/main.c
index 76aa0dd7eb2..da22bf30bd6 100644
--- a/src/third_party/wiredtiger/test/csuite/tiered_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/tiered_abort/main.c
@@ -211,7 +211,7 @@ thread_ckpt_run(void *arg)
* Keep a separate file with the records we wrote for checking.
*/
testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
- for (i = 0;; ++i) {
+ for (i = 1;; ++i) {
sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL;
sleep(sleep_time);
/*
@@ -250,7 +250,12 @@ thread_flush_run(void *arg)
testutil_check(__wt_snprintf(buf, sizeof(buf), "%s/%s", home, sentinel_file));
(void)unlink(buf);
testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
- for (i = 0;;) {
+ /*
+ * Increment at the end of the loop so we only count actual calls to flush_tier and don't
+ * increment on iterations we skip while waiting for the first checkpoint. The condition for
+ * creating the sentinel file requires an accurate count.
+ */
+ for (i = 1;;) {
sleep_time = __wt_random(&rnd) % MAX_FLUSH_INVL;
sleep(sleep_time);
testutil_check(td->conn->query_timestamp(td->conn, ts_string, "get=last_checkpoint"));
@@ -271,10 +276,11 @@ thread_flush_run(void *arg)
* Create the sentinel file so that the parent process knows the desired number of
* flush_tier calls have finished and can start its timer.
*/
- if (++i == flush_calls) {
+ if (i == flush_calls) {
testutil_assert_errno((fp = fopen(buf, "w")) != NULL);
testutil_assert_errno(fclose(fp) == 0);
}
+ ++i;
}
/* NOTREACHED */
}
@@ -619,15 +625,12 @@ verify_tiered(WT_SESSION *session)
testutil_check(__wt_snprintf(buf, sizeof(buf), "%s/%s", home, name));
ret = stat(buf, &sb);
/*
- * Logged tables, i.e. "oplog" or "local" may be unable to remove the last object
- * from before the restart due to recovery applying log records. So if we get a stat
- * return that indicates the file exists, verify it is one of those tables.
+ * If stat indicates the file exists, verify it is the second-to-last object only.
+ * Since we're running with the flush_checkpoint debug mode turned on, the recovery
+ * and checkpoint after flush_tier may open the last object that existed prior to the
+ * crash. No earlier object may exist.
*/
- if (i == last - 1 && ret == 0)
- testutil_assert(
- WT_PREFIX_MATCH(name, uri_local) || WT_PREFIX_MATCH(name, uri_oplog));
- else
- testutil_assert(ret != 0);
+ testutil_assert(ret != 0 || i == last - 1);
/* Verify earlier objects exist in the bucket directory. */
testutil_check(
__wt_snprintf(buf, sizeof(buf), "%s/%s/%s%s", home, BUCKET, BUCKET_PFX, name));
@@ -805,8 +808,9 @@ main(int argc, char *argv[])
if (chdir(home) != 0)
testutil_die(errno, "parent chdir: %s", home);
- /* Copy the data to a separate folder for debugging purpose. */
- testutil_copy_data(home);
+ if (!verify_only)
+ /* Copy the data to a separate folder for debugging purposes. */
+ testutil_copy_data(home);
/* Come back to root directory, so we can link wiredtiger with extensions properly. */
if (chdir("../") != 0)
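tiered_abort signals its parent through a sentinel file once the desired number of flush_tier calls has completed, and the counter fix above makes sure only completed flushes count toward that threshold. A stripped-down sketch of the file-based handshake (the sentinel name and flush_calls value are stand-ins):

#include <cstdio>
#include <iostream>

int main()
{
    const char *sentinel = "flush_tier_done"; /* Hypothetical sentinel file name. */
    const int flush_calls = 3;

    for (int i = 1;; ++i) {
        /* ... one flush_tier call would happen here ... */
        std::cout << "flush " << i << " completed\n";

        /*
         * Create the sentinel exactly when the target count is reached; the parent process
         * polls for this file's existence and starts its kill timer once it appears.
         */
        if (i == flush_calls) {
            std::FILE *fp = std::fopen(sentinel, "w");
            if (fp != nullptr)
                std::fclose(fp);
            break; /* The real test keeps flushing until killed; stop here for the sketch. */
        }
    }
    return 0;
}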
diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
index a20b1d23d81..66769c008ff 100644
--- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
@@ -224,7 +224,7 @@ thread_ckpt_run(void *arg)
(void)unlink(ckpt_file);
testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
first_ckpt = true;
- for (i = 0;; ++i) {
+ for (i = 1;; ++i) {
sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL;
sleep(sleep_time);
/*
diff --git a/src/third_party/wiredtiger/test/csuite/wt8057_compact_stress/main.c b/src/third_party/wiredtiger/test/csuite/wt8057_compact_stress/main.c
index baa6d920a8e..dfcbe95c23b 100644
--- a/src/third_party/wiredtiger/test/csuite/wt8057_compact_stress/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt8057_compact_stress/main.c
@@ -69,7 +69,7 @@ subtest_error_handler(
(void)(handler);
(void)(session);
(void)(error);
- fprintf(stderr, "%s", message);
+ fprintf(stderr, "%s\n", message);
return (0);
}
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 105abffe6d7..a58e73d2715 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -168,8 +168,8 @@ functions:
mkdir -p cmake_build
cd cmake_build
$CMAKE -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/mongodbtoolchain_v4_clang.cmake -DCMAKE_C_FLAGS="-ggdb" -DWITH_PIC=1 \
- -DHAVE_DIAGNOSTIC=1 ${NON_BARRIER_DIAGNOSTIC_YIELDS|} -DCMAKE_BUILD_TYPE=ASan \
- -DHAVE_BUILTIN_EXTENSION_LZ4=1 -DHAVE_BUILTIN_EXTENSION_SNAPPY=1 -DHAVE_BUILTIN_EXTENSION_ZLIB=1 ${configure_python_setting|} \
+ -DENABLE_STRICT=1 -DHAVE_DIAGNOSTIC=1 ${NON_BARRIER_DIAGNOSTIC_YIELDS|} -DCMAKE_BUILD_TYPE=ASan \
+ -DHAVE_BUILTIN_EXTENSION_LZ4=1 -DHAVE_BUILTIN_EXTENSION_SNAPPY=1 -DHAVE_BUILTIN_EXTENSION_ZLIB=1 -DHAVE_BUILTIN_EXTENSION_ZSTD=1 ${configure_python_setting|} \
-G "${cmake_generator|Ninja}" ../.
fi
- *make_wiredtiger
@@ -347,15 +347,68 @@ functions:
${test_env_vars|} $CTEST -L check ${smp_command|} --output-on-failure ${check_args|} 2>&1
"cppsuite test":
- command: shell.exec
- params:
- # The tests need to be executed in the cppsuite directory as some required libraries have
- # their paths defined relative to this directory.
- working_dir: "wiredtiger/cmake_build/test/cppsuite"
- script: |
- set -o errexit
- set -o verbose
- ${test_env_vars|} ./run -t ${test_name} -C '${test_config}' -f ${test_config_filename} -l 2
+ - command: shell.exec
+ params:
+ # The tests need to be executed in the cppsuite directory as some required libraries have
+ # their paths defined relative to this directory.
+ # The script below saves the test's exit code so it can be used later in this function. This
+ # lets us define our own custom artifact upload task without it being cancelled by the test
+ # failing.
+ # Additionally, if the test fails, perf statistics won't be uploaded, as they may be invalid
+ # due to the failure.
+ working_dir: "wiredtiger/cmake_build/test/cppsuite"
+ shell: bash
+ script: |
+ set -o verbose
+ ${test_env_vars|} ./run -t ${test_name} -C '${test_config}' -f ${test_config_filename} -l 2
+ exit_code=$?
+ echo "$exit_code" > cppsuite_exit_code
+ if [ "$exit_code" != 0 ]; then
+ echo "[{\"info\":{\"test_name\": \"${test_name}\"},\"metrics\": []}]" > ${test_name}.json
+ fi
+ exit 0
+ - command: perf.send
+ params:
+ file: ./wiredtiger/cmake_build/test/cppsuite/${test_name}.json
+ # Delete unnecessary data from the upload.
+ - command: shell.exec
+ params:
+ script: |
+ rm -rf wiredtiger/cmake_build/examples
+ rm -rf wiredtiger/cmake_build/bench
+ mv wiredtiger/cmake_build/test/cppsuite wiredtiger/cmake_build/
+ rm -rf wiredtiger/cmake_build/test/
+ mkdir wiredtiger/cmake_build/test/
+ mv wiredtiger/cmake_build/cppsuite wiredtiger/cmake_build/test/cppsuite
+ - command: archive.targz_pack
+ params:
+ target: archive.tgz
+ source_dir: wiredtiger/cmake_build/
+ include:
+ - "./**"
+ - command: s3.put
+ params:
+ aws_secret: ${aws_secret}
+ aws_key: ${aws_key}
+ local_file: archive.tgz
+ bucket: build_external
+ permissions: public-read
+ content_type: application/tar
+ display_name: cppsuite-test
+ remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${task_name}_${build_id}${postfix|}.tgz
+ # We remove the wiredtiger directory here to avoid it being archived again by post tasks.
+ - command: shell.exec
+ params:
+ shell: bash
+ script: |
+ set -o verbose
+ if [ -f wiredtiger/cmake_build/test/cppsuite/cppsuite_exit_code ]; then
+ exit_code=`cat wiredtiger/cmake_build/test/cppsuite/cppsuite_exit_code`
+ else
+ exit_code=0
+ fi
+ rm -rf wiredtiger
+ exit "$exit_code"
"wt2853_perf test":
command: shell.exec
@@ -749,9 +802,7 @@ variables:
exec_timeout_secs: 25200
commands:
- func: "get project"
- - func: "compile wiredtiger"
- vars:
- posix_configure_flags: -DENABLE_STRICT=1 -DHAVE_DIAGNOSTIC=1 -DHAVE_BUILTIN_EXTENSION_LZ4=1 -DHAVE_BUILTIN_EXTENSION_SNAPPY=1 -DHAVE_BUILTIN_EXTENSION_ZLIB=1 -DHAVE_BUILTIN_EXTENSION_ZSTD=1
+ - func: "compile wiredtiger address sanitizer"
- func: "format test script"
vars:
format_test_script_args: -R -t 360
@@ -1067,9 +1118,6 @@ tasks:
test_config: debug_mode=(cursor_copy=true)
test_config_filename: configs/operations_test_default.txt
test_name: operations_test
- - func: "upload test stats"
- vars:
- test_path: test/cppsuite/operations_test
- name: cppsuite-hs-cleanup-default
tags: ["pull_request"]
@@ -1082,9 +1130,18 @@ tasks:
test_config: debug_mode=(cursor_copy=true)
test_config_filename: configs/hs_cleanup_default.txt
test_name: hs_cleanup
- - func: "upload test stats"
+
+ - name: cppsuite-burst-inserts-default
+ tags: ["pull_request"]
+ depends_on:
+ - name: compile
+ commands:
+ - func: "fetch artifacts"
+ - func: "cppsuite test"
vars:
- test_path: test/cppsuite/hs_cleanup
+ test_config: debug_mode=(cursor_copy=true)
+ test_config_filename: configs/burst_inserts_default.txt
+ test_name: burst_inserts
- name: cppsuite-search-near-01-default
tags: ["pull_request"]
@@ -1096,9 +1153,6 @@ tasks:
vars:
test_config_filename: configs/search_near_01_default.txt
test_name: search_near_01
- - func: "upload test stats"
- vars:
- test_path: test/cppsuite/search_near_01
- name: cppsuite-search-near-02-default
tags: ["pull_request"]
@@ -1110,9 +1164,6 @@ tasks:
vars:
test_config_filename: configs/search_near_02_default.txt
test_name: search_near_02
- - func: "upload test stats"
- vars:
- test_path: test/cppsuite/search_near_02
- name: cppsuite-search-near-03-default
tags: ["pull_request"]
@@ -1124,9 +1175,6 @@ tasks:
vars:
test_config_filename: configs/search_near_03_default.txt
test_name: search_near_03
- - func: "upload test stats"
- vars:
- test_path: test/cppsuite/search_near_03
- name: cppsuite-bounded-cursor-perf-default
tags: ["pull_request"]
@@ -1148,9 +1196,6 @@ tasks:
vars:
test_config_filename: configs/operations_test_stress.txt
test_name: operations_test
- - func: "upload test stats"
- vars:
- test_path: test/cppsuite/operations_test
- name: cppsuite-hs-cleanup-stress
depends_on:
@@ -1161,9 +1206,16 @@ tasks:
vars:
test_config_filename: configs/hs_cleanup_stress.txt
test_name: hs_cleanup
- - func: "upload test stats"
+
+ - name: cppsuite-burst-inserts-stress
+ depends_on:
+ - name: compile
+ commands:
+ - func: "fetch artifacts"
+ - func: "cppsuite test"
vars:
- test_path: test/cppsuite/hs_cleanup
+ test_config_filename: configs/burst_inserts_stress.txt
+ test_name: burst_inserts
- name: cppsuite-search-near-01-stress
depends_on:
@@ -1174,9 +1226,6 @@ tasks:
vars:
test_config_filename: configs/search_near_01_stress.txt
test_name: search_near_01
- - func: "upload test stats"
- vars:
- test_path: test/cppsuite/search_near_01
- name: cppsuite-search-near-02-stress
depends_on:
@@ -1187,9 +1236,6 @@ tasks:
vars:
test_config_filename: configs/search_near_02_stress.txt
test_name: search_near_02
- - func: "upload test stats"
- vars:
- test_path: test/cppsuite/search_near_02
- name: cppsuite-search-near-03-stress
depends_on:
@@ -1200,9 +1246,6 @@ tasks:
vars:
test_config_filename: configs/search_near_03_stress.txt
test_name: search_near_03
- - func: "upload test stats"
- vars:
- test_path: test/cppsuite/search_near_03
- name: cppsuite-bounded-cursor-perf-stress
depends_on:
@@ -1213,9 +1256,6 @@ tasks:
vars:
test_config_filename: configs/bounded_cursor_perf_stress.txt
test_name: bounded_cursor_perf
- - func: "upload test stats"
- vars:
- test_path: test/cppsuite/bounded_cursor_perf
# End of cppsuite test tasks.
# Start of csuite test tasks
@@ -2753,6 +2793,7 @@ tasks:
-DCMAKE_C_FLAGS="-ggdb"
-DENABLE_PYTHON=1
-DWITH_PIC=1
+ -DENABLE_STRICT=1
- command: shell.exec
params:
working_dir: "wiredtiger/bench/workgen/runner"
@@ -2882,9 +2923,9 @@ tasks:
exec_timeout_secs: 25200
commands:
- func: "get project"
- - func: "compile wiredtiger"
+ - func: "compile wiredtiger address sanitizer"
vars:
- posix_configure_flags: -DENABLE_STRICT=1 -DHAVE_DIAGNOSTIC=1 -DNON_BARRIER_DIAGNOSTIC_YIELDS=1 -DHAVE_BUILTIN_EXTENSION_LZ4=1 -DHAVE_BUILTIN_EXTENSION_SNAPPY=1 -DHAVE_BUILTIN_EXTENSION_ZLIB=1 -DHAVE_BUILTIN_EXTENSION_ZSTD=1
+ NON_BARRIER_DIAGNOSTIC_YIELDS: -DNON_BARRIER_DIAGNOSTIC_YIELDS=1
- func: "format test script"
vars:
format_test_script_args: -R -t 360
@@ -2959,6 +3000,7 @@ tasks:
./upload-results-atlas.py ${atlas_wt_perf_test_user} ${atlas_wt_perf_pass} wt-perf-tests many-collection-test ${branch_name} $res_dir/results/results.json
- name: cyclomatic-complexity
+ tags: ["pull_request"]
commands:
- func: "get project"
- command: shell.exec
@@ -2966,6 +3008,7 @@ tasks:
working_dir: "wiredtiger"
shell: bash
script: |
+ t=__wt.$$
set -o verbose
# Install Metrix++, ensuring it is outside the 'src' directory
@@ -2980,9 +3023,15 @@ tasks:
# Set the cyclomatic complexity limit to 20
python "../metrixplusplus/metrix++.py" limit --max-limit=std.code.complexity:cyclomatic:20
- # Fail if there are functions with cyclomatic complexity larger than 95
- set -o errexit
- python "../metrixplusplus/metrix++.py" limit --max-limit=std.code.complexity:cyclomatic:95
+ # Fail if there are functions with cyclomatic complexity larger than 91
+ python "../metrixplusplus/metrix++.py" limit --max-limit=std.code.complexity:cyclomatic:91 > $t
+ if grep -q 'exceeds' $t; then
+ echo "[ERROR]:complexity:cyclomatic: Complexity limit exceeded."
+ cat $t
+ echo "[ERROR]:complexity:cyclomatic: Finished " && rm $t && exit 1
+ else
+ cat $t && rm $t
+ fi
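The reworked cyclomatic-complexity step captures the Metrix++ report in a temporary file, always prints it, and fails only when the report contains the word "exceeds". A hedged Python sketch of that capture-and-scan pattern (the command and marker here are placeholders, not the patch's actual invocation):

    import subprocess
    import sys

    def check_limit(cmd, marker="exceeds"):
        # Capture the tool's report, echo it for the build log, and fail
        # only if the marker string shows up anywhere in the output.
        report = subprocess.run(cmd, capture_output=True, text=True).stdout
        print(report)
        if marker in report:
            print("[ERROR]:complexity:cyclomatic: Complexity limit exceeded.")
            sys.exit(1)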
#############################
# Performance Tests for lsm #
@@ -4189,9 +4238,9 @@ buildvariants:
- name: cppsuite-stress-tests
display_name: "Cppsuite Stress Tests"
- batchtime: 480 # 3 times a day
+ batchtime: 960 # Every 16 hours
run_on:
- - ubuntu2004-test
+ - ubuntu2004-medium
expansions:
test_env_vars:
WT_TOPDIR=$(git rev-parse --show-toplevel)
@@ -4212,6 +4261,7 @@ buildvariants:
tasks:
- name: compile
- name: cppsuite-operations-test-stress
+ - name: cppsuite-burst-inserts-stress
- name: cppsuite-hs-cleanup-stress
- name: cppsuite-search-near-01-stress
- name: cppsuite-search-near-02-stress
diff --git a/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh b/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh
index 259a9c82c18..3adcf139075 100755
--- a/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh
+++ b/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh
@@ -820,10 +820,6 @@ if [ "$newer" = true ]; then
[[ $((i+1)) < ${#newer_release_branches[@]} ]] && \
(verify_test_format ${newer_release_branches[$((i+1))]} ${newer_release_branches[$i]} "row" false)
done
- for i in ${!test_checkpoint_release_branches[@]}; do
- [[ $((i+1)) < ${#test_checkpoint_release_branches[@]} ]] && \
- (verify_test_checkpoint ${test_checkpoint_release_branches[$((i+1))]} ${test_checkpoint_release_branches[$i]} "row")
- done
fi
# Upgrade/downgrade testing for supported access methods.
diff --git a/src/third_party/wiredtiger/test/suite/test_alter03.py b/src/third_party/wiredtiger/test/suite/test_alter03.py
index dcc5c6062a1..e62b8f999bb 100644
--- a/src/third_party/wiredtiger/test/suite/test_alter03.py
+++ b/src/third_party/wiredtiger/test/suite/test_alter03.py
@@ -63,11 +63,6 @@ class test_alter03(TieredConfigMixin, wttest.WiredTigerTestCase):
# should contain the provided file meta string.
if self.is_tiered_scenario():
c.set_key('file:' + self.name + '-0000000001.wtobj')
-
- # Removing quotes wrapping app metadata value just to make the test pass.
- # FIXME: WT-9036
- if (file_metastr == 'app_metadata="meta_data_1",'):
- file_metastr = 'app_metadata=meta_data_1,'
else:
c.set_key('file:' + self.name + '.wt')
diff --git a/src/third_party/wiredtiger/test/suite/test_checkpoint25.py b/src/third_party/wiredtiger/test/suite/test_checkpoint25.py
index fb13ef84d81..aef9abeabbf 100644
--- a/src/third_party/wiredtiger/test/suite/test_checkpoint25.py
+++ b/src/third_party/wiredtiger/test/suite/test_checkpoint25.py
@@ -103,7 +103,6 @@ class test_checkpoint(wttest.WiredTigerTestCase):
if ts is not None:
cfg += ',debug=(checkpoint_read_timestamp=' + self.timestamp_str(ts) + ')'
cursor = self.session.open_cursor(ds.uri, None, cfg)
- #self.session.begin_transaction()
count = 0
zcount = 0
for k, v in cursor:
@@ -112,7 +111,6 @@ class test_checkpoint(wttest.WiredTigerTestCase):
else:
self.assertEqual(v, value)
count += 1
- #self.session.rollback_transaction()
self.assertEqual(count, nrows)
self.assertEqual(zcount, zeros if self.value_format == '8t' else 0)
cursor.close()
diff --git a/src/third_party/wiredtiger/test/suite/test_compat01.py b/src/third_party/wiredtiger/test/suite/test_compat01.py
index 3979693a16a..082246baa25 100644
--- a/src/third_party/wiredtiger/test/suite/test_compat01.py
+++ b/src/third_party/wiredtiger/test/suite/test_compat01.py
@@ -53,6 +53,7 @@ class test_compat01(wttest.WiredTigerTestCase, suite_subprocess):
# and without the patch number. Test both.
start_compat = [
('def', dict(compat1='none', logv1=5)),
+ ('110', dict(compat1='11.0', logv1=5)),
('100', dict(compat1='10.0', logv1=5)),
('33', dict(compat1='3.3', logv1=4)),
('32', dict(compat1='3.2', logv1=3)),
@@ -65,6 +66,7 @@ class test_compat01(wttest.WiredTigerTestCase, suite_subprocess):
]
restart_compat = [
('def2', dict(compat2='none', logv2=5)),
+ ('110_2', dict(compat2='11.0', logv2=5)),
('100_2', dict(compat2='10.0', logv2=5)),
('33_2', dict(compat2='3.3', logv2=4)),
('32_2', dict(compat2='3.2', logv2=3)),
diff --git a/src/third_party/wiredtiger/test/suite/test_compat02.py b/src/third_party/wiredtiger/test/suite/test_compat02.py
index 8f4fee9569d..3ed1c9c1457 100644
--- a/src/third_party/wiredtiger/test/suite/test_compat02.py
+++ b/src/third_party/wiredtiger/test/suite/test_compat02.py
@@ -57,6 +57,7 @@ class test_compat02(wttest.WiredTigerTestCase, suite_subprocess):
compat_create = [
('def', dict(create_rel='none', log_create=5)),
+ ('110', dict(create_rel="11.0", log_create=5)),
('100', dict(create_rel="10.0", log_create=5)),
('33', dict(create_rel="3.3", log_create=4)),
('32', dict(create_rel="3.2", log_create=3)),
@@ -67,6 +68,7 @@ class test_compat02(wttest.WiredTigerTestCase, suite_subprocess):
compat_release = [
('def_rel', dict(rel='none', log_rel=5)),
+ ('110_rel', dict(rel="11.0", log_rel=5)),
('100_rel', dict(rel="10.0", log_rel=5)),
('33_rel', dict(rel="3.3", log_rel=4)),
('32_rel', dict(rel="3.2", log_rel=3)),
@@ -85,7 +87,7 @@ class test_compat02(wttest.WiredTigerTestCase, suite_subprocess):
compat_max = [
('future_max', dict(max_req=future_rel, log_max=future_logv)),
('def_max', dict(max_req='none', log_max=5)),
- ('100_max', dict(max_req="10.0", log_max=5)),
+ ('110_max', dict(max_req="11.0", log_max=5)),
('33_max', dict(max_req="3.3", log_max=4)),
('32_max', dict(max_req="3.2", log_max=3)),
('30_max', dict(max_req="3.0", log_max=2)),
diff --git a/src/third_party/wiredtiger/test/suite/test_compat03.py b/src/third_party/wiredtiger/test/suite/test_compat03.py
index fcbc7be2cda..71a21ec22c4 100644
--- a/src/third_party/wiredtiger/test/suite/test_compat03.py
+++ b/src/third_party/wiredtiger/test/suite/test_compat03.py
@@ -58,6 +58,7 @@ class test_compat03(wttest.WiredTigerTestCase, suite_subprocess):
compat_release = [
('future_rel', dict(rel=future_rel, log_rel=future_logv)),
('def_rel', dict(rel='none', log_rel=5)),
+ ('110_rel', dict(rel="11.0", log_rel=5)),
('100_rel', dict(rel="10.0", log_rel=5)),
('33_rel', dict(rel="3.3", log_rel=4)),
('32_rel', dict(rel="3.2", log_rel=3)),
@@ -76,7 +77,7 @@ class test_compat03(wttest.WiredTigerTestCase, suite_subprocess):
compat_max = [
('future_max', dict(max_req=future_rel, log_max=future_logv)),
('def_max', dict(max_req='none', log_max=5)),
- ('100_max', dict(max_req="10.0", log_max=5)),
+ ('110_max', dict(max_req="11.0", log_max=5)),
('33_max', dict(max_req="3.3", log_max=4)),
('32_max', dict(max_req="3.2", log_max=3)),
('30_max', dict(max_req="3.0", log_max=2)),
diff --git a/src/third_party/wiredtiger/test/suite/test_compat04.py b/src/third_party/wiredtiger/test/suite/test_compat04.py
index ea393f03ee9..9d59aff4b6d 100644
--- a/src/third_party/wiredtiger/test/suite/test_compat04.py
+++ b/src/third_party/wiredtiger/test/suite/test_compat04.py
@@ -54,6 +54,7 @@ class test_compat04(wttest.WiredTigerTestCase, suite_subprocess):
#
create_release = [
('def_rel', dict(create_rel='none', log_crrel=5)),
+ ('110_rel', dict(create_rel="11.0", log_crrel=5)),
('100_rel', dict(create_rel="10.0", log_crrel=5)),
('33_rel', dict(create_rel="3.3", log_crrel=4)),
('32_rel', dict(create_rel="3.2", log_crrel=3)),
@@ -62,6 +63,7 @@ class test_compat04(wttest.WiredTigerTestCase, suite_subprocess):
('26_rel', dict(create_rel="2.6", log_crrel=1)),
]
reconfig_release = [
+ ('110_rel', dict(rel="11.0", log_rel=5)),
('100_rel', dict(rel="10.0", log_rel=5)),
('33_rel', dict(rel="3.3", log_rel=4)),
('32_rel', dict(rel="3.2", log_rel=3)),
diff --git a/src/third_party/wiredtiger/test/suite/test_cursor21.py b/src/third_party/wiredtiger/test/suite/test_cursor21.py
new file mode 100644
index 00000000000..e508e7091de
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_cursor21.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_cursor21.py
+# Test cursor reposition
+
+import wttest
+from wtscenario import make_scenarios
+from wiredtiger import stat
+
+class test_cursor21(wttest.WiredTigerTestCase):
+ uri = "table:test_cursor21"
+
+ format_values = [
+ ('column', dict(key_format='r', value_format='i')),
+ ('row_integer', dict(key_format='i', value_format='i')),
+ ]
+ reposition_values = [
+ ('no_reposition', dict(reposition=False)),
+ ('reposition', dict(reposition=True))
+ ]
+ scenarios = make_scenarios(format_values, reposition_values)
+
+ def conn_config(self):
+ config='cache_size=100MB,statistics=(all)'
+ if self.reposition:
+ config += ',debug_mode=[cursor_reposition=true],timing_stress_for_test=(evict_reposition)'
+ return config
+
+ def get_stat(self, stat, local_session = None):
+ if (local_session != None):
+ stat_cursor = local_session.open_cursor('statistics:')
+ else:
+ stat_cursor = self.session.open_cursor('statistics:')
+ val = stat_cursor[stat][2]
+ stat_cursor.close()
+ return val
+
+ def check_reposition(self, count):
+ reposition_count = self.get_stat(stat.conn.cursor_reposition, self.session)
+ if self.reposition:
+ count = reposition_count - count
+ # Ensure that the reposition stat is greater than 0, indicating that a reposition happened.
+ self.assertGreater(count, 0)
+ else:
+ self.assertEqual(reposition_count, 0)
+ return reposition_count
+
+ def test_cursor21(self):
+ format = 'key_format={},value_format={}'.format(self.key_format, self.value_format)
+ reposition_count = 0
+ self.session.create(self.uri, format)
+ cursor = self.session.open_cursor(self.uri)
+
+ # insert
+ self.session.begin_transaction()
+ for i in range(1, 10000):
+ cursor[i] = i
+ self.session.commit_transaction()
+
+ # next
+ self.session.begin_transaction()
+ for i in range(1, 10000):
+ cursor.next()
+ self.assertEqual(cursor.get_value(), i)
+ self.session.commit_transaction()
+
+ reposition_count = self.check_reposition(reposition_count)
+ cursor.reset()
+
+ # prev
+ self.session.begin_transaction()
+ for i in range(9999, 0, -1):
+ cursor.prev()
+ self.assertEqual(cursor.get_value(), i)
+ self.session.commit_transaction()
+
+ reposition_count = self.check_reposition(reposition_count)
+ cursor.reset()
+
+ # search
+ self.session.begin_transaction()
+ for i in range(1, 10000):
+ cursor.set_key(i)
+ cursor.search()
+ self.assertEqual(cursor.get_value(), i)
+ self.session.commit_transaction()
+
+ reposition_count = self.check_reposition(reposition_count)
+ cursor.reset()
+
+ # search_near
+ self.session.begin_transaction()
+ for i in range(1, 10000):
+ cursor.set_key(i)
+ cursor.search_near()
+ self.assertEqual(cursor.get_value(), i)
+ self.session.commit_transaction()
+
+ reposition_count = self.check_reposition(reposition_count)
+ cursor.close()
+ self.session.close()
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable36.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable36.py
new file mode 100644
index 00000000000..2530323b827
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable36.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wttest
+from helper import simulate_crash_restart
+from wiredtiger import stat, WiredTigerError, wiredtiger_strerror, WT_NOTFOUND, WT_ROLLBACK
+from wtdataset import SimpleDataSet
+from wtscenario import make_scenarios
+
+# test_rollback_to_stable36.py
+#
+# Check the behavior of a fast-truncated page where the truncation is not stable but
+# everything else on the page is.
+
+class test_rollback_to_stable36(wttest.WiredTigerTestCase):
+ conn_config = 'statistics=(all)'
+ session_config = 'isolation=snapshot'
+
+ # Hook to run using remove instead of truncate for reference. This should not alter the
+ # behavior... but may if things are broken. Disable the reference version by default as it's
+ # only useful when investigating behavior changes. This list is first in the make_scenarios
+ # call so the additional cases don't change the scenario numbering.
+ trunc_values = [
+ ('truncate', dict(trunc_with_remove=False)),
+ #('remove', dict(trunc_with_remove=True)),
+ ]
+ format_values = [
+ ('column', dict(key_format='r', value_format='S', extraconfig='')),
+ ('column_fix', dict(key_format='r', value_format='8t',
+ extraconfig=',allocation_size=512,leaf_page_max=512')),
+ ('integer_row', dict(key_format='i', value_format='S', extraconfig='')),
+ ]
+ rollback_modes = [
+ ('runtime', dict(crash=False)),
+ ('recovery', dict(crash=True)),
+ ]
+ scenarios = make_scenarios(trunc_values, format_values, rollback_modes)
+
+ def truncate(self, uri, make_key, keynum1, keynum2):
+ if self.trunc_with_remove:
+ cursor = self.session.open_cursor(uri)
+ err = 0
+ for k in range(keynum1, keynum2 + 1):
+ cursor.set_key(k)
+ try:
+ err = cursor.remove()
+ except WiredTigerError as e:
+ if wiredtiger_strerror(WT_ROLLBACK) in str(e):
+ err = WT_ROLLBACK
+ else:
+ raise e
+ if err != 0:
+ break
+ cursor.close()
+ else:
+ lo_cursor = self.session.open_cursor(uri)
+ hi_cursor = self.session.open_cursor(uri)
+ lo_cursor.set_key(make_key(keynum1))
+ hi_cursor.set_key(make_key(keynum2))
+ try:
+ err = self.session.truncate(None, lo_cursor, hi_cursor, None)
+ except WiredTigerError as e:
+ if wiredtiger_strerror(WT_ROLLBACK) in str(e):
+ err = WT_ROLLBACK
+ else:
+ raise e
+ lo_cursor.close()
+ hi_cursor.close()
+ return err
+
+ def check(self, ds, value, nrows, ts):
+ cursor = self.session.open_cursor(ds.uri)
+ self.session.begin_transaction('read_timestamp=' + self.timestamp_str(ts))
+ count = 0
+ for k, v in cursor:
+ self.assertEqual(v, value)
+ count += 1
+ self.assertEqual(count, nrows)
+ self.session.rollback_transaction()
+ cursor.close()
+
+ def test_rollback_to_stable36(self):
+ nrows = 1000
+
+ # Create a table.
+ uri = "table:rollback_to_stable36"
+ ds = SimpleDataSet(
+ self, uri, 0, key_format=self.key_format, value_format=self.value_format,
+ config=self.extraconfig)
+ ds.populate()
+
+ if self.value_format == '8t':
+ value_a = 97
+ else:
+ value_a = "aaaaa" * 100
+
+ # Pin oldest and stable timestamps to 1.
+ self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1) +
+ ',stable_timestamp=' + self.timestamp_str(1))
+
+ # Write some baseline data to table 1 at time 10.
+ cursor1 = self.session.open_cursor(ds.uri)
+ self.session.begin_transaction()
+ for i in range(1, nrows + 1):
+ cursor1[ds.key(i)] = value_a
+ if i % 109 == 0:
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(10))
+ self.session.begin_transaction()
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(10))
+ cursor1.close()
+
+ # Mark it stable.
+ self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(10))
+
+ # Reopen the connection so nothing is in memory and we can fast-truncate.
+ self.reopen_conn()
+
+ # Truncate most of the table.
+ # Commit the truncate at time 20.
+ self.session.begin_transaction()
+ err = self.truncate(ds.uri, ds.key, 50, nrows - 50)
+ self.assertEqual(err, 0)
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20))
+
+ # Make sure we did at least one fast-delete. For columns, there's no fast-delete
+ # support (yet) so assert we didn't.
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ fastdelete_pages = stat_cursor[stat.conn.rec_page_delete_fast][2]
+ if self.key_format == 'r' or self.trunc_with_remove:
+ self.assertEqual(fastdelete_pages, 0)
+ else:
+ self.assertGreater(fastdelete_pages, 0)
+ stat_cursor.close()
+
+ # Checkpoint.
+ self.session.checkpoint()
+
+ # Roll back, either via crashing or by explicit RTS.
+ if self.crash:
+ simulate_crash_restart(self, ".", "RESTART")
+ else:
+ self.conn.rollback_to_stable()
+
+ # Currently rolling back a fast-truncate works by instantiating the pages and
+ # rolling back the instantiated updates, so we should see some page instantiations.
+ # (But again, not for columns, yet.)
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ read_deleted = stat_cursor[stat.conn.cache_read_deleted][2]
+ if self.key_format == 'r' or self.trunc_with_remove:
+ self.assertEqual(read_deleted, 0)
+ else:
+ self.assertGreater(read_deleted, 0)
+ stat_cursor.close()
+
+ # Validate the data; we should see all of it, since the truncations weren't stable.
+ self.check(ds, value_a, nrows, 15)
+ self.check(ds, value_a, nrows, 25)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_tiered02.py b/src/third_party/wiredtiger/test/suite/test_tiered02.py
index 2ff667ee946..2844bd32484 100755
--- a/src/third_party/wiredtiger/test/suite/test_tiered02.py
+++ b/src/third_party/wiredtiger/test/suite/test_tiered02.py
@@ -36,10 +36,7 @@ from wtscenario import make_scenarios
class test_tiered02(wttest.WiredTigerTestCase, TieredConfigMixin):
complex_dataset = [
('simple_ds', dict(complex_dataset=False)),
-
- # Commented out complex dataset that tests column groups and indexes because it crashes
- # in the middle of the test. FIXME: WT-9001
- #('complex_ds', dict(complex_dataset=True)),
+ ('complex_ds', dict(complex_dataset=True)),
]
# Make scenarios for different cloud service providers
@@ -123,7 +120,10 @@ class test_tiered02(wttest.WiredTigerTestCase, TieredConfigMixin):
self.progress('Create simple data set (50)')
ds = self.get_dataset(50)
self.progress('populate')
- ds.populate()
+ # Don't (re)create any of the tables or indices from here on out.
+ # We will keep a cursor open on the table, and creation requires
+ # exclusive access.
+ ds.populate(create=False)
ds.check()
self.progress('open extra cursor on ' + self.uri)
cursor = self.session.open_cursor(self.uri, None, None)
@@ -138,7 +138,7 @@ class test_tiered02(wttest.WiredTigerTestCase, TieredConfigMixin):
self.progress('Create simple data set (100)')
ds = self.get_dataset(100)
self.progress('populate')
- ds.populate()
+ ds.populate(create=False)
ds.check()
self.progress('checkpoint')
self.session.checkpoint()
@@ -149,7 +149,7 @@ class test_tiered02(wttest.WiredTigerTestCase, TieredConfigMixin):
self.progress('Create simple data set (200)')
ds = self.get_dataset(200)
self.progress('populate')
- ds.populate()
+ ds.populate(create=False)
ds.check()
cursor.close()
self.progress('close_conn')
@@ -166,7 +166,7 @@ class test_tiered02(wttest.WiredTigerTestCase, TieredConfigMixin):
self.progress('Create simple data set (300)')
ds = self.get_dataset(300)
self.progress('populate')
- ds.populate()
+ ds.populate(create=False)
ds.check()
# We haven't done a flush so there should be
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp20.py b/src/third_party/wiredtiger/test/suite/test_timestamp20.py
index 896b1052c0c..e1bfc6d0e0d 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp20.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp20.py
@@ -107,7 +107,10 @@ class test_timestamp20(wttest.WiredTigerTestCase):
cursor[self.get_key(i)] = value5
self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(40))
+ # Run a checkpoint to clean the tree and ensure the pages are evictable.
+ self.session.checkpoint()
self.evict(uri)
+
self.session.begin_transaction('read_timestamp=' + self.timestamp_str(30))
for i in range(1, 10000):
self.assertEqual(cursor[self.get_key(i)], value4)
@@ -182,7 +185,10 @@ class test_timestamp20(wttest.WiredTigerTestCase):
cursor[self.get_key(i)] = value3
self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(50))
+ # Run a checkpoint to clean the tree and ensure the pages are evictable.
+ self.session.checkpoint()
self.evict(uri)
+
# Open up a new transaction and read at 30.
# We shouldn't be able to see past no timestamp due to txnid visibility.
self.session.begin_transaction('read_timestamp=' + self.timestamp_str(30))
diff --git a/src/third_party/wiredtiger/test/suite/test_truncate12.py b/src/third_party/wiredtiger/test/suite/test_truncate12.py
new file mode 100644
index 00000000000..b8c33433ace
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_truncate12.py
@@ -0,0 +1,231 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wttest
+from helper import simulate_crash_restart
+from wiredtiger import stat, WiredTigerError, wiredtiger_strerror, WT_NOTFOUND, WT_ROLLBACK
+from wtdataset import SimpleDataSet
+from wtscenario import make_scenarios
+
+# test_truncate12.py
+#
+# Make sure that transaction IDs on truncates are handled properly after recovery,
+# even if the truncate information is loaded during recovery and stays in cache.
+#
+# This version uses timestamps and no logging.
+
+class test_truncate12(wttest.WiredTigerTestCase):
+ conn_config = 'statistics=(all)'
+ session_config = 'isolation=snapshot'
+
+ # Hook to run using remove instead of truncate for reference. This should not alter the
+ # behavior... but may if things are broken. Disable the reference version by default as it's
+ # only useful when investigating behavior changes. This list is first in the make_scenarios
+ # call so the additional cases don't change the scenario numbering.
+ trunc_values = [
+ ('truncate', dict(trunc_with_remove=False)),
+ #('remove', dict(trunc_with_remove=True)),
+ ]
+ format_values = [
+ ('column', dict(key_format='r', value_format='S', extraconfig='')),
+ ('column_fix', dict(key_format='r', value_format='8t',
+ extraconfig=',allocation_size=512,leaf_page_max=512')),
+ ('integer_row', dict(key_format='i', value_format='S', extraconfig='')),
+ ]
+ scenarios = make_scenarios(trunc_values, format_values)
+
+ def truncate(self, uri, make_key, keynum1, keynum2):
+ if self.trunc_with_remove:
+ cursor = self.session.open_cursor(uri)
+ err = 0
+ for k in range(keynum1, keynum2 + 1):
+ cursor.set_key(k)
+ try:
+ err = cursor.remove()
+ except WiredTigerError as e:
+ if wiredtiger_strerror(WT_ROLLBACK) in str(e):
+ err = WT_ROLLBACK
+ else:
+ raise e
+ if err != 0:
+ break
+ cursor.close()
+ else:
+ lo_cursor = self.session.open_cursor(uri)
+ hi_cursor = self.session.open_cursor(uri)
+ lo_cursor.set_key(make_key(keynum1))
+ hi_cursor.set_key(make_key(keynum2))
+ try:
+ err = self.session.truncate(None, lo_cursor, hi_cursor, None)
+ except WiredTigerError as e:
+ if wiredtiger_strerror(WT_ROLLBACK) in str(e):
+ err = WT_ROLLBACK
+ else:
+ raise e
+ lo_cursor.close()
+ hi_cursor.close()
+ return err
+
+ def check(self, ds, cursor, value, keep, nrows):
+ def expect(lo, hi):
+ for i in range(lo, hi):
+ self.assertEqual(cursor[ds.key(i)], value)
+ def expectNone(lo, hi):
+ for i in range(lo, hi):
+ cursor.set_key(ds.key(i))
+ if self.value_format == '8t' and i <= nrows:
+ # In FLCS, deleted values read back as zero. Except past end-of-table.
+ self.assertEqual(cursor.search(), 0)
+ self.assertEqual(cursor.get_value(), 0)
+ else:
+ self.assertEqual(cursor.search(), WT_NOTFOUND)
+
+ # Expect keys 1..keep to have values, and the rest not.
+ expect(1, keep + 1)
+ expectNone(keep + 1, nrows + 1)
+
+ def test_truncate12(self):
+ nrows = 5000
+ keep_rows = 5
+
+ # Create two tables.
+ uri1 = "table:truncate12a"
+ uri2 = "table:truncate12b"
+ ds1 = SimpleDataSet(
+ self, uri1, 0, key_format=self.key_format, value_format=self.value_format,
+ config=self.extraconfig)
+ ds2 = SimpleDataSet(
+ self, uri2, 0, key_format=self.key_format, value_format=self.value_format,
+ config=self.extraconfig)
+ ds1.populate()
+ ds2.populate()
+
+ if self.value_format == '8t':
+ value_a = 97
+ value_b = 98
+ value_small = 42
+ else:
+ value_a = "aaaaa" * 100
+ value_b = "bbbbb" * 100
+ value_small = "***"
+
+ # Pin oldest and stable timestamps to 1.
+ self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1) +
+ ',stable_timestamp=' + self.timestamp_str(1))
+
+ # Write some baseline data to table 1 at time 10.
+ cursor1 = self.session.open_cursor(ds1.uri)
+ self.session.begin_transaction()
+ for i in range(1, nrows + 1):
+ cursor1[ds1.key(i)] = value_a
+ if i % 480 == 0:
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(10))
+ self.session.begin_transaction()
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(10))
+ cursor1.close()
+
+ # Mark it stable.
+ self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(10))
+
+ # Reopen the connection so nothing is in memory and we can fast-truncate.
+ self.reopen_conn()
+
+ # Write a lot of rubbish to table 2 to cycle through transaction IDs.
+ # Do this at time 20.
+ cursor2 = self.session.open_cursor(ds2.uri)
+ for i in range(1, nrows + 1):
+ self.session.begin_transaction()
+ cursor2[ds2.key(i)] = value_small
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20))
+
+ # Truncate all of table 1 except for the first few keys.
+ # Commit the truncate at time 30.
+ self.session.begin_transaction()
+ err = self.truncate(ds1.uri, ds1.key, keep_rows + 1, nrows)
+ self.assertEqual(err, 0)
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(30))
+
+ # Make sure we did at least one fast-delete. For columns, there's no fast-delete
+ # support (yet) so assert we didn't.
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ fastdelete_pages = stat_cursor[stat.conn.rec_page_delete_fast][2]
+ if self.key_format == 'r' or self.trunc_with_remove:
+ self.assertEqual(fastdelete_pages, 0)
+ else:
+ self.assertGreater(fastdelete_pages, 0)
+
+ # Now update the values we left behind, at time 40.
+ cursor1 = self.session.open_cursor(ds1.uri)
+ self.session.begin_transaction()
+ for i in range(1, keep_rows + 1):
+ cursor1[ds1.key(i)] = value_b
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(40))
+ cursor1.close()
+
+ # Doing that should not have instantiated any deleted pages.
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ read_deleted = stat_cursor[stat.conn.cache_read_deleted][2]
+ self.assertEqual(read_deleted, 0)
+ stat_cursor.close()
+
+ # Advance stable to 35. We'll be rolling back the updated keys but not the truncate.
+ self.conn.set_timestamp('stable_timestamp=' + self.timestamp_str(35))
+
+ # Checkpoint so the truncate gets written out. We're interested in transaction ID
+ # handling across database runs, so we need it all out on disk.
+ self.session.checkpoint('name=pointy')
+
+ # Now crash. It's important to do this (and not just reopen) so the unstable material
+ # gets rolled back during recovery in startup and not by the shutdown-time RTS.
+ simulate_crash_restart(self, ".", "RESTART")
+
+ # Recovery should not have instantiated any deleted pages. But it should have loaded
+ # the first internal page, which should contain at least a few deleted pages.
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ read_deleted = stat_cursor[stat.conn.cache_read_deleted][2]
+ self.assertEqual(read_deleted, 0)
+ stat_cursor.close()
+
+ # Validate the data. Because we cranked forward the transaction IDs, the truncate
+ # transactions should have large transaction IDs; if we mishandle the write
+ # generation because the internal pages were loaded during RTS, the truncates won't
+ # be visible.
+ cursor1 = self.session.open_cursor(ds1.uri)
+ self.session.begin_transaction('read_timestamp=' + self.timestamp_str(50))
+ self.check(ds1, cursor1, value_a, keep_rows, nrows)
+ self.session.rollback_transaction()
+ cursor1.close()
+
+ # For good measure, validate the data in the checkpoint we wrote as well.
+ # (This isn't part of the primary goal of this test but is fast and doesn't hurt.)
+ pointy_cursor = self.session.open_cursor(ds1.uri, None, "checkpoint=pointy")
+ self.check(ds1, pointy_cursor, value_a, keep_rows, nrows)
+ pointy_cursor.close()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/wtdataset.py b/src/third_party/wiredtiger/test/suite/wtdataset.py
index 7839cff8a04..d3a2cc21d79 100755
--- a/src/third_party/wiredtiger/test/suite/wtdataset.py
+++ b/src/third_party/wiredtiger/test/suite/wtdataset.py
@@ -64,19 +64,21 @@ class BaseDataSet(object):
def fill(self):
self.store_range(1, self.rows)
- def postfill(self):
+ def postfill_create(self):
pass
@classmethod
def is_lsm(cls):
return False
- def populate(self):
+ def populate(self, create=True):
self.testcase.pr('populate: ' + self.uri + ' with '
+ str(self.rows) + ' rows')
- self.create()
+ if create:
+ self.create()
self.fill()
- self.postfill()
+ if create:
+ self.postfill_create()
# Create a key for a Simple or Complex data set.
@staticmethod
@@ -261,7 +263,7 @@ class ComplexDataSet(BaseDataSet):
session.create('index:' + tablepart + index[0],
',columns=(' + index[1] + '),' + self.config)
- def postfill(self):
+ def postfill_create(self):
# add some indices after filling the table
tablepart = self.uri.split(":")[1] + ':'
session = self.testcase.session
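With populate() now taking a create flag and ComplexDataSet's postfill renamed to postfill_create, a dataset can be refilled without recreating the table or its indices, which is what the test_tiered02 change above relies on since creation needs exclusive access. A hedged usage sketch mirroring that flow (written to run inside a wttest test case; row counts are illustrative):

    # First population creates the table (and, for complex datasets, indices).
    ds = SimpleDataSet(self, self.uri, 50, key_format='S', value_format='S')
    ds.populate()
    # Keep a cursor open; recreating the table would now need exclusive access.
    cursor = self.session.open_cursor(self.uri, None, None)
    # Later populations skip create()/postfill_create() and only fill rows.
    ds = SimpleDataSet(self, self.uri, 100, key_format='S', value_format='S')
    ds.populate(create=False)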
diff --git a/src/third_party/wiredtiger/test/utility/parse_opts.c b/src/third_party/wiredtiger/test/utility/parse_opts.c
index b92b8c5f185..98eabe89b7e 100644
--- a/src/third_party/wiredtiger/test/utility/parse_opts.c
+++ b/src/third_party/wiredtiger/test/utility/parse_opts.c
@@ -49,11 +49,14 @@ testutil_parse_opts(int argc, char *const *argv, TEST_OPTS *opts)
testutil_print_command_line(argc, argv);
- while ((ch = __wt_getopt(opts->progname, argc, argv, "A:b:dh:n:o:pR:T:t:vW:")) != EOF)
+ while ((ch = __wt_getopt(opts->progname, argc, argv, "A:Bb:dh:n:o:pR:T:t:vW:")) != EOF)
switch (ch) {
case 'A': /* Number of append threads */
opts->n_append_threads = (uint64_t)atoll(__wt_optarg);
break;
+ case 'B': /* Use tiered storage objects and buckets. */
+ opts->tiered = true;
+ break;
case 'b': /* Build directory */
opts->build_dir = dstrdup(__wt_optarg);
break;
diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h
index 1ae875865ce..2bab5a89532 100644
--- a/src/third_party/wiredtiger/test/utility/test_util.h
+++ b/src/third_party/wiredtiger/test/utility/test_util.h
@@ -65,9 +65,10 @@ typedef struct {
FILE *progress_fp; /* Progress tracking file */
char *progress_file_name;
+ bool do_data_ops; /* Have schema ops use data */
bool preserve; /* Don't remove files on exit */
+ bool tiered; /* Configure tiered storage */
bool verbose; /* Run in verbose mode */
- bool do_data_ops; /* Have schema ops use data */
uint64_t nrecords; /* Number of records */
uint64_t nops; /* Number of operations */
uint64_t nthreads; /* Number of threads */