From b1474e5c22fd2106c1e7c6493052e8fbc450e289 Mon Sep 17 00:00:00 2001
From: Anton Korshunov
Date: Mon, 28 Nov 2022 14:19:20 +0000
Subject: SERVER-71051 Make CE module less dependent on statistics module

---
 src/mongo/db/SConscript | 3 +-
 src/mongo/db/commands/SConscript | 2 +-
 src/mongo/db/commands/analyze_cmd.cpp | 4 +-
 src/mongo/db/exec/sbe/SConscript | 3 +-
 src/mongo/db/mongod_main.cpp | 10 +-
 src/mongo/db/pipeline/SConscript | 4 +-
 src/mongo/db/pipeline/accumulator.h | 4 +-
 .../accumulator_internal_construct_stats.cpp | 8 +-
 src/mongo/db/query/SConscript | 1 +
 src/mongo/db/query/ce/SConscript | 184 +---
 src/mongo/db/query/ce/array_histogram.cpp | 214 ----
 src/mongo/db/query/ce/array_histogram.h | 148 ---
 src/mongo/db/query/ce/ce_array_data_test.cpp | 295 -----
 src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp | 227 ----
 src/mongo/db/query/ce/ce_edge_cases_test.cpp | 1002 -----------------
 .../db/query/ce/ce_generated_histograms_test.cpp | 363 ------
 src/mongo/db/query/ce/ce_heuristic.cpp | 611 ----------
 src/mongo/db/query/ce/ce_heuristic.h | 49 -
 src/mongo/db/query/ce/ce_heuristic_test.cpp | 1009 -----------------
 src/mongo/db/query/ce/ce_hinted.cpp | 108 --
 src/mongo/db/query/ce/ce_hinted.h | 58 -
 src/mongo/db/query/ce/ce_histogram.cpp | 289 -----
 src/mongo/db/query/ce/ce_histogram.h | 54 -
 src/mongo/db/query/ce/ce_histogram_test.cpp | 1156 -------------------
 src/mongo/db/query/ce/ce_interpolation_test.cpp | 505 ---------
 src/mongo/db/query/ce/ce_sampling.cpp | 362 ------
 src/mongo/db/query/ce/ce_sampling.h | 56 -
 src/mongo/db/query/ce/ce_test_utils.cpp | 216 ----
 src/mongo/db/query/ce/ce_test_utils.h | 250 -----
 src/mongo/db/query/ce/collection_statistics.h | 60 -
 .../db/query/ce/collection_statistics_impl.cpp | 71 --
 src/mongo/db/query/ce/collection_statistics_impl.h | 67 --
 .../db/query/ce/collection_statistics_mock.cpp | 53 -
 src/mongo/db/query/ce/collection_statistics_mock.h | 64 --
 .../db/query/ce/generated_histograms_test.cpp | 366 ++++++
 .../db/query/ce/heuristic_dataflow_nodes_test.cpp | 221 ++++
 src/mongo/db/query/ce/heuristic_estimator.cpp | 600 ++++++++++
 src/mongo/db/query/ce/heuristic_estimator.h | 49 +
 src/mongo/db/query/ce/heuristic_estimator_test.cpp | 978 +++++++++++++++++
 src/mongo/db/query/ce/hinted_estimator.cpp | 100 ++
 src/mongo/db/query/ce/hinted_estimator.h | 57 +
 .../db/query/ce/histogram_array_data_test.cpp | 298 +++++
 .../db/query/ce/histogram_edge_cases_test.cpp | 1007 +++++++++++++++++
 src/mongo/db/query/ce/histogram_estimation.cpp | 488 --------
 src/mongo/db/query/ce/histogram_estimation.h | 106 --
 src/mongo/db/query/ce/histogram_estimator.cpp | 272 +++++
 src/mongo/db/query/ce/histogram_estimator.h | 54 +
 src/mongo/db/query/ce/histogram_estimator_test.cpp | 1161 ++++++++++++++++++++
 .../db/query/ce/histogram_interpolation_test.cpp | 508 +++++++++
 .../db/query/ce/histogram_predicate_estimation.cpp | 496 +++++++++
 .../db/query/ce/histogram_predicate_estimation.h | 106 ++
 src/mongo/db/query/ce/max_diff.cpp | 376 -------
 src/mongo/db/query/ce/max_diff.h | 82 --
 src/mongo/db/query/ce/maxdiff_histogram_test.cpp | 34 +-
 src/mongo/db/query/ce/maxdiff_test_utils.cpp | 120 --
 src/mongo/db/query/ce/maxdiff_test_utils.h | 76 --
 src/mongo/db/query/ce/rand_utils.cpp | 391 -------
 src/mongo/db/query/ce/rand_utils.h | 191 ----
 src/mongo/db/query/ce/rand_utils_new.cpp | 249 -----
 src/mongo/db/query/ce/rand_utils_new.h | 354 ------
 src/mongo/db/query/ce/sampling_estimator.cpp | 341 ++++++
 src/mongo/db/query/ce/sampling_estimator.h | 56 +
 src/mongo/db/query/ce/scalar_histogram.cpp | 194 ----
 src/mongo/db/query/ce/scalar_histogram.h | 120 --
 src/mongo/db/query/ce/stats.idl | 102 --
 src/mongo/db/query/ce/stats_cache.cpp | 82 --
 src/mongo/db/query/ce/stats_cache.h | 84 --
 src/mongo/db/query/ce/stats_cache_loader.h | 61 -
 src/mongo/db/query/ce/stats_cache_loader_impl.cpp | 86 --
 src/mongo/db/query/ce/stats_cache_loader_impl.h | 47 -
 src/mongo/db/query/ce/stats_cache_loader_mock.cpp | 53 -
 src/mongo/db/query/ce/stats_cache_loader_mock.h | 54 -
 src/mongo/db/query/ce/stats_cache_loader_test.cpp | 116 --
 .../query/ce/stats_cache_loader_test_fixture.cpp | 76 --
 .../db/query/ce/stats_cache_loader_test_fixture.h | 60 -
 src/mongo/db/query/ce/stats_cache_test.cpp | 133 ---
 src/mongo/db/query/ce/stats_catalog.cpp | 115 --
 src/mongo/db/query/ce/stats_catalog.h | 80 --
 src/mongo/db/query/ce/stats_path_test.cpp | 131 ---
 src/mongo/db/query/ce/test_utils.cpp | 214 ++++
 src/mongo/db/query/ce/test_utils.h | 231 ++++
 src/mongo/db/query/ce/value_utils.cpp | 254 -----
 src/mongo/db/query/ce/value_utils.h | 123 ---
 src/mongo/db/query/ce_mode_parameter.cpp | 4 +-
 src/mongo/db/query/ce_mode_parameter.h | 4 +-
 src/mongo/db/query/ce_mode_parameter_test.cpp | 4 +-
 src/mongo/db/query/cost_model/SConscript | 2 +-
 src/mongo/db/query/cost_model/cost_estimator.cpp | 418 -------
 src/mongo/db/query/cost_model/cost_estimator.h | 56 -
 .../db/query/cost_model/cost_estimator_impl.cpp | 418 +++++++
 .../db/query/cost_model/cost_estimator_impl.h | 56 +
 .../db/query/cost_model/cost_estimator_test.cpp | 12 +-
 src/mongo/db/query/cqf_get_executor.cpp | 49 +-
 src/mongo/db/query/optimizer/cascades/interfaces.h | 8 +-
 .../query/optimizer/cascades/logical_rewriter.cpp | 6 +-
 .../db/query/optimizer/cascades/logical_rewriter.h | 4 +-
 src/mongo/db/query/optimizer/cascades/memo.cpp | 13 +-
 src/mongo/db/query/optimizer/cascades/memo.h | 4 +-
 .../query/optimizer/cascades/physical_rewriter.cpp | 6 +-
 .../query/optimizer/cascades/physical_rewriter.h | 4 +-
 src/mongo/db/query/optimizer/opt_phase_manager.cpp | 12 +-
 src/mongo/db/query/optimizer/opt_phase_manager.h | 12 +-
 src/mongo/db/query/optimizer/utils/ce_math.cpp | 5 +-
 src/mongo/db/query/optimizer/utils/ce_math.h | 6 +-
 .../db/query/optimizer/utils/unit_test_utils.cpp | 32 +-
 .../db/query/optimizer/utils/unit_test_utils.h | 10 +-
 src/mongo/db/query/query_knobs.idl | 2 +-
 src/mongo/db/query/stats/SConscript | 123 +++
 src/mongo/db/query/stats/array_histogram.cpp | 209 ++++
 src/mongo/db/query/stats/array_histogram.h | 142 +++
 src/mongo/db/query/stats/collection_statistics.h | 60 +
 .../db/query/stats/collection_statistics_impl.cpp | 72 ++
 .../db/query/stats/collection_statistics_impl.h | 67 ++
 .../db/query/stats/collection_statistics_mock.cpp | 53 +
 .../db/query/stats/collection_statistics_mock.h | 64 ++
 src/mongo/db/query/stats/max_diff.cpp | 378 +++++++
 src/mongo/db/query/stats/max_diff.h | 82 ++
 src/mongo/db/query/stats/maxdiff_test_utils.cpp | 120 ++
 src/mongo/db/query/stats/maxdiff_test_utils.h | 74 ++
 src/mongo/db/query/stats/rand_utils.cpp | 392 +++++++
 src/mongo/db/query/stats/rand_utils.h | 188 ++++
 src/mongo/db/query/stats/rand_utils_new.cpp | 250 +++++
 src/mongo/db/query/stats/rand_utils_new.h | 353 ++++++
 src/mongo/db/query/stats/scalar_histogram.cpp | 192 ++++
 src/mongo/db/query/stats/scalar_histogram.h | 120 ++
 src/mongo/db/query/stats/stats.idl | 102 ++
 src/mongo/db/query/stats/stats_cache.cpp | 74 ++
 src/mongo/db/query/stats/stats_cache.h | 81 ++
 src/mongo/db/query/stats/stats_cache_loader.h | 58 +
 .../db/query/stats/stats_cache_loader_impl.cpp | 82 ++
 src/mongo/db/query/stats/stats_cache_loader_impl.h | 45 +
 .../db/query/stats/stats_cache_loader_mock.cpp | 50 +
 src/mongo/db/query/stats/stats_cache_loader_mock.h | 52 +
 .../db/query/stats/stats_cache_loader_test.cpp | 116 ++
 .../stats/stats_cache_loader_test_fixture.cpp | 74 ++
 .../query/stats/stats_cache_loader_test_fixture.h | 60 +
 src/mongo/db/query/stats/stats_cache_test.cpp | 131 +++
 src/mongo/db/query/stats/stats_catalog.cpp | 108 ++
 src/mongo/db/query/stats/stats_catalog.h | 77 ++
 src/mongo/db/query/stats/stats_path_test.cpp | 129 +++
 src/mongo/db/query/stats/value_utils.cpp | 252 +++++
 src/mongo/db/query/stats/value_utils.h | 120 ++
 142 files changed, 12315 insertions(+), 12445 deletions(-)
 delete mode 100644 src/mongo/db/query/ce/array_histogram.cpp
 delete mode 100644 src/mongo/db/query/ce/array_histogram.h
 delete mode 100644 src/mongo/db/query/ce/ce_array_data_test.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_edge_cases_test.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_generated_histograms_test.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_heuristic.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_heuristic.h
 delete mode 100644 src/mongo/db/query/ce/ce_heuristic_test.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_hinted.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_hinted.h
 delete mode 100644 src/mongo/db/query/ce/ce_histogram.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_histogram.h
 delete mode 100644 src/mongo/db/query/ce/ce_histogram_test.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_interpolation_test.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_sampling.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_sampling.h
 delete mode 100644 src/mongo/db/query/ce/ce_test_utils.cpp
 delete mode 100644 src/mongo/db/query/ce/ce_test_utils.h
 delete mode 100644 src/mongo/db/query/ce/collection_statistics.h
 delete mode 100644 src/mongo/db/query/ce/collection_statistics_impl.cpp
 delete mode 100644 src/mongo/db/query/ce/collection_statistics_impl.h
 delete mode 100644 src/mongo/db/query/ce/collection_statistics_mock.cpp
 delete mode 100644 src/mongo/db/query/ce/collection_statistics_mock.h
 create mode 100644 src/mongo/db/query/ce/generated_histograms_test.cpp
 create mode 100644 src/mongo/db/query/ce/heuristic_dataflow_nodes_test.cpp
 create mode 100644 src/mongo/db/query/ce/heuristic_estimator.cpp
 create mode 100644 src/mongo/db/query/ce/heuristic_estimator.h
 create mode 100644 src/mongo/db/query/ce/heuristic_estimator_test.cpp
 create mode 100644 src/mongo/db/query/ce/hinted_estimator.cpp
 create mode 100644 src/mongo/db/query/ce/hinted_estimator.h
 create mode 100644 src/mongo/db/query/ce/histogram_array_data_test.cpp
 create mode 100644 src/mongo/db/query/ce/histogram_edge_cases_test.cpp
 delete mode 100644 src/mongo/db/query/ce/histogram_estimation.cpp
 delete mode 100644 src/mongo/db/query/ce/histogram_estimation.h
 create mode 100644 src/mongo/db/query/ce/histogram_estimator.cpp
 create mode 100644 src/mongo/db/query/ce/histogram_estimator.h
 create mode 100644 src/mongo/db/query/ce/histogram_estimator_test.cpp
 create mode 100644 src/mongo/db/query/ce/histogram_interpolation_test.cpp
 create mode 100644 src/mongo/db/query/ce/histogram_predicate_estimation.cpp
 create mode 100644 src/mongo/db/query/ce/histogram_predicate_estimation.h
 delete mode 100644 src/mongo/db/query/ce/max_diff.cpp
 delete mode 100644 src/mongo/db/query/ce/max_diff.h
 delete mode 100644 src/mongo/db/query/ce/maxdiff_test_utils.cpp
 delete mode 100644 src/mongo/db/query/ce/maxdiff_test_utils.h
 delete mode 100644 src/mongo/db/query/ce/rand_utils.cpp
 delete mode 100644 src/mongo/db/query/ce/rand_utils.h
 delete mode 100644 src/mongo/db/query/ce/rand_utils_new.cpp
 delete mode 100644 src/mongo/db/query/ce/rand_utils_new.h
 create mode 100644 src/mongo/db/query/ce/sampling_estimator.cpp
 create mode 100644 src/mongo/db/query/ce/sampling_estimator.h
 delete mode 100644 src/mongo/db/query/ce/scalar_histogram.cpp
 delete mode 100644 src/mongo/db/query/ce/scalar_histogram.h
 delete mode 100644 src/mongo/db/query/ce/stats.idl
 delete mode 100644 src/mongo/db/query/ce/stats_cache.cpp
 delete mode 100644 src/mongo/db/query/ce/stats_cache.h
 delete mode 100644 src/mongo/db/query/ce/stats_cache_loader.h
 delete mode 100644 src/mongo/db/query/ce/stats_cache_loader_impl.cpp
 delete mode 100644 src/mongo/db/query/ce/stats_cache_loader_impl.h
 delete mode 100644 src/mongo/db/query/ce/stats_cache_loader_mock.cpp
 delete mode 100644 src/mongo/db/query/ce/stats_cache_loader_mock.h
 delete mode 100644 src/mongo/db/query/ce/stats_cache_loader_test.cpp
 delete mode 100644 src/mongo/db/query/ce/stats_cache_loader_test_fixture.cpp
 delete mode 100644 src/mongo/db/query/ce/stats_cache_loader_test_fixture.h
 delete mode 100644 src/mongo/db/query/ce/stats_cache_test.cpp
 delete mode 100644 src/mongo/db/query/ce/stats_catalog.cpp
 delete mode 100644 src/mongo/db/query/ce/stats_catalog.h
 delete mode 100644 src/mongo/db/query/ce/stats_path_test.cpp
 create mode 100644 src/mongo/db/query/ce/test_utils.cpp
 create mode 100644 src/mongo/db/query/ce/test_utils.h
 delete mode 100644 src/mongo/db/query/ce/value_utils.cpp
 delete mode 100644 src/mongo/db/query/ce/value_utils.h
 delete mode 100644 src/mongo/db/query/cost_model/cost_estimator.cpp
 delete mode 100644 src/mongo/db/query/cost_model/cost_estimator.h
 create mode 100644 src/mongo/db/query/cost_model/cost_estimator_impl.cpp
 create mode 100644 src/mongo/db/query/cost_model/cost_estimator_impl.h
 create mode 100644 src/mongo/db/query/stats/SConscript
 create mode 100644 src/mongo/db/query/stats/array_histogram.cpp
 create mode 100644 src/mongo/db/query/stats/array_histogram.h
 create mode 100644 src/mongo/db/query/stats/collection_statistics.h
 create mode 100644 src/mongo/db/query/stats/collection_statistics_impl.cpp
 create mode 100644 src/mongo/db/query/stats/collection_statistics_impl.h
 create mode 100644 src/mongo/db/query/stats/collection_statistics_mock.cpp
 create mode 100644 src/mongo/db/query/stats/collection_statistics_mock.h
 create mode 100644 src/mongo/db/query/stats/max_diff.cpp
 create mode 100644 src/mongo/db/query/stats/max_diff.h
 create mode 100644 src/mongo/db/query/stats/maxdiff_test_utils.cpp
 create mode 100644 src/mongo/db/query/stats/maxdiff_test_utils.h
 create mode 100644 src/mongo/db/query/stats/rand_utils.cpp
 create mode 100644 src/mongo/db/query/stats/rand_utils.h
 create mode 100644 src/mongo/db/query/stats/rand_utils_new.cpp
 create mode 100644 src/mongo/db/query/stats/rand_utils_new.h
 create mode 100644 src/mongo/db/query/stats/scalar_histogram.cpp
 create mode 100644 src/mongo/db/query/stats/scalar_histogram.h
 create mode 100644 src/mongo/db/query/stats/stats.idl
 create mode 100644 src/mongo/db/query/stats/stats_cache.cpp
 create mode 100644 src/mongo/db/query/stats/stats_cache.h
 create mode 100644 src/mongo/db/query/stats/stats_cache_loader.h
 create mode 100644 src/mongo/db/query/stats/stats_cache_loader_impl.cpp
 create mode 100644 src/mongo/db/query/stats/stats_cache_loader_impl.h
 create mode 100644 src/mongo/db/query/stats/stats_cache_loader_mock.cpp
 create mode 100644 src/mongo/db/query/stats/stats_cache_loader_mock.h
 create mode 100644 src/mongo/db/query/stats/stats_cache_loader_test.cpp
 create mode 100644 src/mongo/db/query/stats/stats_cache_loader_test_fixture.cpp
 create mode 100644 src/mongo/db/query/stats/stats_cache_loader_test_fixture.h
 create mode 100644 src/mongo/db/query/stats/stats_cache_test.cpp
 create mode 100644 src/mongo/db/query/stats/stats_catalog.cpp
 create mode 100644 src/mongo/db/query/stats/stats_catalog.h
 create mode 100644 src/mongo/db/query/stats/stats_path_test.cpp
 create mode 100644 src/mongo/db/query/stats/value_utils.cpp
 create mode 100644 src/mongo/db/query/stats/value_utils.h

diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index acfed69144a..a33c66d3cdf 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -1520,6 +1520,7 @@ env.Library(
         '$BUILD_DIR/mongo/db/concurrency/exception_util',
         '$BUILD_DIR/mongo/db/exec/sbe/query_sbe_abt',
         '$BUILD_DIR/mongo/db/internal_transactions_feature_flag',
+        '$BUILD_DIR/mongo/db/query/ce/query_ce_heuristic',
         '$BUILD_DIR/mongo/db/query/ce/query_ce_histogram',
         '$BUILD_DIR/mongo/db/query/ce/query_ce_sampling',
         '$BUILD_DIR/mongo/db/query/optimizer/optimizer',
@@ -2330,7 +2331,7 @@ env.Library(
         '$BUILD_DIR/mongo/db/change_stream_options_manager',
         '$BUILD_DIR/mongo/db/change_streams_cluster_parameter',
         '$BUILD_DIR/mongo/db/pipeline/change_stream_expired_pre_image_remover',
-        '$BUILD_DIR/mongo/db/query/ce/query_ce_histogram',
+        '$BUILD_DIR/mongo/db/query/stats/query_stats',
         '$BUILD_DIR/mongo/db/s/query_analysis_writer',
         '$BUILD_DIR/mongo/db/set_change_stream_state_coordinator',
         '$BUILD_DIR/mongo/idl/cluster_server_parameter',
diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript
index 3dd709d03e7..d18125f0da1 100644
--- a/src/mongo/db/commands/SConscript
+++ b/src/mongo/db/commands/SConscript
@@ -360,9 +360,9 @@ env.Library(
         '$BUILD_DIR/mongo/db/ops/write_ops_exec',
         '$BUILD_DIR/mongo/db/pipeline/aggregation_request_helper',
         '$BUILD_DIR/mongo/db/pipeline/process_interface/mongo_process_interface',
-        '$BUILD_DIR/mongo/db/query/ce/query_ce_histogram',
         '$BUILD_DIR/mongo/db/query/command_request_response',
         '$BUILD_DIR/mongo/db/query/cursor_response_idl',
+        '$BUILD_DIR/mongo/db/query/stats/query_stats',
         '$BUILD_DIR/mongo/db/query/telemetry',
         '$BUILD_DIR/mongo/db/query_exec',
         '$BUILD_DIR/mongo/db/repl/replica_set_messages',
diff --git a/src/mongo/db/commands/analyze_cmd.cpp b/src/mongo/db/commands/analyze_cmd.cpp
index d47a3b55b41..d6a31329f1c 100644
--- a/src/mongo/db/commands/analyze_cmd.cpp
+++ b/src/mongo/db/commands/analyze_cmd.cpp
@@ -38,8 +38,8 @@
 #include "mongo/db/namespace_string.h"
 #include "mongo/db/query/allowed_contexts.h"
 #include "mongo/db/query/analyze_command_gen.h"
-#include "mongo/db/query/ce/stats_catalog.h"
 #include "mongo/db/query/query_feature_flags_gen.h"
+#include "mongo/db/query/stats/stats_catalog.h"
 #include "mongo/rpc/get_status_from_command_result.h"

 namespace mongo {
@@ -195,7 +195,7 @@ public:
             uassertStatusOK(getStatusFromCommandResult(analyzeResult));

             // Invalidate statistics in the cache for the analyzed path
-            StatsCatalog& statsCatalog = StatsCatalog::get(opCtx);
+            stats::StatsCatalog& statsCatalog = stats::StatsCatalog::get(opCtx);
             uassertStatusOK(statsCatalog.invalidatePath(nss, key->toString()));

         } else if (sampleSize || sampleRate) {
diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript
index fc66fc3519e..bf2d6170cf1 100644
--- a/src/mongo/db/exec/sbe/SConscript
+++ b/src/mongo/db/exec/sbe/SConscript
@@ -137,8 +137,7 @@ env.Library(
         'abt/abt_lower.cpp',
     ],
     LIBDEPS=[
-        '$BUILD_DIR/mongo/db/query/ce/query_ce_heuristic',
-        '$BUILD_DIR/mongo/db/query/optimizer/optimizer',
+        '$BUILD_DIR/mongo/db/query/optimizer/optimizer_base',
         'query_sbe',
         'query_sbe_stages',
         'query_sbe_storage',
diff --git a/src/mongo/db/mongod_main.cpp b/src/mongo/db/mongod_main.cpp
index a23b35f911a..a018c618cbf 100644
--- a/src/mongo/db/mongod_main.cpp
+++ b/src/mongo/db/mongod_main.cpp
@@ -107,9 +107,9 @@
 #include "mongo/db/periodic_runner_job_abort_expired_transactions.h"
 #include "mongo/db/pipeline/change_stream_expired_pre_image_remover.h"
 #include "mongo/db/pipeline/process_interface/replica_set_node_process_interface.h"
-#include "mongo/db/query/ce/stats_cache_loader_impl.h"
-#include "mongo/db/query/ce/stats_catalog.h"
 #include "mongo/db/query/internal_plans.h"
+#include "mongo/db/query/stats/stats_cache_loader_impl.h"
+#include "mongo/db/query/stats/stats_catalog.h"
 #include "mongo/db/read_write_concern_defaults_cache_lookup_mongod.h"
 #include "mongo/db/repl/drop_pending_collection_reaper.h"
 #include "mongo/db/repl/initial_syncer_factory.h"
@@ -855,9 +855,9 @@ ExitCode _initAndListen(ServiceContext* serviceContext, int listenPort) {

     LogicalSessionCache::set(serviceContext, makeLogicalSessionCacheD(kind));

-    auto cacheLoader = std::make_unique<StatsCacheLoaderImpl>();
-    auto catalog = std::make_unique<StatsCatalog>(serviceContext, std::move(cacheLoader));
-    StatsCatalog::set(serviceContext, std::move(catalog));
+    auto cacheLoader = std::make_unique<stats::StatsCacheLoaderImpl>();
+    auto catalog = std::make_unique<stats::StatsCatalog>(serviceContext, std::move(cacheLoader));
+    stats::StatsCatalog::set(serviceContext, std::move(catalog));

     if (analyze_shard_key::supportsPersistingSampledQueriesIgnoreFCV()) {
         analyze_shard_key::QueryAnalysisWriter::get(serviceContext).onStartup();
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index 422eef81f39..b4843172e39 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -133,8 +133,8 @@ env.Library(
     ],
     LIBDEPS=[
         '$BUILD_DIR/mongo/db/exec/document_value/document_value',
-        '$BUILD_DIR/mongo/db/query/ce/query_stats',
         '$BUILD_DIR/mongo/db/query/query_knobs',
+        '$BUILD_DIR/mongo/db/query/stats/query_stats',
         '$BUILD_DIR/mongo/db/query_expressions',
         '$BUILD_DIR/mongo/scripting/scripting_common',
         '$BUILD_DIR/mongo/util/summation',
@@ -334,13 +334,13 @@ pipelineEnv.Library(
         '$BUILD_DIR/mongo/db/index/key_generator',
         '$BUILD_DIR/mongo/db/pipeline/change_stream_error_extra_info',
         '$BUILD_DIR/mongo/db/pipeline/lite_parsed_document_source',
-        '$BUILD_DIR/mongo/db/query/ce/query_stats_gen',
         '$BUILD_DIR/mongo/db/query/collation/collator_factory_interface',
         '$BUILD_DIR/mongo/db/query/collation/collator_interface',
         '$BUILD_DIR/mongo/db/query/cursor_response_idl',
         '$BUILD_DIR/mongo/db/query/datetime/date_time_support',
         '$BUILD_DIR/mongo/db/query/query_knobs',
         '$BUILD_DIR/mongo/db/query/sort_pattern',
+        '$BUILD_DIR/mongo/db/query/stats/stats_gen',
         '$BUILD_DIR/mongo/db/query/telemetry',
         '$BUILD_DIR/mongo/db/query_expressions',
         '$BUILD_DIR/mongo/db/repl/apply_ops_command_info',
diff --git a/src/mongo/db/pipeline/accumulator.h b/src/mongo/db/pipeline/accumulator.h
index df070ff6531..e99b9db3ec1 100644
--- a/src/mongo/db/pipeline/accumulator.h
+++ b/src/mongo/db/pipeline/accumulator.h
@@ -43,7 +43,7 @@
 #include "mongo/db/exec/document_value/value_comparator.h"
 #include "mongo/db/pipeline/expression.h"
 #include "mongo/db/pipeline/expression_context.h"
-#include "mongo/db/query/ce/value_utils.h"
+#include "mongo/db/query/stats/value_utils.h"
 #include "mongo/stdx/unordered_set.h"
 #include "mongo/util/summation.h"

@@ -250,7 +250,7 @@ public:

 private:
     double _count;
-    std::vector<ce::SBEValue> _values;
+    std::vector<stats::SBEValue> _values;
 };

 class AccumulatorLast final : public AccumulatorState {
diff --git a/src/mongo/db/pipeline/accumulator_internal_construct_stats.cpp b/src/mongo/db/pipeline/accumulator_internal_construct_stats.cpp
index e5f2a5b0c70..bdcd67c6c24 100644
--- a/src/mongo/db/pipeline/accumulator_internal_construct_stats.cpp
+++ b/src/mongo/db/pipeline/accumulator_internal_construct_stats.cpp
@@ -35,8 +35,8 @@
 #include "mongo/db/pipeline/accumulator.h"
 #include "mongo/db/pipeline/expression_context.h"
 #include "mongo/db/query/allowed_contexts.h"
-#include "mongo/db/query/ce/max_diff.h"
-#include "mongo/db/query/ce/value_utils.h"
+#include "mongo/db/query/stats/max_diff.h"
+#include "mongo/db/query/stats/value_utils.h"
 #include "mongo/logv2/log.h"

@@ -71,7 +71,7 @@ void AccumulatorInternalConstructStats::processInternal(const Value& input, bool
     auto val = doc["val"];
     LOGV2_DEBUG(6735800, 4, "Extracted document", "val"_attr = val);

-    _values.emplace_back(ce::SBEValue(mongo::optimizer::convertFrom(val)));
+    _values.emplace_back(stats::SBEValue(mongo::optimizer::convertFrom(val)));
     _count++;

     _memUsageBytes = sizeof(*this);
@@ -81,7 +81,7 @@ Value AccumulatorInternalConstructStats::getValue(bool toBeMerged) {
     uassert(8423374, "Can not merge analyze pipelines", !toBeMerged);

     // Generate and serialize maxdiff histogram for scalar and array values.
-    auto arrayHistogram = ce::createArrayEstimator(_values, ce::ScalarHistogram::kMaxBuckets);
+    auto arrayHistogram = stats::createArrayEstimator(_values, stats::ScalarHistogram::kMaxBuckets);
     auto stats = stats::makeStatistics(_count, arrayHistogram);

     return Value(stats);
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript
index 02890313b21..1d3e11ccc13 100644
--- a/src/mongo/db/query/SConscript
+++ b/src/mongo/db/query/SConscript
@@ -11,6 +11,7 @@ env.SConscript(
         'cost_model',
         'datetime',
         'optimizer',
+        'stats',
     ],
     exports=[
         'env',
diff --git a/src/mongo/db/query/ce/SConscript b/src/mongo/db/query/ce/SConscript
index d1e33484f03..c027c82f743 100644
--- a/src/mongo/db/query/ce/SConscript
+++ b/src/mongo/db/query/ce/SConscript
@@ -7,7 +7,7 @@ env = env.Clone()
 env.Library(
     target="query_ce_heuristic",
     source=[
-        'ce_heuristic.cpp',
+        'heuristic_estimator.cpp',
     ],
     LIBDEPS_PRIVATE=[
         '$BUILD_DIR/mongo/db/query/optimizer/optimizer_memo',
@@ -17,7 +17,7 @@ env.Library(
 env.Library(
     target="query_ce_hinted",
     source=[
-        'ce_hinted.cpp',
+        'hinted_estimator.cpp',
     ],
     LIBDEPS_PRIVATE=[
         '$BUILD_DIR/mongo/db/query/optimizer/optimizer_memo',
@@ -28,62 +28,32 @@ env.Library(
 env.Library(
     target="query_ce_histogram",
     source=[
-        'ce_histogram.cpp',
-        'collection_statistics_impl.cpp',
-        'histogram_estimation.cpp',
-        'stats_catalog.cpp',
-        'stats_cache.cpp',
-        'stats_cache_loader_impl.cpp',
+        'histogram_estimator.cpp',
+        'histogram_predicate_estimation.cpp',
     ],
     LIBDEPS_PRIVATE=[
-        '$BUILD_DIR/mongo/db/dbdirectclient',
-        '$BUILD_DIR/mongo/db/pipeline/pipeline',
+        '$BUILD_DIR/mongo/db/pipeline/abt_utils',
         '$BUILD_DIR/mongo/db/query/optimizer/optimizer_memo',
         '$BUILD_DIR/mongo/db/query/optimizer/optimizer_rewrites',
-        '$BUILD_DIR/mongo/util/caching',
-        '$BUILD_DIR/mongo/util/concurrency/thread_pool',
-        'query_stats',
+        '$BUILD_DIR/mongo/db/query/stats/stats_histograms',
     ],
 )

 env.Library(
     target="query_ce_sampling",
     source=[
-        'ce_sampling.cpp',
+        'sampling_estimator.cpp',
     ],
     LIBDEPS_PRIVATE=[
         '$BUILD_DIR/mongo/db/exec/sbe/query_sbe_abt',
-        '$BUILD_DIR/mongo/db/query/optimizer/optimizer_memo',
-    ],
-)
-
-env.Library(
-    target="query_stats",
-    source=[
-        'array_histogram.cpp',
-        'scalar_histogram.cpp',
-        'stats.idl',
-        'value_utils.cpp',
-    ],
-    LIBDEPS=[
-        '$BUILD_DIR/mongo/db/exec/sbe/query_sbe_values',
-    ],
-)
-
-env.Library(
-    target="query_stats_gen",
-    source=[
-        'max_diff.cpp',
-    ],
-    LIBDEPS=[
-        'query_stats',
+        '$BUILD_DIR/mongo/db/query/optimizer/optimizer',
     ],
 )

 env.Library(
-    target="ce_test_utils",
+    target="test_utils",
     source=[
-        'ce_test_utils.cpp',
+        'test_utils.cpp',
     ],
     LIBDEPS=[
         '$BUILD_DIR/mongo/base',
@@ -94,150 +64,79 @@ env.Library(
         'query_ce_heuristic',
         'query_ce_histogram',
         'query_ce_sampling',
-        'query_stats',
     ],
 )

 env.CppUnitTest(
-    target="ce_histogram_test",
+    target="histogram_estimator_test",
     source=[
-        "ce_histogram_test.cpp",
-        "collection_statistics_mock.cpp",
+        "histogram_estimator_test.cpp",
     ],
     LIBDEPS=[
-        'ce_test_utils',
+        '$BUILD_DIR/mongo/db/query/stats/stats_test_utils',
+        'test_utils',
     ],
 )

 env.CppUnitTest(
-    target="ce_interpolation_test",
+    target="histogram_interpolation_test",
     source=[
-        "ce_interpolation_test.cpp",
+        "histogram_interpolation_test.cpp",
     ],
     LIBDEPS=[
-        'ce_test_utils',
+        'test_utils',
     ],
 )

 env.CppUnitTest(
-    target="ce_heuristic_test",
+    target="heuristic_estimator_test",
     source=[
-        "ce_heuristic_test.cpp",
+        "heuristic_estimator_test.cpp",
     ],
     LIBDEPS=[
-        'ce_test_utils',
+        'test_utils',
     ],
 )

 env.CppUnitTest(
-    target="ce_array_data_test",
+    target="histogram_array_data_test",
     source=[
-        "ce_array_data_test.cpp",
+        "histogram_array_data_test.cpp",
     ],
     LIBDEPS=[
-        'ce_test_utils',
+        '$BUILD_DIR/mongo/db/query/stats/stats_test_utils',
+        'test_utils',
     ],
 )

 env.CppUnitTest(
-    target="ce_edge_cases_test",
+    target="histogram_edge_cases_test",
     source=[
-        "ce_edge_cases_test.cpp",
+        "histogram_edge_cases_test.cpp",
     ],
     LIBDEPS=[
-        'ce_test_utils',
-        'query_stats_test_utils',
+        '$BUILD_DIR/mongo/db/query/stats/stats_test_utils',
+        'test_utils',
     ],
 )

 env.CppUnitTest(
-    target="ce_dataflow_nodes_test",
+    target="heuristic_dataflow_nodes_test",
     source=[
-        "ce_dataflow_nodes_test.cpp",
+        "heuristic_dataflow_nodes_test.cpp",
     ],
     LIBDEPS=[
-        'ce_test_utils',
+        'test_utils',
     ],
 )

 env.CppUnitTest(
-    target='stats_cache_loader_test',
+    target="generated_histograms_test",
     source=[
-        'stats_cache_loader_test.cpp',
-        'stats_cache_loader_test_fixture.cpp',
+        "generated_histograms_test.cpp",
     ],
     LIBDEPS=[
-        '$BUILD_DIR/mongo/db/auth/authmocks',
-        '$BUILD_DIR/mongo/db/catalog/collection_crud',
-        '$BUILD_DIR/mongo/db/commands/test_commands_enabled',
-        '$BUILD_DIR/mongo/db/index_builds_coordinator_mongod',
-        '$BUILD_DIR/mongo/db/multitenancy',
-        '$BUILD_DIR/mongo/db/op_observer/op_observer',
-        '$BUILD_DIR/mongo/db/op_observer/op_observer_impl',
-        '$BUILD_DIR/mongo/db/query/datetime/date_time_support',
-        '$BUILD_DIR/mongo/db/query/query_test_service_context',
-        '$BUILD_DIR/mongo/db/query_expressions',
-        '$BUILD_DIR/mongo/db/repl/drop_pending_collection_reaper',
-        '$BUILD_DIR/mongo/db/repl/oplog',
-        '$BUILD_DIR/mongo/db/repl/optime',
-        '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface',
-        '$BUILD_DIR/mongo/db/repl/replmocks',
-        '$BUILD_DIR/mongo/db/repl/storage_interface_impl',
-        '$BUILD_DIR/mongo/db/server_base',
-        '$BUILD_DIR/mongo/db/service_context',
-        '$BUILD_DIR/mongo/db/service_context_d_test_fixture',
-        '$BUILD_DIR/mongo/db/service_context_test_fixture',
-        '$BUILD_DIR/mongo/db/shard_role',
-        '$BUILD_DIR/mongo/db/storage/wiredtiger/storage_wiredtiger',
-        '$BUILD_DIR/mongo/db/timeseries/timeseries_options',
-        '$BUILD_DIR/mongo/unittest/unittest',
-        '$BUILD_DIR/mongo/util/clock_source_mock',
-        '$BUILD_DIR/mongo/util/fail_point',
-        '$BUILD_DIR/mongo/util/pcre_wrapper',
-        'query_ce_histogram',
-        'query_stats',
-    ],
-)
-
-env.CppUnitTest(
-    target="stats_cache_test",
-    source=[
-        "stats_cache_test.cpp",
-        "stats_cache_loader_mock.cpp",
-    ],
-    LIBDEPS=[
-        '$BUILD_DIR/mongo/base',
-        '$BUILD_DIR/mongo/db/service_context',
-        'ce_test_utils',
-    ],
-)
-
-env.CppUnitTest(
-    target="stats_path_test",
-    source=[
-        "stats_path_test.cpp",
-    ],
-    LIBDEPS=[
-        '$BUILD_DIR/mongo/base',
-        '$BUILD_DIR/mongo/db/service_context',
-        'ce_test_utils',
-    ],
-)
-
-env.Library(
-    target="query_stats_test_utils",
-    source=[
-        'rand_utils.cpp',
-        'rand_utils_new.cpp',
-        'maxdiff_test_utils.cpp',
-    ],
-    LIBDEPS=[
-        '$BUILD_DIR/mongo/base',
-        '$BUILD_DIR/mongo/db/exec/sbe/sbe_abt_test_util',
-        "$BUILD_DIR/mongo/unittest/unittest",
-        'query_ce_histogram',
-        'query_stats',
-        'query_stats_gen',
+        'test_utils',
     ],
 )

@@ -247,18 +146,7 @@ env.CppUnitTest(
         'maxdiff_histogram_test.cpp',
     ],
     LIBDEPS=[
-        'ce_test_utils',
-        'query_stats_test_utils',
-    ],
-)
-
-env.CppUnitTest(
-    target="ce_generated_histograms_test",
-    source=[
-        "ce_generated_histograms_test.cpp",
-    ],
-    LIBDEPS=[
-        'ce_test_utils',
-        'query_stats_test_utils',
+        '$BUILD_DIR/mongo/db/query/stats/stats_test_utils',
+        'test_utils',
     ],
 )
diff --git a/src/mongo/db/query/ce/array_histogram.cpp b/src/mongo/db/query/ce/array_histogram.cpp
deleted file mode 100644
index 66ab117e60d..00000000000
--- a/src/mongo/db/query/ce/array_histogram.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/**
- *    Copyright (C) 2022-present MongoDB, Inc.
- *
- *    This program is free software: you can redistribute it and/or modify
- *    it under the terms of the Server Side Public License, version 1,
- *    as published by MongoDB, Inc.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *    Server Side Public License for more details.
- *
- *    You should have received a copy of the Server Side Public License
- *    along with this program. If not, see
- *    <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- *    As a special exception, the copyright holders give permission to link the
- *    code of portions of this program with the OpenSSL library under certain
- *    conditions as described in each individual source file and distribute
- *    linked combinations including the program with the OpenSSL library. You
- *    must comply with the Server Side Public License in all respects for
- *    all of the code used other than as permitted herein. If you modify file(s)
- *    with this exception, you may extend this exception to your version of the
- *    file(s), but you are not obligated to do so. If you do not wish to do so,
- *    delete this exception statement from your version. If you delete this
- *    exception statement from all source files in the program, then also delete
- *    it in the license file.
- */
-
-#include "mongo/db/query/ce/array_histogram.h"
-#include "mongo/db/query/ce/value_utils.h"
-
-namespace mongo {
-namespace ce {
-using namespace sbe;
-
-TypeCounts mapStatsTypeCountToTypeCounts(std::vector tc) {
-    TypeCounts out;
-    for (const auto& t : tc) {
-        out.emplace(deserialize(t.getTypeName().toString()), t.getCount());
-    }
-    return out;
-}
-
-ArrayHistogram::ArrayHistogram() : ArrayHistogram(ScalarHistogram(), {}) {}
-
-ArrayHistogram::ArrayHistogram(Statistics stats)
-    : ArrayHistogram(stats.getScalarHistogram(),
-                     mapStatsTypeCountToTypeCounts(stats.getTypeCount()),
-                     stats.getTrueCount(),
-                     stats.getFalseCount()) {
-    // TODO SERVER-71513: initialize non-scalar histogram fields.
-}
-
-ArrayHistogram::ArrayHistogram(ScalarHistogram scalar,
-                               TypeCounts typeCounts,
-                               ScalarHistogram arrayUnique,
-                               ScalarHistogram arrayMin,
-                               ScalarHistogram arrayMax,
-                               TypeCounts arrayTypeCounts,
-                               double emptyArrayCount,
-                               double trueCount,
-                               double falseCount)
-    : _scalar(std::move(scalar)),
-      _typeCounts(std::move(typeCounts)),
-      _emptyArrayCount(emptyArrayCount),
-      _trueCount(trueCount),
-      _falseCount(falseCount),
-      _arrayUnique(std::move(arrayUnique)),
-      _arrayMin(std::move(arrayMin)),
-      _arrayMax(std::move(arrayMax)),
-      _arrayTypeCounts(std::move(arrayTypeCounts)) {
-    invariant(isArray());
-}
-
-ArrayHistogram::ArrayHistogram(ScalarHistogram scalar,
-                               TypeCounts typeCounts,
-                               double trueCount,
-                               double falseCount)
-    : _scalar(std::move(scalar)),
-      _typeCounts(std::move(typeCounts)),
-      _emptyArrayCount(0.0),
-      _trueCount(trueCount),
-      _falseCount(falseCount),
-      _arrayUnique(boost::none),
-      _arrayMin(boost::none),
-      _arrayMax(boost::none),
-      _arrayTypeCounts(boost::none) {
-    invariant(!isArray());
-}
-
-bool ArrayHistogram::isArray() const {
-    return _arrayUnique && _arrayMin && _arrayMax && _arrayTypeCounts;
-}
-
-std::string typeCountsToString(const TypeCounts& typeCounts) {
-    std::ostringstream os;
-    os << "{";
-    bool first = true;
-    for (auto [tag, count] : typeCounts) {
-        if (!first)
-            os << ", ";
-        os << tag << ": " << count;
-        first = false;
-    }
-    os << "}";
-    return os.str();
-}
-
-std::string ArrayHistogram::toString() const {
-    std::ostringstream os;
-    os << "{\n";
-    os << " scalar: " << _scalar.toString();
-    os << ",\n typeCounts: " << typeCountsToString(_typeCounts);
-    if (isArray()) {
-        os << ",\n arrayUnique: " << _arrayUnique->toString();
-        os << ",\n arrayMin: " << _arrayMin->toString();
-        os << ",\n arrayMax: " << _arrayMax->toString();
-        os << ",\n arrayTypeCounts: " << typeCountsToString(*_arrayTypeCounts);
-    }
-    os << "\n}\n";
-    return os.str();
-}
-
-const ScalarHistogram& ArrayHistogram::getScalar() const {
-    return _scalar;
-}
-
-const ScalarHistogram& ArrayHistogram::getArrayUnique() const {
-    invariant(isArray());
-    return *_arrayUnique;
-}
-
-const ScalarHistogram& ArrayHistogram::getArrayMin() const {
-    invariant(isArray());
-    return *_arrayMin;
-}
-
-const ScalarHistogram& ArrayHistogram::getArrayMax() const {
-    invariant(isArray());
-    return *_arrayMax;
-}
-
-const TypeCounts& ArrayHistogram::getTypeCounts() const {
-    return _typeCounts;
-}
-
-const TypeCounts& ArrayHistogram::getArrayTypeCounts() const {
-    invariant(isArray());
-    return *_arrayTypeCounts;
-}
-
-double ArrayHistogram::getArrayCount() const {
-    if (isArray()) {
-        auto findArray = _typeCounts.find(value::TypeTags::Array);
-        uassert(6979504,
-                "Histogram with array data must have a total array count.",
-                findArray != _typeCounts.end());
-        double arrayCount = findArray->second;
-        uassert(6979503, "Histogram with array data must have at least one array.", arrayCount > 0);
-        return arrayCount;
-    }
-    return 0;
-}
-
-BSONObj ArrayHistogram::serialize() const {
-    BSONObjBuilder histogramBuilder;
-
-    // Serialize boolean type counters.
-    histogramBuilder.append("trueCount", getTrueCount());
-    histogramBuilder.append("falseCount", getFalseCount());
-
-    // Serialize empty array counts.
-    histogramBuilder.appendNumber("emptyArrayCount", getEmptyArrayCount());
-
-    // Serialize type counts.
-    BSONArrayBuilder typeCountBuilder(histogramBuilder.subarrayStart("typeCount"));
-    const auto& typeCounts = getTypeCounts();
-    for (const auto& [sbeType, count] : typeCounts) {
-        auto typeCount = BSON("typeName" << ce::serialize(sbeType) << "count" << count);
-        typeCountBuilder.append(typeCount);
-    }
-    typeCountBuilder.doneFast();
-
-    // Serialize scalar histogram.
-    histogramBuilder.append("scalarHistogram", getScalar().serialize());
-
-    // TODO SERVER-71513: serialize array histograms.
-
-    histogramBuilder.doneFast();
-    return histogramBuilder.obj();
-}
-}  // namespace ce
-
-// TODO: update this once SERVER-71051 is done.
-namespace stats {
-BSONObj makeStatistics(double documents, const ce::ArrayHistogram& arrayHistogram) {
-    BSONObjBuilder builder;
-    builder.appendNumber("documents", documents);
-    builder.appendElements(arrayHistogram.serialize());
-    builder.doneFast();
-    return builder.obj();
-}
-
-BSONObj makeStatsPath(StringData path, double documents, const ce::ArrayHistogram& arrayHistogram) {
-    BSONObjBuilder builder;
-    builder.append("_id", path);
-    builder.append("statistics", makeStatistics(documents, arrayHistogram));
-    builder.doneFast();
-    return builder.obj();
-}
-}  // namespace stats
-
-}  // namespace mongo
diff --git a/src/mongo/db/query/ce/array_histogram.h b/src/mongo/db/query/ce/array_histogram.h
deleted file mode 100644
index 2ce33d330b5..00000000000
--- a/src/mongo/db/query/ce/array_histogram.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/**
- *    Copyright (C) 2022-present MongoDB, Inc.
- *
- *    This program is free software: you can redistribute it and/or modify
- *    it under the terms of the Server Side Public License, version 1,
- *    as published by MongoDB, Inc.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *    Server Side Public License for more details.
- *
- *    You should have received a copy of the Server Side Public License
- *    along with this program. If not, see
- *    <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- *    As a special exception, the copyright holders give permission to link the
- *    code of portions of this program with the OpenSSL library under certain
- *    conditions as described in each individual source file and distribute
- *    linked combinations including the program with the OpenSSL library. You
- *    must comply with the Server Side Public License in all respects for
- *    all of the code used other than as permitted herein. If you modify file(s)
- *    with this exception, you may extend this exception to your version of the
- *    file(s), but you are not obligated to do so. If you do not wish to do so,
- *    delete this exception statement from your version. If you delete this
- *    exception statement from all source files in the program, then also delete
- *    it in the license file.
- */
-
-#pragma once
-
-#include <map>
-
-#include "mongo/db/exec/sbe/values/value.h"
-#include "mongo/db/query/ce/scalar_histogram.h"
-#include "mongo/db/query/ce/stats_gen.h"
-
-namespace mongo {
-namespace ce {
-
-using TypeCounts = std::map<sbe::value::TypeTags, double>;
-
-class ArrayHistogram {
-public:
-    // Constructs an empty scalar histogram.
-    ArrayHistogram();
-
-    // Constructor using StatsPath IDL as input.
-    ArrayHistogram(Statistics stats);
-
-    // Constructor for scalar field histograms.
-    ArrayHistogram(ScalarHistogram scalar,
-                   TypeCounts typeCounts,
-                   double trueCount = 0.0,
-                   double falseCount = 0.0);
-
-    // Constructor for array field histograms. We have to initialize all array fields in this case.
-    ArrayHistogram(ScalarHistogram scalar,
-                   TypeCounts typeCounts,
-                   ScalarHistogram arrayUnique,
-                   ScalarHistogram arrayMin,
-                   ScalarHistogram arrayMax,
-                   TypeCounts arrayTypeCounts,
-                   double emptyArrayCount = 0.0,
-                   double trueCount = 0.0,
-                   double falseCount = 0.0);
-
-    // ArrayHistogram is neither copy-constructible nor copy-assignable.
-    ArrayHistogram(const ArrayHistogram&) = delete;
-    ArrayHistogram& operator=(const ArrayHistogram&) = delete;
-
-    // However, it is move-constructible and move-assignable.
-    ArrayHistogram(ArrayHistogram&&) = default;
-    ArrayHistogram& operator=(ArrayHistogram&&) = default;
-    ~ArrayHistogram() = default;
-
-    std::string toString() const;
-
-    // Serialize to BSON for storage in stats collection.
-    BSONObj serialize() const;
-
-    const ScalarHistogram& getScalar() const;
-    const ScalarHistogram& getArrayUnique() const;
-    const ScalarHistogram& getArrayMin() const;
-    const ScalarHistogram& getArrayMax() const;
-    const TypeCounts& getTypeCounts() const;
-    const TypeCounts& getArrayTypeCounts() const;
-
-    // Returns whether or not this histogram includes array data points.
-    bool isArray() const;
-
-    // Get the total number of arrays in the histogram's path including empty arrays.
-    double getArrayCount() const;
-
-    // Get the total number of empty arrays ( [] ) in the histogram's path.
-    double getEmptyArrayCount() const {
-        return _emptyArrayCount;
-    }
-
-    // Get the count of true booleans.
-    double getTrueCount() const {
-        return _trueCount;
-    }
-
-    // Get the count of false booleans.
-    double getFalseCount() const {
-        return _falseCount;
-    }
-
-private:
-    /* Fields for all paths. */
-
-    // Contains values which appeared originally as scalars on the path.
-    ScalarHistogram _scalar;
-    // The number of values of each type.
-    TypeCounts _typeCounts;
-    // The number of empty arrays - they are not accounted for in the histograms.
-    double _emptyArrayCount;
-    // The counts of true & false booleans.
-    double _trueCount;
-    double _falseCount;
-
-    /* Fields for array paths (only initialized if arrays are present). */
-
-    // Contains unique scalar values originating from arrays.
-    boost::optional<ScalarHistogram> _arrayUnique;
-    // Contains minimum values originating from arrays **per class**.
-    boost::optional<ScalarHistogram> _arrayMin;
-    // Contains maximum values originating from arrays **per class**.
-    boost::optional<ScalarHistogram> _arrayMax;
-    // The number of values of each type inside all arrays.
-    boost::optional<TypeCounts> _arrayTypeCounts;
-};
-}  // namespace ce
-
-// TODO: update this once SERVER-71051 is done.
-namespace stats {
-/**
- * Returns an owned BSON Object representing data matching mongo::Statistics IDL.
- */
-BSONObj makeStatistics(double documents, const ce::ArrayHistogram& arrayHistogram);
-
-/**
- * Returns an owned BSON Object representing data matching mongo::StatsPath IDL.
- */
-BSONObj makeStatsPath(StringData path, double documents, const ce::ArrayHistogram& arrayHistogram);
-}  // namespace stats
-
-}  // namespace mongo
diff --git a/src/mongo/db/query/ce/ce_array_data_test.cpp b/src/mongo/db/query/ce/ce_array_data_test.cpp
deleted file mode 100644
index 587ab4b7364..00000000000
--- a/src/mongo/db/query/ce/ce_array_data_test.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-/**
- *    Copyright (C) 2022-present MongoDB, Inc.
- *
- *    This program is free software: you can redistribute it and/or modify
- *    it under the terms of the Server Side Public License, version 1,
- *    as published by MongoDB, Inc.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *    Server Side Public License for more details.
- *
- *    You should have received a copy of the Server Side Public License
- *    along with this program. If not, see
- *    <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- *    As a special exception, the copyright holders give permission to link the
- *    code of portions of this program with the OpenSSL library under certain
- *    conditions as described in each individual source file and distribute
- *    linked combinations including the program with the OpenSSL library. You
- *    must comply with the Server Side Public License in all respects for
- *    all of the code used other than as permitted herein. If you modify file(s)
- *    with this exception, you may extend this exception to your version of the
- *    file(s), but you are not obligated to do so. If you do not wish to do so,
- *    delete this exception statement from your version. If you delete this
- *    exception statement from all source files in the program, then also delete
- *    it in the license file.
- */
-
-#include
-
-#include "mongo/db/exec/sbe/values/value.h"
-#include "mongo/db/query/ce/array_histogram.h"
-#include "mongo/db/query/ce/ce_test_utils.h"
-#include "mongo/db/query/ce/histogram_estimation.h"
-#include "mongo/db/query/query_test_service_context.h"
-#include "mongo/unittest/unittest.h"
-
-namespace mongo::ce {
-namespace {
-
-using namespace sbe;
-
-/**
- * Structure representing a range query and its estimated and actual cardinalities.
- * Used to record hand-crafted queries over a pre-generated dataset.
- */
-struct QuerySpec {
-    // Low bound of the query range.
-    int32_t low;
-    // Upper bound of the query range.
-    int32_t high;
-    // Estimated cardinality of $match query.
-    double estMatch;
-    // Actual cardinality of $match query.
-    double actMatch;
-    // Estimated cardinality of $elemMatch query.
-    double estElemMatch;
-    // Actual cardinality of $elemMatch query.
-    double actElemMatch;
-};
-
-static std::pair<double, double> computeErrors(size_t actualCard, double estimatedCard) {
-    double error = estimatedCard - actualCard;
-    double relError = (actualCard == 0) ? (estimatedCard == 0 ? 0.0 : -1.0) : error / actualCard;
-    return std::make_pair(error, relError);
-}
-
-static std::string serializeQuery(QuerySpec& q, bool isElemMatch) {
-    std::ostringstream os;
-    os << "{$match: {a: {";
-    if (isElemMatch) {
-        os << "$elemMatch: {";
-    }
-    os << "$gt: " << q.low;
-    os << ", $lt: " << q.high;
-    if (isElemMatch) {
-        os << "}";
-    }
-    os << "}}}\n";
-    return os.str();
-}
-
-static std::string computeRMSE(std::vector<QuerySpec>& querySet, bool isElemMatch) {
-    double rms = 0.0, relRms = 0.0, meanAbsSelErr = 0.0;
-    size_t trialSize = querySet.size();
-    const size_t dataSize = 1000;
-
-    std::ostringstream os;
-    os << "\nQueries:\n";
-    for (auto& q : querySet) {
-        double estimatedCard = isElemMatch ? q.estElemMatch : q.estMatch;
-        double actualCard = isElemMatch ? q.actElemMatch : q.actMatch;
-
-        auto [error, relError] = computeErrors(actualCard, estimatedCard);
-        rms += error * error;
-        relRms += relError * relError;
-        meanAbsSelErr += std::abs(error);
-        os << serializeQuery(q, isElemMatch);
-        os << "Estimated: " << estimatedCard << " Actual " << actualCard << " (Error: " << error
-           << " RelError: " << relError << ")\n\n";
-    }
-    rms = std::sqrt(rms / trialSize);
-    relRms = std::sqrt(relRms / trialSize);
-    meanAbsSelErr /= (trialSize * dataSize);
-
-    os << "=====" << (isElemMatch ? " ElemMatch errors: " : "Match errors:") << "=====\n";
-    os << "RMSE : " << rms << " RelRMSE : " << relRms
-       << " MeanAbsSelectivityError: " << meanAbsSelErr << std::endl;
-    return os.str();
-}
-
-TEST(EstimatorArrayDataTest, Histogram1000ArraysSmall10Buckets) {
-    std::vector<BucketData> scalarData{{}};
-    const ScalarHistogram scalarHist = createHistogram(scalarData);
-
-    std::vector<BucketData> minData{{0, 5.0, 0.0, 0.0},
-                                    {553, 2.0, 935.0, 303.0},
-                                    {591, 4.0, 2.0, 1.0},
-                                    {656, 2.0, 21.0, 12.0},
-                                    {678, 3.0, 6.0, 3.0},
-                                    {693, 2.0, 1.0, 1.0},
-                                    {730, 1.0, 6.0, 3.0},
-                                    {788, 1.0, 2.0, 2.0},
-                                    {847, 2.0, 4.0, 1.0},
-                                    {867, 1.0, 0.0, 0.0}};
-
-    const ScalarHistogram aMinHist = createHistogram(minData);
-
-    std::vector<BucketData> maxData{{117, 1.0, 0.0, 0.0},
-                                    {210, 1.0, 1.0, 1.0},
-                                    {591, 1.0, 8.0, 4.0},
-                                    {656, 1.0, 0.0, 0.0},
-                                    {353, 2.0, 18.0, 9.0},
-                                    {610, 5.0, 125.0, 65.0},
-                                    {733, 8.0, 134.0, 53.0},
-                                    {768, 6.0, 50.0, 16.0},
-                                    {957, 8.0, 448.0, 137.0},
-                                    {1000, 7.0, 176.0, 40.0}};
-
-    const ScalarHistogram aMaxHist = createHistogram(maxData);
-
-    std::vector<BucketData> uniqueData{{0, 5.0, 0.0, 0.0},
-                                       {16, 11.0, 74.0, 13.0},
-                                       {192, 13.0, 698.0, 148.0},
-                                       {271, 9.0, 312.0, 70.0},
-                                       {670, 7.0, 1545.0, 355.0},
-                                       {712, 9.0, 159.0, 32.0},
-                                       {776, 11.0, 247.0, 54.0},
-                                       {869, 9.0, 361.0, 85.0},
-                                       {957, 8.0, 323.0, 76.0},
-                                       {1000, 7.0, 188.0, 40.0}};
-
-    const ScalarHistogram aUniqueHist = createHistogram(uniqueData);
-
-    TypeCounts typeCounts;
-    TypeCounts arrayTypeCounts;
-    // Dataset generated as 1000 arrays of size between 3 to 5.
-    typeCounts.insert({value::TypeTags::Array, 1000});
-    arrayTypeCounts.insert({value::TypeTags::NumberInt32, 3996});
-
-    const ArrayHistogram arrHist(scalarHist,
-                                 typeCounts,
-                                 aUniqueHist,
-                                 aMinHist,
-                                 aMaxHist,
-                                 arrayTypeCounts,
-                                 0 /* emptyArrayCount */);
-
-    std::vector<QuerySpec> querySet{{10, 20, 35.7, 93.0, 37.8, 39.0},
-                                    {10, 60, 103.3, 240.0, 158.0, 196.0},
-                                    {320, 330, 554.5, 746.0, 26.0, 30.0},
-                                    {320, 400, 672.9, 832.0, 231.5, 298.0},
-                                    {980, 990, 88.8, 101.0, 36.5, 41.0},
-                                    {970, 1050, 129.7, 141.0, 129.7, 141.0}};
-
-    for (const auto q : querySet) {
-        // $match query, includeScalar = true.
-        double estCard = estimateCardRange(arrHist,
-                                           false /* lowInclusive */,
-                                           value::TypeTags::NumberInt32,
-                                           sbe::value::bitcastFrom<int32_t>(q.low),
-                                           false /* highInclusive */,
-                                           value::TypeTags::NumberInt32,
-                                           sbe::value::bitcastFrom<int32_t>(q.high),
-                                           true /* includeScalar */);
-        ASSERT_APPROX_EQUAL(estCard, q.estMatch, 0.1);
-
-        // $elemMatch query, includeScalar = false.
-        estCard = estimateCardRange(arrHist,
-                                    false /* lowInclusive */,
-                                    value::TypeTags::NumberInt32,
-                                    sbe::value::bitcastFrom<int32_t>(q.low),
-                                    false /* highInclusive */,
-                                    value::TypeTags::NumberInt32,
-                                    sbe::value::bitcastFrom<int32_t>(q.high),
-                                    false /* includeScalar */);
-        ASSERT_APPROX_EQUAL(estCard, q.estElemMatch, 0.1);
-    }
-    std::cout << computeRMSE(querySet, false /* isElemMatch */) << std::endl;
-    std::cout << computeRMSE(querySet, true /* isElemMatch */) << std::endl;
-}
-
-TEST(EstimatorArrayDataTest, Histogram1000ArraysLarge10Buckets) {
-    std::vector<BucketData> scalarData{{}};
-    const ScalarHistogram scalarHist = createHistogram(scalarData);
-
-    std::vector<BucketData> minData{{0, 2.0, 0.0, 0.0},
-                                    {1324, 4.0, 925.0, 408.0},
-                                    {1389, 5.0, 7.0, 5.0},
-                                    {1521, 2.0, 16.0, 10.0},
-                                    {1621, 2.0, 13.0, 7.0},
-                                    {1852, 5.0, 10.0, 9.0},
-                                    {1864, 2.0, 0.0, 0.0},
-                                    {1971, 1.0, 3.0, 3.0},
-                                    {2062, 2.0, 0.0, 0.0},
-                                    {2873, 1.0, 0.0, 0.0}};
-
-    const ScalarHistogram aMinHist = createHistogram(minData);
-
-    std::vector<BucketData> maxData{{2261, 1.0, 0.0, 0.0},
-                                    {2673, 1.0, 0.0, 0.0},
-                                    {2930, 1.0, 1.0, 1.0},
-                                    {3048, 2.0, 2.0, 2.0},
-                                    {3128, 3.0, 1.0, 1.0},
-                                    {3281, 2.0, 0.0, 0.0},
-                                    {3378, 2.0, 7.0, 5.0},
-                                    {3453, 4.0, 2.0, 2.0},
-                                    {3763, 6.0, 44.0, 23.0},
-                                    {5000, 1.0, 920.0, 416.0}};
-
-    const ScalarHistogram aMaxHist = createHistogram(maxData);
-
-    std::vector<BucketData> uniqueData{{0, 2.0, 0.0, 0.0},
-                                       {1106, 9.0, 1970.0, 704.0},
-                                       {1542, 11.0, 736.0, 280.0},
-                                       {3267, 6.0, 3141.0, 1097.0},
-                                       {3531, 6.0, 461.0, 175.0},
-                                       {3570, 7.0, 48.0, 20.0},
-                                       {4573, 8.0, 1851.0, 656.0},
-                                       {4619, 6.0, 65.0, 30.0},
-                                       {4782, 5.0, 265.0, 99.0},
-                                       {5000, 1.0, 342.0, 135.0}};
-
-    const ScalarHistogram aUniqueHist = createHistogram(uniqueData);
-
-    TypeCounts typeCounts;
-    TypeCounts arrayTypeCounts;
-    // Dataset generated as 1000 arrays of size between 8 to 10.
-    typeCounts.insert({value::TypeTags::Array, 1000});
-    arrayTypeCounts.insert({value::TypeTags::NumberInt32, 8940});
-
-    const ArrayHistogram arrHist(scalarHist,
-                                 typeCounts,
-                                 aUniqueHist,
-                                 aMinHist,
-                                 aMaxHist,
-                                 arrayTypeCounts,
-                                 0 /* emptyArrayCount */);
-
-    std::vector<QuerySpec> querySet{{10, 20, 13.7, 39.0, 9.7, 26.0},
-                                    {10, 60, 41.6, 108.0, 55.7, 101.0},
-                                    {1000, 1010, 705.4, 861.0, 9.7, 7.0},
-                                    {1000, 1050, 733.3, 884.0, 55.7, 87.0},
-                                    {3250, 3300, 988.0, 988.0, 59.3, 86.0},
-                                    {4970, 4980, 23.3, 53.0, 8.5, 16.0}};
-
-    for (const auto q : querySet) {
-        // $match query, includeScalar = true.
-        double estCard = estimateCardRange(arrHist,
-                                           false /* lowInclusive */,
-                                           value::TypeTags::NumberInt32,
-                                           sbe::value::bitcastFrom<int32_t>(q.low),
-                                           false /* highInclusive */,
-                                           value::TypeTags::NumberInt32,
-                                           sbe::value::bitcastFrom<int32_t>(q.high),
-                                           true /* includeScalar */);
-        ASSERT_APPROX_EQUAL(estCard, q.estMatch, 0.1);
-
-        // $elemMatch query, includeScalar = false.
-        estCard = estimateCardRange(arrHist,
-                                    false /* lowInclusive */,
-                                    value::TypeTags::NumberInt32,
-                                    sbe::value::bitcastFrom<int32_t>(q.low),
-                                    false /* highInclusive */,
-                                    value::TypeTags::NumberInt32,
-                                    sbe::value::bitcastFrom<int32_t>(q.high),
-                                    false /* includeScalar */);
-        ASSERT_APPROX_EQUAL(estCard, q.estElemMatch, 0.1);
-    }
-    std::cout << computeRMSE(querySet, false /* isElemMatch */) << std::endl;
-    std::cout << computeRMSE(querySet, true /* isElemMatch */) << std::endl;
-}
-}  // namespace
-}  // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp b/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp
deleted file mode 100644
index 1f11472c811..00000000000
--- a/src/mongo/db/query/ce/ce_dataflow_nodes_test.cpp
+++ /dev/null
@@ -1,227 +0,0 @@
-/**
- *    Copyright (C) 2022-present MongoDB, Inc.
- *
- *    This program is free software: you can redistribute it and/or modify
- *    it under the terms of the Server Side Public License, version 1,
- *    as published by MongoDB, Inc.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *    Server Side Public License for more details.
- *
- *    You should have received a copy of the Server Side Public License
- *    along with this program. If not, see
- *    <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- *    As a special exception, the copyright holders give permission to link the
- *    code of portions of this program with the OpenSSL library under certain
- *    conditions as described in each individual source file and distribute
- *    linked combinations including the program with the OpenSSL library. You
- *    must comply with the Server Side Public License in all respects for
- *    all of the code used other than as permitted herein. If you modify file(s)
- *    with this exception, you may extend this exception to your version of the
- *    file(s), but you are not obligated to do so. If you do not wish to do so,
- *    delete this exception statement from your version. If you delete this
- *    exception statement from all source files in the program, then also delete
- *    it in the license file.
- */
-
-#include "mongo/db/query/ce/ce_heuristic.h"
-#include "mongo/db/query/ce/ce_test_utils.h"
-#include "mongo/db/query/optimizer/props.h"
-#include "mongo/db/query/optimizer/utils/unit_test_utils.h"
-#include "mongo/db/query/optimizer/utils/utils.h"
-#include "mongo/unittest/unittest.h"
-
-namespace mongo::ce {
-namespace {
-
-using namespace optimizer;
-using namespace optimizer::cascades;
-
-constexpr double kCollCard = 1000.0;
-const std::string kCollName = "test";
-
-constexpr double kOtherCollCard = 200.0;
-const std::string kOtherCollName = "otherTest";
-
-constexpr double kThirdCollCard = 50.0;
-const std::string kThirdCollName = "thirdTest";
-
-class DataflowCETester : public CETester {
-public:
-    DataflowCETester() : CETester(kCollName, kCollCard, kDefaultCETestPhaseSet) {}
-
-protected:
-    std::unique_ptr<CEInterface> getCETransport() const override {
-        return std::make_unique<HeuristicCE>();
-    }
-};
-
-namespace {
-bool isRootNodeFn(const ABT& node) {
-    return node.is<RootNode>();
-}
-}  // namespace
-
-TEST(CEDataflowTest, EstimateTrivialNodes) {
-    DataflowCETester t;
-    const auto matchCard = t.getMatchCE("{a: 1}", isRootNodeFn);
-
-    // Verify 'CollationNode' estimate returns the input cardinality.
-    ASSERT_CE(t, "[{$sort: {a: 1}}]", kCollCard);
-    ASSERT_CE(t, "[{$sort: {a: -1, b: 1}}]", kCollCard);
-    ASSERT_CE(t, "[{$match: {a: 1}}, {$sort: {a: 1, b: 1}}]", matchCard);
-
-    // Verify 'EvaluationNode' estimate.
-    ASSERT_CE(t, "[{$project: {a: {$add: [\"$a\", 1]}}}]", kCollCard);
-    ASSERT_CE(t, "[{$match: {a: 1}}, {$project: {a: {$add: [\"$a\", 1]}}}]", matchCard);
-}
-
-TEST(CEDataflowTest, EstimateUnionNode) {
-    auto makeUnionBranch = [](const std::string& collName) {
-        ProjectionName scanVar{"scan_" + collName};
-        auto scanNode = make<ScanNode>(scanVar, collName);
-        auto evalPath =
-            make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>(scanVar));
-        return make<EvaluationNode>("a", std::move(evalPath), std::move(scanNode));
-    };
-
-    // Verify that the estimate of 'UnionNode' always returns the sum of estimates of its children.
-    // In the following tests we force a simple plan to be generated by passing in a 'manually'
-    // constructed ABT.
-    {
-        DataflowCETester t;
-        t.addCollection(kOtherCollName, kOtherCollCard, {});
-        t.addCollection(kThirdCollName, kThirdCollCard, {});
-        {
-            auto unionNode = make<UnionNode>(
-                ProjectionNameVector{"a"},
-                makeSeq(makeUnionBranch(kCollName), makeUnionBranch(kOtherCollName)));
-            auto rootNode = make<RootNode>(
-                properties::ProjectionRequirement{ProjectionNameVector{"a"}}, std::move(unionNode));
-            ASSERT_CE(t, rootNode, kCollCard + kOtherCollCard);
-        }
-        {
-            auto unionNode = make<UnionNode>(
-                ProjectionNameVector{"a"},
-                makeSeq(makeUnionBranch(kCollName), makeUnionBranch(kOtherCollName)));
-            auto parentUnionNode =
-                make<UnionNode>(ProjectionNameVector{"a"},
-                                makeSeq(std::move(unionNode), makeUnionBranch(kThirdCollName)));
-            auto rootNode =
-                make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"a"}},
-                               std::move(parentUnionNode));
-            ASSERT_CE(t, rootNode, kCollCard + kOtherCollCard + kThirdCollCard);
-        }
-    }
-
-    // The following plans include a UnionNode.
-    {
-        DataflowCETester t;
-        t.setCollCard(2000);
-        t.setIndexes(
-            {{"indexA", makeIndexDefinition("a", CollationOp::Ascending, /* isMultiKey */ true)}});
-        t.setDisableScan(true);
-        ASSERT_MATCH_CE(t, {"{a: [12]}"}, 1);
-    }
-    {
-        DataflowCETester t;
-        t.setIndexes(
-            {{"indexA", makeIndexDefinition("a", CollationOp::Ascending, /* isMultiKey */ false)},
-             {"indexB", makeIndexDefinition("b", CollationOp::Ascending, /* isMultiKey */ false)}});
-        t.setDisableScan(true);
-        ASSERT_MATCH_CE(t, {"{a: 1, b: 2}"}, 5.62341);
-    }
-}
-
-TEST(CEDataflowTest, EstimateLimitSkipNode) {
-    DataflowCETester t;
-    const CEType matchCard = t.getMatchCE("{a: 1}", isRootNodeFn);
-
-    // Verify that 'LimitSkipNode' estimate with only a limit set is min(limit, inputCE).
-    ASSERT_CE(t, "[{$limit: 1}]", 1.0);
-    ASSERT_CE(t, "[{$limit: 50}]", 50.0);
-    ASSERT_CE(t, "[{$limit: 1000}]", kCollCard);
-    ASSERT_CE(t, "[{$limit: 10000}]", kCollCard);
-    ASSERT_CE(t, "[{$match: {a: 1}}, {$limit: 1}]", 1.0);
-    ASSERT_CE(t, "[{$match: {a: 1}}, {$limit: 5}]", 5.0);
-    ASSERT_CE(t, "[{$match: {a: 1}}, {$limit: 50}]", matchCard);
-    ASSERT_CE(t, "[{$match: {a: 1}}, {$limit: 1000}]", matchCard);
-
-    // Verify that 'LimitSkipNode' estimate with only a skip set is max(inputCE - skip, 0).
- ASSERT_CE(t, "[{$skip: 0}]", kCollCard); - ASSERT_CE(t, "[{$skip: 1}]", kCollCard - 1.0); - ASSERT_CE(t, "[{$skip: 50}]", kCollCard - 50.0); - ASSERT_CE(t, "[{$skip: 1000}]", 0.0); - ASSERT_CE(t, "[{$skip: 10000}]", 0.0); - ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 1}]", matchCard - 1.0); - ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 5}]", matchCard - 5.0); - ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 50}]", 0.0); - ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 1000}]", 0.0); - - // Test estimates for combinations of $limit & $skip. - ASSERT_CE(t, "[{$limit: 1}, {$skip: 1}]", 0.0); - ASSERT_CE(t, "[{$skip: 1}, {$limit: 1}]", 1.0); - ASSERT_CE(t, "[{$limit: 1}, {$skip: 50}]", 0.0); - ASSERT_CE(t, "[{$skip: 50}, {$limit: 1}]", 1.0); - ASSERT_CE(t, "[{$limit: 50}, {$skip: 1}]", 49.0); - ASSERT_CE(t, "[{$skip: 1}, {$limit: 50}]", 50.0); - ASSERT_CE(t, "[{$limit: 50}, {$skip: 50}]", 0.0); - ASSERT_CE(t, "[{$skip: 50}, {$limit: 50}]", 50.0); - ASSERT_CE(t, "[{$limit: 1000}, {$skip: 50}]", kCollCard - 50.0); - ASSERT_CE(t, "[{$skip: 50}, {$limit: 1000}]", kCollCard - 50.0); - ASSERT_CE(t, "[{$limit: 50}, {$skip: 1000}]", 0.0); - ASSERT_CE(t, "[{$skip: 1000}, {$limit: 50}]", 0.0); - ASSERT_CE(t, "[{$limit: 1000}, {$skip: 1000}]", 0.0); - ASSERT_CE(t, "[{$skip: 1000}, {$limit: 1000}]", 0.0); - - // Test estimates for combinations of $limit & $skip separated by a $match. - ASSERT_CE(t, "[{$limit: 1}, {$match: {a: 1}}, {$skip: 1}]", 0.0); - ASSERT_CE(t, "[{$limit: 1}, {$match: {a: 1}}, {$skip: 50}]", 0.0); - - // Input card to $match: 50. $match selectivity here is sqrt(50)/50. - ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 1}]", 6.07107); - ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 50}]", 0.0); - ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 1000}]", 0.0); - - // Input card to $match is kCollCard. However, our estimate is larger than matchCard because we - // have a FilterNode that does not get converted to a SargableNode in this case. The $match - // selectivity here is sqrt(1000)/1000. - ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 1}]", 30.6228); - ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 20}]", 11.6228); - ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 1000}]", 0.0); - - // Input card to $match: 999. $match selectivity here is sqrt(999)/999. - ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 1}]", 1.0); - ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 20}]", 20.0); - ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 1000}]", 31.607); - - // Input card to $match: 950. $match selectivity here is sqrt(950)/950. - ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 1}]", 1.0); - ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 20}]", 20.0); - ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 1000}]", 30.8221); - - // Input card to $match is 0.0. - ASSERT_CE(t, "[{$skip: 1000}, {$match: {a: 1}}, {$limit: 50}]", 0.0); - ASSERT_CE(t, "[{$skip: 1000}, {$match: {a: 1}}, {$limit: 1000}]", 0.0); -} - -TEST(CEDataflowTest, EstimateUnwindNode) { - DataflowCETester t; - const CEType matchCard = t.getMatchCE("{a: 1}", isRootNodeFn); - - // We assume that arrays on average have ~10 elements, so we estimate this as inputCard*10. 
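- // Worked example (illustrative): kCollCard = 1000 unwinds to 10 * 1000 = 10000,
- // and matchCard ~ 31.62 unwinds to ~316.2.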
- ASSERT_CE(t, "[{$unwind: '$a'}]", 10 * kCollCard); - ASSERT_CE(t, "[{$match: {a: 1}}, {$unwind: '$a'}]", 10 * matchCard); - ASSERT_CE(t, "[{$unwind: {path: '$a', preserveNullAndEmptyArrays: true}}]", 10 * kCollCard); - ASSERT_CE(t, - "[{$match: {a: 1}}, {$unwind: {path: '$a', preserveNullAndEmptyArrays: true}}]", - 10 * matchCard); - - // TODO SERVER-70035: implement histogram estimation of $unwind. -} - -} // namespace -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_edge_cases_test.cpp b/src/mongo/db/query/ce/ce_edge_cases_test.cpp deleted file mode 100644 index 4d8d84f831a..00000000000 --- a/src/mongo/db/query/ce/ce_edge_cases_test.cpp +++ /dev/null @@ -1,1002 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/pipeline/abt/utils.h" -#include "mongo/db/query/ce/array_histogram.h" -#include "mongo/db/query/ce/ce_test_utils.h" -#include "mongo/db/query/ce/histogram_estimation.h" -#include "mongo/db/query/ce/maxdiff_test_utils.h" -#include "mongo/db/query/ce/value_utils.h" -#include "mongo/db/query/optimizer/utils/ce_math.h" -#include "mongo/db/query/sbe_stage_builder_helpers.h" -#include "mongo/unittest/unittest.h" - -namespace mongo::ce { -namespace { - -using namespace sbe; - -constexpr double kErrorBound = 0.01; - -TEST(EstimatorTest, OneBucketIntHistogram) { - // Data set of 10 values, each with frequency 3, in the range (-inf, 100]. - // Example: { -100, -20, 0, 20, 50, 60, 70, 80, 90, 100}. - std::vector data{{100, 3.0, 27.0, 9.0}}; - const ScalarHistogram hist = createHistogram(data); - - ASSERT_EQ(30.0, getTotals(hist).card); - - // Estimates with the bucket bound. - ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kEqual)); - ASSERT_EQ(27.0, estimateIntValCard(hist, 100, EstimationType::kLess)); - ASSERT_EQ(30.0, estimateIntValCard(hist, 100, EstimationType::kLessOrEqual)); - ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater)); - ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kGreaterOrEqual)); - - // Estimates with a value inside the bucket. 
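- // Worked example (illustrative): equality inside the bucket is estimated as
- // rangeFreq / NDV = 27 / 9 = 3; $lte defaults to half the range frequency (13.5);
- // $lt subtracts the equality estimate (13.5 - 3 = 10.5); $gt and $gte are the
- // complements out of the total cardinality of 30.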
- ASSERT_EQ(3.0, estimateIntValCard(hist, 10, EstimationType::kEqual));
- // No interpolation is possible for estimates of inequalities in a single bucket. The
- // estimates are based on the default cardinality of half the bucket, +/- the estimate of
- // equality inside the bucket.
- ASSERT_EQ(10.5, estimateIntValCard(hist, 10, EstimationType::kLess));
- ASSERT_EQ(13.5, estimateIntValCard(hist, 10, EstimationType::kLessOrEqual));
- ASSERT_EQ(16.5, estimateIntValCard(hist, 10, EstimationType::kGreater));
- ASSERT_EQ(19.5, estimateIntValCard(hist, 10, EstimationType::kGreaterOrEqual));
-
- // Estimates for a value larger than the last bucket bound.
- ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual));
- ASSERT_EQ(30.0, estimateIntValCard(hist, 1000, EstimationType::kLess));
- ASSERT_EQ(30.0, estimateIntValCard(hist, 1000, EstimationType::kLessOrEqual));
- ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater));
- ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreaterOrEqual));
-}
-
-TEST(EstimatorTest, OneExclusiveBucketIntHistogram) {
- // Data set of a single value.
- // By exclusive bucket we mean a bucket with only a boundary value, that is, the range
- // frequency and NDV are zero.
- std::vector<BucketData> data{{100, 2.0, 0.0, 0.0}};
- const ScalarHistogram hist = createHistogram(data);
-
- ASSERT_EQ(2.0, getTotals(hist).card);
-
- // Estimates with the bucket boundary.
- ASSERT_EQ(2.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
- ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kLess));
- ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
-
- ASSERT_EQ(0.0, estimateIntValCard(hist, 0, EstimationType::kEqual));
- ASSERT_EQ(0.0, estimateIntValCard(hist, 0, EstimationType::kLess));
- ASSERT_EQ(2.0, estimateIntValCard(hist, 0, EstimationType::kGreater));
-
- ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual));
- ASSERT_EQ(2.0, estimateIntValCard(hist, 1000, EstimationType::kLess));
- ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater));
-}
-
-TEST(EstimatorTest, OneBucketTwoIntValuesHistogram) {
- // Data set of two values, for example {5, 100, 100}.
- std::vector<BucketData> data{{100, 2.0, 1.0, 1.0}};
- const ScalarHistogram hist = createHistogram(data);
-
- ASSERT_EQ(3.0, getTotals(hist).card);
-
- // Estimates with the bucket boundary.
- ASSERT_EQ(2.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
- ASSERT_EQ(1.0, estimateIntValCard(hist, 100, EstimationType::kLess));
- ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
-
- ASSERT_EQ(1.0, estimateIntValCard(hist, 10, EstimationType::kEqual));
- // Default estimate of half of the bucket's range frequency = 0.5.
- ASSERT_EQ(0.5, estimateIntValCard(hist, 10, EstimationType::kLess));
- ASSERT_EQ(2.5, estimateIntValCard(hist, 10, EstimationType::kGreater));
-
- ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual));
- ASSERT_EQ(3.0, estimateIntValCard(hist, 1000, EstimationType::kLess));
- ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater));
-}
-
-TEST(EstimatorTest, OneBucketTwoIntValuesHistogram2) {
- // Similar to the test above, but with a higher frequency for the second value.
- // Example: {5, 5, 5, 100, 100}.
- std::vector<BucketData> data{{100, 2.0, 3.0, 1.0}};
- const ScalarHistogram hist = createHistogram(data);
-
- ASSERT_EQ(5.0, getTotals(hist).card);
-
- // Estimates with the bucket boundary.
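- // Illustrative note (annotation): estimates at the boundary itself are exact.
- // Equality returns the bound frequency (2.0) and $lt returns the range
- // frequency (3.0).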
- ASSERT_EQ(2.0, estimateIntValCard(hist, 100, EstimationType::kEqual)); - ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kLess)); - ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater)); - - ASSERT_EQ(3.0, estimateIntValCard(hist, 10, EstimationType::kEqual)); - // Default estimate of half of the bucket's range frequency = 1.5. - ASSERT_EQ(1.5, estimateIntValCard(hist, 10, EstimationType::kLess)); - ASSERT_EQ(3.5, estimateIntValCard(hist, 10, EstimationType::kGreater)); - - ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual)); - ASSERT_EQ(5.0, estimateIntValCard(hist, 1000, EstimationType::kLess)); - ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater)); -} - -TEST(EstimatorTest, TwoBucketsIntHistogram) { - // Data set of 10 values in the range [1, 100]. - std::vector data{{1, 1.0, 0.0, 0.0}, {100, 3.0, 26.0, 8.0}}; - const ScalarHistogram hist = createHistogram(data); - - ASSERT_EQ(30.0, getTotals(hist).card); - - // Estimates for a value smaller than the first bucket. - ASSERT_EQ(0.0, estimateIntValCard(hist, -42, EstimationType::kEqual)); - ASSERT_EQ(0.0, estimateIntValCard(hist, -42, EstimationType::kLess)); - ASSERT_EQ(0.0, estimateIntValCard(hist, -42, EstimationType::kLessOrEqual)); - ASSERT_EQ(30.0, estimateIntValCard(hist, -42, EstimationType::kGreater)); - ASSERT_EQ(30.0, estimateIntValCard(hist, -42, EstimationType::kGreaterOrEqual)); - - // Estimates with bucket bounds. - ASSERT_EQ(1.0, estimateIntValCard(hist, 1, EstimationType::kEqual)); - ASSERT_EQ(0.0, estimateIntValCard(hist, 1, EstimationType::kLess)); - ASSERT_EQ(1.0, estimateIntValCard(hist, 1, EstimationType::kLessOrEqual)); - ASSERT_EQ(29.0, estimateIntValCard(hist, 1, EstimationType::kGreater)); - ASSERT_EQ(30.0, estimateIntValCard(hist, 1, EstimationType::kGreaterOrEqual)); - - ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kEqual)); - ASSERT_EQ(27.0, estimateIntValCard(hist, 100, EstimationType::kLess)); - ASSERT_EQ(30.0, estimateIntValCard(hist, 100, EstimationType::kLessOrEqual)); - ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater)); - ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kGreaterOrEqual)); - - // Estimates with a value inside the bucket. The estimates use interpolation. - // The bucket ratio for the value of 10 is smaller than the estimate for equality - // and the estimates for Less and LessOrEqual are the same. - ASSERT_APPROX_EQUAL(3.25, estimateIntValCard(hist, 10, EstimationType::kEqual), kErrorBound); - ASSERT_APPROX_EQUAL(3.36, estimateIntValCard(hist, 10, EstimationType::kLess), kErrorBound); - ASSERT_APPROX_EQUAL( - 3.36, estimateIntValCard(hist, 10, EstimationType::kLessOrEqual), kErrorBound); - - ASSERT_APPROX_EQUAL(26.64, estimateIntValCard(hist, 10, EstimationType::kGreater), kErrorBound); - ASSERT_APPROX_EQUAL( - 26.64, estimateIntValCard(hist, 10, EstimationType::kGreaterOrEqual), kErrorBound); - - // Different estimates for Less and LessOrEqual for the value of 50. 
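- // Worked example (illustrative, assuming linear interpolation for numeric
- // values): the bucket ratio for 50 is (50 - 1) / (100 - 1) ~ 0.495, so $lte is
- // 1.0 + 0.495 * 26 ~ 13.87; equality is 26 / 8 = 3.25; $lt is 13.87 - 3.25 ~
- // 10.61; $gt is 30 - 13.87 = 16.13.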
- ASSERT_APPROX_EQUAL(3.25, estimateIntValCard(hist, 50, EstimationType::kEqual), kErrorBound);
- ASSERT_APPROX_EQUAL(10.61, estimateIntValCard(hist, 50, EstimationType::kLess), kErrorBound);
- ASSERT_APPROX_EQUAL(
- 13.87, estimateIntValCard(hist, 50, EstimationType::kLessOrEqual), kErrorBound);
- ASSERT_APPROX_EQUAL(16.13, estimateIntValCard(hist, 50, EstimationType::kGreater), kErrorBound);
- ASSERT_APPROX_EQUAL(
- 19.38, estimateIntValCard(hist, 50, EstimationType::kGreaterOrEqual), kErrorBound);
-}
-
-TEST(EstimatorTest, ThreeExclusiveBucketsIntHistogram) {
- std::vector<BucketData> data{{1, 1.0, 0.0, 0.0}, {10, 8.0, 0.0, 0.0}, {100, 1.0, 0.0, 0.0}};
- const ScalarHistogram hist = createHistogram(data);
-
- ASSERT_EQ(10.0, getTotals(hist).card);
-
- ASSERT_EQ(0.0, estimateIntValCard(hist, 5, EstimationType::kEqual));
- ASSERT_EQ(1.0, estimateIntValCard(hist, 5, EstimationType::kLess));
- ASSERT_EQ(1.0, estimateIntValCard(hist, 5, EstimationType::kLessOrEqual));
- ASSERT_EQ(9.0, estimateIntValCard(hist, 5, EstimationType::kGreater));
- ASSERT_EQ(9.0, estimateIntValCard(hist, 5, EstimationType::kGreaterOrEqual));
-}
-
-TEST(EstimatorTest, OneBucketStrHistogram) {
- std::vector<BucketData> data{{"xyz", 3.0, 27.0, 9.0}};
- const ScalarHistogram hist = createHistogram(data);
-
- ASSERT_EQ(30.0, getTotals(hist).card);
-
- // Estimates with the bucket bound.
- auto [tag, value] = value::makeNewString("xyz"_sd);
- value::ValueGuard vg(tag, value);
- double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
- ASSERT_EQ(3.0, expectedCard);
- expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
- ASSERT_EQ(27.0, expectedCard);
- expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
- ASSERT_EQ(30.0, expectedCard);
- expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
- ASSERT_EQ(0.0, expectedCard);
- expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card;
- ASSERT_EQ(3.0, expectedCard);
-
- // Estimates for a value inside the bucket. Since there is no low bound value in the
- // histogram, all values smaller than the upper bound are estimated the same way, using
- // half of the bucket cardinality.
- std::tie(tag, value) = value::makeNewString("a"_sd);
- expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
- ASSERT_EQ(3.0, expectedCard);
- expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
- ASSERT_EQ(10.5, expectedCard);
- expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
- ASSERT_EQ(13.5, expectedCard);
- expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
- ASSERT_EQ(16.5, expectedCard);
- expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card;
- ASSERT_EQ(19.5, expectedCard);
-
- std::tie(tag, value) = value::makeNewString(""_sd);
- // In the special case of a single string bucket, we estimate equality to the empty string as
- // for any other string value. In practice, if there are at least 2 buckets for the string
- // data and an empty string in the data set, the empty string will be chosen as the bound of
- // the first bucket and will produce precise estimates.
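- // Illustrative note (annotation): "" is the minimum string value, so $lt yields 0
- // here, while equality still falls back to rangeFreq / NDV = 27 / 9 = 3.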
- expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_EQ(3.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; - ASSERT_EQ(30.0, expectedCard); - - // Estimates for a value larger than the upper bound. - std::tie(tag, value) = value::makeNewString("z"_sd); - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_EQ(30.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); -} - -TEST(EstimatorTest, TwoBucketsStrHistogram) { - // Data set of 100 strings in the range ["abc", "xyz"], with average frequency of 2. - std::vector data{{"abc", 2.0, 0.0, 0.0}, {"xyz", 3.0, 95.0, 48.0}}; - const ScalarHistogram hist = createHistogram(data); - - ASSERT_EQ(100.0, getTotals(hist).card); - - // Estimates for a value smaller than the first bucket bound. - auto [tag, value] = value::makeNewString("a"_sd); - value::ValueGuard vg(tag, value); - - double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_EQ(100.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; - ASSERT_EQ(100.0, expectedCard); - - // Estimates with bucket bounds. - std::tie(tag, value) = value::makeNewString("abc"_sd); - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_EQ(2.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; - ASSERT_EQ(2.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_EQ(98.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; - ASSERT_EQ(100.0, expectedCard); - - std::tie(tag, value) = value::makeNewString("xyz"_sd); - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_EQ(3.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_EQ(97.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; - ASSERT_EQ(100.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; - ASSERT_EQ(3.0, expectedCard); - - // Estimates for a value inside the bucket. 
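- // Worked example (illustrative, assuming strings are mapped to numeric values for
- // interpolation): "sun" lands roughly 78% of the way through the ["abc", "xyz"]
- // bucket, so $lte is about 2.0 + 0.78 * 95 ~ 76.4, and equality is
- // rangeFreq / NDV = 95 / 48 ~ 1.98.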
- std::tie(tag, value) = value::makeNewString("sun"_sd); - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(1.98, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_APPROX_EQUAL(74.39, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; - ASSERT_APPROX_EQUAL(76.37, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_APPROX_EQUAL(23.64, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; - ASSERT_APPROX_EQUAL(25.62, expectedCard, kErrorBound); - - // Estimate for a value very close to the bucket bound. - std::tie(tag, value) = value::makeNewString("xyw"_sd); - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(1.98, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_APPROX_EQUAL(95.02, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; - ASSERT_APPROX_EQUAL(96.99, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_APPROX_EQUAL(3.0, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; - ASSERT_APPROX_EQUAL(4.98, expectedCard, kErrorBound); -} - -TEST(EstimatorTest, TwoBucketsDateHistogram) { - // June 6, 2017 -- June 7, 2017. - const int64_t startInstant = 1496777923000LL; - const int64_t endInstant = 1496864323000LL; - const auto startDate = Date_t::fromMillisSinceEpoch(startInstant); - const auto endDate = Date_t::fromMillisSinceEpoch(endInstant); - - std::vector data{{Value(startDate), 3.0, 0.0, 0.0}, - {Value(endDate), 1.0, 96.0, 48.0}}; - const ScalarHistogram hist = createHistogram(data); - - ASSERT_EQ(100.0, getTotals(hist).card); - - const auto valueBefore = value::bitcastFrom(startInstant - 1); - double expectedCard = - estimate(hist, value::TypeTags::Date, valueBefore, EstimationType::kEqual).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Date, valueBefore, EstimationType::kLess).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Date, valueBefore, EstimationType::kGreater).card; - ASSERT_EQ(100.0, expectedCard); - - const auto valueStart = value::bitcastFrom(startInstant); - expectedCard = estimate(hist, value::TypeTags::Date, valueStart, EstimationType::kEqual).card; - ASSERT_EQ(3.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Date, valueStart, EstimationType::kLess).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Date, valueStart, EstimationType::kGreater).card; - ASSERT_EQ(97.0, expectedCard); - - const auto valueEnd = value::bitcastFrom(endInstant); - expectedCard = estimate(hist, value::TypeTags::Date, valueEnd, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Date, valueEnd, EstimationType::kLess).card; - ASSERT_EQ(99.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Date, valueEnd, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); - - const auto valueIn = value::bitcastFrom(startInstant + 43000000); - expectedCard = estimate(hist, value::TypeTags::Date, valueIn, EstimationType::kEqual).card; - 
ASSERT_EQ(2.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Date, valueIn, EstimationType::kLess).card; - ASSERT_APPROX_EQUAL(48.77, expectedCard, kErrorBound); - expectedCard = estimate(hist, value::TypeTags::Date, valueIn, EstimationType::kGreater).card; - ASSERT_APPROX_EQUAL(49.22, expectedCard, kErrorBound); - - const auto valueAfter = value::bitcastFrom(endInstant + 100); - expectedCard = estimate(hist, value::TypeTags::Date, valueAfter, EstimationType::kEqual).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Date, valueAfter, EstimationType::kLess).card; - ASSERT_EQ(100.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Date, valueAfter, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); -} - -TEST(EstimatorTest, TwoBucketsTimestampHistogram) { - // June 6, 2017 -- June 7, 2017 in seconds. - const int64_t startInstant = 1496777923LL; - const int64_t endInstant = 1496864323LL; - const Timestamp startTs{Seconds(startInstant), 0}; - const Timestamp endTs{Seconds(endInstant), 0}; - - std::vector data{{Value(startTs), 3.0, 0.0, 0.0}, {Value(endTs), 1.0, 96.0, 48.0}}; - const ScalarHistogram hist = createHistogram(data); - - ASSERT_EQ(100.0, getTotals(hist).card); - - const auto valueBefore = value::bitcastFrom(startTs.asULL() - 1); - double expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kEqual).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kLess).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kGreater).card; - ASSERT_EQ(100.0, expectedCard); - - const auto valueStart = value::bitcastFrom( - startTs.asULL()); // NB: startTs.asInt64() produces different value. 
- expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kEqual).card; - ASSERT_EQ(3.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kLess).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kGreater).card; - ASSERT_EQ(97.0, expectedCard); - - const auto valueEnd = value::bitcastFrom(endTs.asULL()); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kLess).card; - ASSERT_EQ(99.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); - - const auto valueIn = value::bitcastFrom((startTs.asULL() + endTs.asULL()) / 2); - expectedCard = estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kEqual).card; - ASSERT_EQ(2.0, expectedCard); - expectedCard = estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kLess).card; - ASSERT_APPROX_EQUAL(49.0, expectedCard, kErrorBound); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kGreater).card; - ASSERT_APPROX_EQUAL(49.0, expectedCard, kErrorBound); - - const auto valueAfter = value::bitcastFrom(endTs.asULL() + 100); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kEqual).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kLess).card; - ASSERT_EQ(100.0, expectedCard); - expectedCard = - estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); -} - -TEST(EstimatorTest, TwoBucketsObjectIdHistogram) { - const auto startOid = OID("63340d8d27afef2de7357e8d"); - const auto endOid = OID("63340dbed6cd8af737d4139a"); - ASSERT_TRUE(startOid < endOid); - - std::vector data{{Value(startOid), 2.0, 0.0, 0.0}, - {Value(endOid), 1.0, 97.0, 77.0}}; - const ScalarHistogram hist = createHistogram(data); - - ASSERT_EQ(100.0, getTotals(hist).card); - - auto [tag, value] = value::makeNewObjectId(); - value::ValueGuard vg(tag, value); - const auto oidBefore = OID("63340d8d27afef2de7357e8c"); - oidBefore.view().readInto(value::getObjectIdView(value)); - - double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_EQ(100.0, expectedCard); - - // Bucket bounds. 
- startOid.view().readInto(value::getObjectIdView(value)); - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_EQ(2.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_EQ(98.0, expectedCard); - - endOid.view().readInto(value::getObjectIdView(value)); - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_EQ(99.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); - - // ObjectId value inside the bucket. - const auto oidInside = OID("63340db2cd4d46ff39178e9d"); - oidInside.view().readInto(value::getObjectIdView(value)); - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(1.25, expectedCard, kErrorBound); - - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_APPROX_EQUAL(83.95, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_APPROX_EQUAL(14.78, expectedCard, kErrorBound); - - const auto oidAfter = OID("63340dbed6cd8af737d4139b"); - oidAfter.view().readInto(value::getObjectIdView(value)); - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_EQ(0.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_EQ(100.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); -} - -TEST(EstimatorTest, TwoExclusiveBucketsMixedHistogram) { - // Data set of mixed data types: 3 integers and 5 strings. - std::vector data{{1, 3.0, 0.0, 0.0}, {"abc", 5.0, 0.0, 0.0}}; - const ScalarHistogram hist = createHistogram(data); - const ArrayHistogram arrHist( - hist, TypeCounts{{value::TypeTags::NumberInt64, 3}, {value::TypeTags::StringSmall, 5}}); - - const auto [tagLowDbl, valLowDbl] = - std::make_pair(value::TypeTags::NumberDouble, - value::bitcastFrom(std::numeric_limits::quiet_NaN())); - - // (NaN, 1). - double expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - tagLowDbl, - valLowDbl, - false /* highInclusive */, - value::TypeTags::NumberInt32, - value::bitcastFrom(1), - true /* includeScalar */); - ASSERT_APPROX_EQUAL(0.0, expectedCard, kErrorBound); - - // (NaN, 5). - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - tagLowDbl, - valLowDbl, - false /* highInclusive */, - value::TypeTags::NumberInt32, - value::bitcastFrom(5), - true /* includeScalar */); - ASSERT_APPROX_EQUAL(3.0, expectedCard, kErrorBound); - - const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd); - value::ValueGuard vgLowStr(tagLowStr, valLowStr); - auto [tag, value] = value::makeNewString("a"_sd); - value::ValueGuard vg(tag, value); - - // [0, ""). - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - value::TypeTags::NumberInt32, - value::bitcastFrom(0), - false /* highInclusive */, - tagLowStr, - valLowStr, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(3.0, expectedCard, kErrorBound); - - // ["", "a"]. 
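- // Illustrative note (annotation): the only strings in this data set are the five
- // "abc" values, which sort after "a", so this range is expected to be empty.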
- expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagLowStr, - valLowStr, - true /* highInclusive */, - tag, - value, - true /* includeScalar */); - - ASSERT_APPROX_EQUAL(0.0, expectedCard, kErrorBound); - - std::tie(tag, value) = value::makeNewString("xyz"_sd); - // ["", "xyz"]. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagLowStr, - valLowStr, - true /* highInclusive */, - tag, - value, - true /* includeScalar */); - - ASSERT_APPROX_EQUAL(5.0, expectedCard, kErrorBound); -} - -TEST(EstimatorTest, TwoBucketsMixedHistogram) { - // Data set of mixed data types: 20 integers and 80 strings. - // Histogram with one bucket per data type. - std::vector data{{100, 3.0, 17.0, 9.0}, {"pqr", 5.0, 75.0, 25.0}}; - const ScalarHistogram hist = createHistogram(data); - const ArrayHistogram arrHist( - hist, TypeCounts{{value::TypeTags::NumberInt64, 20}, {value::TypeTags::StringSmall, 80}}); - - ASSERT_EQ(100.0, getTotals(hist).card); - - // Estimates with the bucket bounds. - ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kEqual)); - ASSERT_EQ(17.0, estimateIntValCard(hist, 100, EstimationType::kLess)); - ASSERT_EQ(80.0, estimateIntValCard(hist, 100, EstimationType::kGreater)); - - auto [tag, value] = value::makeNewString("pqr"_sd); - value::ValueGuard vg(tag, value); - double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_EQ(5.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_EQ(95.0, expectedCard); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_EQ(0.0, expectedCard); - - // Estimates for a value smaller than the first bucket bound. - ASSERT_APPROX_EQUAL(1.88, estimateIntValCard(hist, 50, EstimationType::kEqual), kErrorBound); - ASSERT_APPROX_EQUAL(6.61, estimateIntValCard(hist, 50, EstimationType::kLess), kErrorBound); - ASSERT_APPROX_EQUAL( - 8.49, estimateIntValCard(hist, 50, EstimationType::kLessOrEqual), kErrorBound); - ASSERT_APPROX_EQUAL(91.5, estimateIntValCard(hist, 50, EstimationType::kGreater), kErrorBound); - ASSERT_APPROX_EQUAL( - 93.39, estimateIntValCard(hist, 50, EstimationType::kGreaterOrEqual), kErrorBound); - - // Estimates for a value between bucket bounds. - ASSERT_EQ(0.0, estimateIntValCard(hist, 105, EstimationType::kEqual)); - - std::tie(tag, value) = value::makeNewString("a"_sd); - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(3.0, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_APPROX_EQUAL(54.5, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; - ASSERT_APPROX_EQUAL(57.5, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; - ASSERT_APPROX_EQUAL(42.5, expectedCard, kErrorBound); - expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; - ASSERT_APPROX_EQUAL(45.5, expectedCard, kErrorBound); - - // Range estimates, including min/max values per data type. - const auto [tagLowDbl, valLowDbl] = - std::make_pair(value::TypeTags::NumberDouble, - value::bitcastFrom(std::numeric_limits::quiet_NaN())); - const auto [tagHighInt, valHighInt] = - std::make_pair(value::TypeTags::NumberInt64, value::bitcastFrom(1000000)); - - // [NaN, 25]. 
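- // Illustrative note (annotation): NaN is the minimum numeric value in the sort
- // order, so this range covers all integers <= 25. With no bucket below 100, the
- // estimate falls back to roughly half of the bucket's range frequency:
- // 17 / 2 ~ 8.5.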
- expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagLowDbl, - valLowDbl, - true /* highInclusive */, - value::TypeTags::NumberInt32, - value::bitcastFrom(25), - true /* includeScalar */); - ASSERT_APPROX_EQUAL(8.49, expectedCard, kErrorBound); - - // [25, 1000000]. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - value::TypeTags::NumberInt32, - value::bitcastFrom(25), - true /* highInclusive */, - tagHighInt, - valHighInt, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(13.38, expectedCard, kErrorBound); - - // [NaN, 1000000]. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagLowDbl, - valLowDbl, - true /* highInclusive */, - tagHighInt, - valHighInt, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(20.0, expectedCard, kErrorBound); - - const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd); - value::ValueGuard vgLowStr(tagLowStr, valLowStr); - - // [NaN, ""). - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagLowDbl, - valLowDbl, - false /* highInclusive */, - tagLowStr, - valLowStr, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(20.0, expectedCard, kErrorBound); - - // [25, ""). - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - value::TypeTags::NumberInt32, - value::bitcastFrom(25), - false /* highInclusive */, - tagLowStr, - valLowStr, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(13.39, expectedCard, kErrorBound); - - // ["", "a"]. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagLowStr, - valLowStr, - true /* highInclusive */, - tag, - value, - true /* includeScalar */); - - ASSERT_APPROX_EQUAL(37.49, expectedCard, kErrorBound); - - // ["", {}). - auto [tagObj, valObj] = value::makeNewObject(); - value::ValueGuard vgObj(tagObj, valObj); - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagLowStr, - valLowStr, - false /* highInclusive */, - tagObj, - valObj, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(80.0, expectedCard, kErrorBound); - - // ["a", {}). - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tag, - value, - false /* highInclusive */, - tagObj, - valObj, - true /* includeScalar */); - - ASSERT_APPROX_EQUAL(45.5, expectedCard, kErrorBound); -} - -// TODO: enable the following test after SERVER-71376 Fix histogram generation on MacOs -#if 0 -/** - * Tests for cardinality estimates for queries over minimum values of date, timestamp, and objectId - * types. When the histogram has at least 2 buckets per data type, the minimum value, if present in - * the data, is picked as a bound for the first bucket for the corresponding data type. In this case - * the cardinality estimates are precise. To test the approximate estimation, we force the histogram - * generation to use one bucket per type (except the first numeric type). 
- */ -TEST(EstimatorTest, MinValueMixedHistogramFromData) { - const int64_t startInstant = 1506777923000LL; - const int64_t endInstant = 1516864323000LL; - const Timestamp startTs{Seconds(1516864323LL), 0}; - const Timestamp endTs{Seconds(1526864323LL), 0}; - const auto startOid = OID("63340d8d27afef2de7357e8d"); - // const auto endOid = OID("63340dbed6cd8af737d4139a"); - - std::vector data; - data.emplace_back(value::TypeTags::Date, value::bitcastFrom(startInstant)); - data.emplace_back(value::TypeTags::Date, value::bitcastFrom(endInstant)); - - data.emplace_back(value::TypeTags::Timestamp, value::bitcastFrom(startTs.asULL())); - data.emplace_back(value::TypeTags::Timestamp, value::bitcastFrom(endTs.asULL())); - - auto [tag, val] = makeInt64Value(100); - data.emplace_back(tag, val); - std::tie(tag, val) = makeInt64Value(1000); - data.emplace_back(tag, val); - - auto [strTag, strVal] = value::makeNewString("abc"_sd); - value::ValueGuard strVG(strTag, strVal); - auto [copyTag, copyVal] = value::copyValue(strTag, strVal); - data.emplace_back(copyTag, copyVal); - std::tie(strTag, strVal) = value::makeNewString("xyz"_sd); - std::tie(copyTag, copyVal) = value::copyValue(strTag, strVal); - data.emplace_back(copyTag, copyVal); - - auto [objTag, objVal] = value::makeNewObjectId(); - value::ValueGuard objVG(objTag, objVal); - startOid.view().readInto(value::getObjectIdView(objVal)); - std::tie(tag, val) = copyValue(objTag, objVal); - data.emplace_back(tag, val); - /* TODO: add another objectId value when mapping to double is fixed by SERVER-71205. - endOid.view().readInto(value::getObjectIdView(objVal)); - std::tie(tag, val) = copyValue(objTag, objVal); - data.emplace_back(tag, val); - */ - - sortValueVector(data); - - // Force each type except numbers to use a single bucket. This way there is no bucket for the - // min value if present in the data and it needs to be estimated. - const ScalarHistogram& hist = makeHistogram(data, 6); - // Mixed data are sorted in the histogram according to the BSON order as defined in bsontypes.h - // the canonicalizeBSONTypeUnsafeLookup function. - if constexpr (kCETestLogOnly) { - std::cout << printValueArray(data) << "\n"; - std::cout << "Mixed types " << hist.dump(); - } - - // Minimum ObjectId. - auto&& [minOid, inclOid] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::ObjectId); - auto [minOidTag, minOidVal] = minOid->cast()->get(); - double expectedCard = estimate(hist, minOidTag, minOidVal, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); - - // Minimum date. - const auto&& [minDate, inclDate] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Date); - const auto [minDateTag, minDateVal] = minDate->cast()->get(); - expectedCard = estimate(hist, minDateTag, minDateVal, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); - - // Minimum timestamp. - auto&& [minTs, inclTs] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Timestamp); - auto [minTsTag, minTsVal] = minTs->cast()->get(); - expectedCard = estimate(hist, minTsTag, minTsVal, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); - - // Add minimum values to the data set and create another histogram. 
- const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd); - value::ValueGuard vgLowStr(tagLowStr, valLowStr); - std::tie(copyTag, copyVal) = value::copyValue(tagLowStr, valLowStr); - data.emplace_back(copyTag, copyVal); - data.emplace_back(minDateTag, minDateVal); - data.emplace_back(minTsTag, minTsVal); - - sortValueVector(data); - const ScalarHistogram& hist2 = makeHistogram(data, 6); - if constexpr (kCETestLogOnly) { - std::cout << printValueArray(data) << "\n"; - std::cout << "Mixed types " << hist2.dump(); - } - - // Precise estimate for equality to empty string, it is a bucket boundary. - expectedCard = estimate(hist2, tagLowStr, valLowStr, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); - // Equality to the minimum date/ts value is estimated by range_frequency/NDV. - expectedCard = estimate(hist2, minDateTag, minDateVal, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); - expectedCard = estimate(hist2, minTsTag, minTsVal, EstimationType::kEqual).card; - ASSERT_EQ(1.0, expectedCard); - - // Inequality predicates using min values. - const ArrayHistogram arrHist(hist2, - TypeCounts{ - {value::TypeTags::NumberInt64, 2}, - {value::TypeTags::StringSmall, 3}, - {value::TypeTags::ObjectId, 1}, - {value::TypeTags::Date, 3}, - {value::TypeTags::Timestamp, 3}, - }); - // [minDate, startInstant], estimated by the half of the date bucket. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - minDateTag, - minDateVal, - true /* highInclusive */, - value::TypeTags::Date, - value::bitcastFrom(startInstant), - true /* includeScalar */); - ASSERT_EQ(1.0, expectedCard); - - // [minDate, endInstant], estimated by the entire date bucket. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - minDateTag, - minDateVal, - true /* highInclusive */, - value::TypeTags::Date, - value::bitcastFrom(endInstant), - true /* includeScalar */); - ASSERT_EQ(3.0, expectedCard); - - // [minDate, minTs), estimated by the entire date bucket. - // (is this interval possible or is it better to have maxDate upper bound?). - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - minDateTag, - minDateVal, - false /* highInclusive */, - minTsTag, - minTsVal, - true /* includeScalar */); - ASSERT_EQ(3.0, expectedCard); - - // [minTs, startTs], estimated by the half of the timestamp bucket. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - minTsTag, - minTsVal, - true /* highInclusive */, - value::TypeTags::Timestamp, - value::bitcastFrom(startTs.asULL()), - true /* includeScalar */); - ASSERT_EQ(1.0, expectedCard); - - // [minTs, endTs], estimated by the entire timestamp bucket. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - minTsTag, - minTsVal, - true /* highInclusive */, - value::TypeTags::Timestamp, - value::bitcastFrom(endTs.asULL()), - true /* includeScalar */); - ASSERT_EQ(3.0, expectedCard); - - // [minTs, maxTs], estimated by the entire timestamp bucket. 
- auto&& [maxTs, inclMaxTs] = getMinMaxBoundForType(false /*isMin*/, value::TypeTags::Timestamp); - const auto [maxTsTag, maxTsVal] = maxTs->cast()->get(); - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - minTsTag, - minTsVal, - true /* highInclusive */, - maxTsTag, - maxTsVal, - true /* includeScalar */); - ASSERT_EQ(3.0, expectedCard); -} -#endif - -TEST(EstimatorTest, MinValueMixedHistogramFromBuckets) { - const auto endOid = OID("63340dbed6cd8af737d4139a"); - const auto endDate = Date_t::fromMillisSinceEpoch(1526864323000LL); - const Timestamp endTs{Seconds(1526864323LL), 0}; - - std::vector data{ - {0, 1.0, 0.0, 0.0}, - {100, 4.0, 95.0, 30.0}, - {"xyz", 5.0, 95.0, 25.0}, - {Value(endOid), 5.0, 95.0, 50.0}, - {Value(endDate), 4.0, 96.0, 24.0}, - {Value(endTs), 5.0, 95.0, 50.0}, - }; - const ScalarHistogram hist = createHistogram(data); - if constexpr (kCETestLogOnly) { - std::cout << "Mixed types " << hist.dump(); - } - ASSERT_EQ(500.0, getTotals(hist).card); - - // Minimum ObjectId. - auto&& [minOid, inclOid] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::ObjectId); - auto [minOidTag, minOidVal] = minOid->cast()->get(); - double expectedCard = estimate(hist, minOidTag, minOidVal, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(1.9, expectedCard, kErrorBound); - - // Minimum date. - const auto&& [minDate, inclDate] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Date); - const auto [minDateTag, minDateVal] = minDate->cast()->get(); - expectedCard = estimate(hist, minDateTag, minDateVal, EstimationType::kEqual).card; - ASSERT_EQ(4.0, expectedCard); - - // Minimum timestamp. - auto&& [minTs, inclTs] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Timestamp); - auto [minTsTag, minTsVal] = minTs->cast()->get(); - expectedCard = estimate(hist, minTsTag, minTsVal, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(1.9, expectedCard, kErrorBound); - - // Inequality predicates using min values. - const ArrayHistogram arrHist(hist, - TypeCounts{ - {value::TypeTags::NumberInt64, 100}, - {value::TypeTags::StringSmall, 100}, - {value::TypeTags::ObjectId, 100}, - {value::TypeTags::Date, 100}, - {value::TypeTags::Timestamp, 100}, - }); - // [minDate, innerDate], estimated by the half of the date bucket. - const int64_t innerDate = 1516864323000LL; - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - minDateTag, - minDateVal, - true /* highInclusive */, - value::TypeTags::Date, - value::bitcastFrom(innerDate), - true /* includeScalar */); - ASSERT_APPROX_EQUAL(48.0, expectedCard, kErrorBound); - - // [minTs, innerTs], estimated by the half of the timestamp bucket. - const Timestamp innerTs{Seconds(1516864323LL), 0}; - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - minTsTag, - minTsVal, - true /* highInclusive */, - value::TypeTags::Timestamp, - value::bitcastFrom(innerTs.asULL()), - true /* includeScalar */); - ASSERT_APPROX_EQUAL(47.5, expectedCard, kErrorBound); -} -} // namespace -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_generated_histograms_test.cpp b/src/mongo/db/query/ce/ce_generated_histograms_test.cpp deleted file mode 100644 index 93696346447..00000000000 --- a/src/mongo/db/query/ce/ce_generated_histograms_test.cpp +++ /dev/null @@ -1,363 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include -#include - -#include "mongo/db/exec/sbe/values/value.h" -#include "mongo/db/query/ce/array_histogram.h" -#include "mongo/db/query/ce/ce_test_utils.h" -#include "mongo/db/query/ce/histogram_estimation.h" -#include "mongo/unittest/unittest.h" - -namespace mongo::ce { -namespace { - -using namespace sbe; - -constexpr double kErrorBound = 0.1; - -TEST(EstimatorTest, UniformIntStrEstimate) { - /* The code in this comment generates a dataset and creates the histogram used in this test. To - recreate the data set and the histogram, place this code in a unit test which uses the utilities - from rand_utils_new.cpp. - - constexpr int minLen = 3, maxLen = 5; - constexpr int minVal = 0, maxVal = 1000; - constexpr size_t dataSize = 1000; - constexpr size_t nBuckets = std::min(20UL, dataSize); - - MixedDistributionDescriptor dd{{DistrType::kUniform, 1.0}}; - TypeDistrVector td; - td.emplace_back(std::make_unique(dd, 0.5, 250, minVal, maxVal)); - td.emplace_back(std::make_unique(dd, 0.5, 250, minLen, maxLen)); - - std::mt19937_64 gen(0); - DatasetDescriptorNew desc{std::move(td), gen}; - - std::vector dataset; - dataset = desc.genRandomDataset(dataSize); - - const ScalarHistogram& hist = makeHistogram(dataset, nBuckets); - */ - - std::vector data{ - {2, 5, 0, 0}, {57, 4, 21, 12}, {159, 4, 59, 24}, {172, 5, 0, 0}, - {184, 4, 2, 2}, {344, 4, 73, 32}, {363, 4, 1, 1}, {420, 3, 16, 10}, - {516, 2, 49, 23}, {758, 4, 113, 54}, {931, 5, 104, 41}, {998, 4, 29, 12}, - {"3vL", 6, 30, 11}, {"9WUk", 1, 59, 24}, {"HraK", 4, 56, 26}, {"Zujbu", 1, 130, 64}, - {"kEr", 5, 80, 40}, {"rupc", 6, 44, 21}, {"up1O", 5, 16, 7}, {"ztf", 5, 37, 17}}; - - const ScalarHistogram hist = createHistogram(data); - const ArrayHistogram arrHist( - hist, TypeCounts{{value::TypeTags::NumberInt64, 515}, {value::TypeTags::StringSmall, 485}}); - - const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd); - value::ValueGuard vgLowStr(tagLowStr, valLowStr); - const auto [tagAbc, valAbc] = value::makeNewString("abc"_sd); - value::ValueGuard vg(tagAbc, valAbc); - auto [tagObj, valObj] = value::makeNewObject(); - value::ValueGuard vgObj(tagObj, valObj); - - // Predicates over bucket bound. 
- // Actual cardinality {$eq: 804} = 2. - double expectedCard = estimateIntValCard(hist, 804, EstimationType::kEqual); - ASSERT_APPROX_EQUAL(2.5, expectedCard, kErrorBound); - - // Actual cardinality {$lt: 100} = 40. - expectedCard = estimateIntValCard(hist, 100, EstimationType::kLess); - ASSERT_APPROX_EQUAL(52.4, expectedCard, kErrorBound); - - // Range query crossing the type brackets. - // Actual cardinality {$gt: 100} = 475. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - value::TypeTags::NumberInt64, - value::bitcastFrom(100), - false /* highInclusive */, - tagLowStr, - valLowStr, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(460.1, expectedCard, kErrorBound); - - // Actual cardinality {$lt: 'abc'} = 291. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagLowStr, - valLowStr, - true /* highInclusive */, - tagAbc, - valAbc, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(319.9, expectedCard, kErrorBound); - - // Actual cardinality {$gte: 'abc'} = 194. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagAbc, - valAbc, - false /* highInclusive */, - tagObj, - valObj, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(167.0, expectedCard, kErrorBound); - - // Queries over the low string bound. - // Actual cardinality {$eq: ''} = 0. - expectedCard = estimateCardEq(arrHist, tagLowStr, valLowStr, true); - ASSERT_APPROX_EQUAL(2.727, expectedCard, 0.001); - - // Actual cardinality {$gt: ''} = 485. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - tagLowStr, - valLowStr, - false /* highInclusive */, - tagObj, - valObj, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(485, expectedCard, 0.001); -} - -TEST(EstimatorTest, IntStrArrayEstimate) { - /* The code in this comment generates a dataset of 1000 integers, strings and arrays of integers - and strings and creates the histogram used in this test. To recreate the data set and the - histogram, place this code in a unit test which uses the utilities from rand_utils_new.cpp. 
- - constexpr int minLen = 2, maxLen = 5; - constexpr int minVal = 0, maxVal = 1000; - constexpr size_t dataSize = 1000; - constexpr size_t nBuckets = std::min(20UL, dataSize); - - MixedDistributionDescriptor dd{{DistrType::kUniform, 1.0}}; - TypeDistrVector td1; - td1.emplace_back(std::make_unique(dd, 0.7, 200, minVal, maxVal)); - td1.emplace_back(std::make_unique(dd, 0.3, 100, minLen, maxLen)); - - std::mt19937_64 gen(5); - auto desc1 = std::make_unique(std::move(td1), gen); - - TypeDistrVector td2; - td2.emplace_back(std::make_unique(dd, 0.4, 200, minVal, maxVal)); - td2.emplace_back(std::make_unique(dd, 0.3, 200, minLen, maxLen)); - td2.emplace_back(std::make_unique(dd, 0.3, 200, 2, 6, std::move(desc1), - 0.0)); - - DatasetDescriptorNew desc{std::move(td2), gen}; - std::vector dataset; - dataset = desc.genRandomDataset(dataSize); - - const ScalarHistogram& hist = makeHistogram(dataset, nBuckets); - */ - - std::vector scalarData{ - {10, 1, 0, 0}, {11, 4, 0, 0}, {44, 2, 5, 2}, {213, 3, 40, 20}, - {256, 5, 13, 6}, {270, 3, 9, 2}, {407, 3, 56, 28}, {510, 3, 32, 16}, - {524, 3, 0, 0}, {561, 5, 16, 8}, {583, 3, 4, 3}, {599, 3, 1, 1}, - {663, 5, 19, 9}, {681, 5, 6, 2}, {873, 5, 75, 37}, {909, 4, 16, 7}, - {994, 3, 36, 14}, {"9TcY", 4, 44, 23}, {"Zow00", 5, 134, 67}, {"zsS", 2, 130, 66}, - }; - - const ScalarHistogram scalarHist = createHistogram(scalarData); - - std::vector minData{ - {12, 5, 0, 0}, {17, 8, 0, 0}, {28, 7, 7, 1}, {55, 5, 22, 5}, - {110, 5, 45, 11}, {225, 4, 43, 15}, {563, 3, 98, 36}, {643, 4, 3, 2}, - {701, 4, 9, 5}, {845, 1, 6, 4}, {921, 2, 0, 0}, {980, 1, 0, 0}, - {"1l", 9, 16, 4}, {"8YN", 4, 19, 5}, {"PE2OO", 2, 41, 15}, {"WdJ", 8, 25, 7}, - {"dKb7", 9, 17, 6}, {"msdP", 12, 25, 10}, {"t7wmp", 5, 15, 6}, {"yx", 2, 13, 4}, - }; - - const ScalarHistogram minHist = createHistogram(minData); - - std::vector maxData{ - {26, 2, 0, 0}, {79, 3, 0, 0}, {147, 1, 0, 0}, {207, 2, 0, 0}, - {362, 6, 7, 5}, {563, 3, 47, 19}, {603, 9, 2, 1}, {676, 6, 21, 10}, - {702, 6, 9, 4}, {712, 6, 0, 0}, {759, 8, 4, 1}, {774, 6, 3, 1}, - {831, 9, 28, 9}, {948, 7, 51, 15}, {981, 3, 33, 8}, {"9Iey", 4, 20, 8}, - {"Ji", 3, 21, 8}, {"WdJ", 9, 26, 10}, {"msdP", 9, 59, 20}, {"zbI", 3, 68, 16}, - }; - - const ScalarHistogram maxHist = createHistogram(maxData); - - std::vector uniqueData{ - {12, 5, 0, 0}, {28, 8, 15, 2}, {55, 8, 23, 5}, {110, 5, 59, 12}, - {225, 8, 79, 18}, {362, 8, 88, 20}, {507, 10, 165, 36}, {572, 5, 25, 6}, - {603, 12, 25, 3}, {712, 6, 106, 19}, {759, 11, 17, 4}, {774, 6, 3, 1}, - {831, 14, 50, 13}, {981, 3, 105, 25}, {"547DP", 4, 43, 9}, {"9Iey", 4, 8, 1}, - {"WdJ", 9, 85, 26}, {"ZGYcw", 2, 14, 4}, {"msdP", 14, 80, 21}, {"zbI", 3, 74, 17}, - }; - - const ScalarHistogram uniqueHist = createHistogram(uniqueData); - - TypeCounts typeCounts{{value::TypeTags::NumberInt64, 388}, - {value::TypeTags::StringSmall, 319}, - {value::TypeTags::Array, 293}}; - TypeCounts arrayTypeCounts{{value::TypeTags::NumberInt64, 874}, - {value::TypeTags::StringSmall, 340}}; - const ArrayHistogram arrHist(scalarHist, - typeCounts, - uniqueHist, - minHist, - maxHist, - arrayTypeCounts, - 0 /* No empty arrays */); - - const auto [tagLowDbl, valLowDbl] = - std::make_pair(value::TypeTags::NumberDouble, - value::bitcastFrom(std::numeric_limits::quiet_NaN())); - const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd); - value::ValueGuard vgLowStr(tagLowStr, valLowStr); - - // Actual cardinality {$lt: 100} = 115. 
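- // Illustrative note (annotation): with includeScalar = true the estimate accounts
- // for both scalar values and array elements; the estimate of ~109.9 against an
- // actual cardinality of 115 is a relative error of roughly 4%.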
- double expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - tagLowDbl, - valLowDbl, - false /* highInclusive */, - value::TypeTags::NumberInt64, - value::bitcastFrom(100), - true /* includeScalar */); - ASSERT_APPROX_EQUAL(109.9, expectedCard, kErrorBound); - - // Actual cardinality {$gt: 502} = 434. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - value::TypeTags::NumberInt64, - value::bitcastFrom(500), - false /* highInclusive */, - tagLowStr, - valLowStr, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(443.8, expectedCard, kErrorBound); - - // Actual cardinality {$gte: 502} = 437. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - value::TypeTags::NumberInt64, - value::bitcastFrom(500), - false /* highInclusive */, - tagLowStr, - valLowStr, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(448.3, expectedCard, kErrorBound); - - // Actual cardinality {$eq: ''} = 0. - expectedCard = estimateCardEq(arrHist, tagLowStr, valLowStr, true /* includeScalar */); - ASSERT_APPROX_EQUAL(6.69, expectedCard, 0.001); - - // Actual cardinality {$eq: 'DD2'} = 2. - auto [tagStr, valStr] = value::makeNewString("DD2"_sd); - value::ValueGuard vg(tagStr, valStr); - expectedCard = estimateCardEq(arrHist, tagStr, valStr, true /* includeScalar */); - ASSERT_APPROX_EQUAL(5.27, expectedCard, kErrorBound); - - // Actual cardinality {$lte: 'DD2'} = 120. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagLowStr, - valLowStr, - true /* highInclusive */, - tagStr, - valStr, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(160.6, expectedCard, kErrorBound); - - // Actual cardinality {$gt: 'DD2'} = 450. - auto [tagObj, valObj] = value::makeNewObject(); - value::ValueGuard vgObj(tagObj, valObj); - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - tagStr, - valStr, - false /* highInclusive */, - tagObj, - valObj, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(411.2, expectedCard, kErrorBound); - - // Queries with $elemMatch. - const auto [tagInt, valInt] = - std::make_pair(value::TypeTags::NumberInt64, value::bitcastFrom(603)); - - // Actual cardinality {$match: {a: {$elemMatch: {$eq: 603}}}} = 12. - expectedCard = estimateCardEq(arrHist, tagInt, valInt, false /* includeScalar */); - ASSERT_APPROX_EQUAL(12.0, expectedCard, kErrorBound); - - // Actual cardinality {$match: {a: {$elemMatch: {$lte: 603}}}} = 252. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - tagLowDbl, - valLowDbl, - true /* highInclusive */, - tagInt, - valInt, - false /* includeScalar */); - ASSERT_APPROX_EQUAL(293.0, expectedCard, kErrorBound); - - // Actual cardinality {$match: {a: {$elemMatch: {$gte: 603}}}} = 200. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagInt, - valInt, - false /* highInclusive */, - tagLowStr, - valLowStr, - false /* includeScalar */); - ASSERT_APPROX_EQUAL(250.8, expectedCard, kErrorBound); - - // Actual cardinality {$match: {a: {$elemMatch: {$eq: 'cu'}}}} = 7. - std::tie(tagStr, valStr) = value::makeNewString("cu"_sd); - expectedCard = estimateCardEq(arrHist, tagStr, valStr, false /* includeScalar */); - ASSERT_APPROX_EQUAL(3.8, expectedCard, kErrorBound); - - // Actual cardinality {$match: {a: {$elemMatch: {$gte: 'cu'}}}} = 125. 
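- // Illustrative note (annotation): with includeScalar = false ($elemMatch
- // semantics) the scalar histogram is excluded and the bounds are estimated
- // against the array-based histograms instead.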
- expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagStr, - valStr, - false /* highInclusive */, - tagObj, - valObj, - false /* includeScalar */); - ASSERT_APPROX_EQUAL(109.7, expectedCard, kErrorBound); - - // Actual cardinality {$match: {a: {$elemMatch: {$lte: 'cu'}}}} = 141. - expectedCard = estimateCardRange(arrHist, - true /* lowInclusive */, - tagLowStr, - valLowStr, - true /* highInclusive */, - tagStr, - valStr, - false /* includeScalar */); - ASSERT_APPROX_EQUAL(156.1, expectedCard, kErrorBound); -} -} // namespace -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_heuristic.cpp b/src/mongo/db/query/ce/ce_heuristic.cpp deleted file mode 100644 index dcdc6e698e4..00000000000 --- a/src/mongo/db/query/ce/ce_heuristic.cpp +++ /dev/null @@ -1,611 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/query/ce/ce_heuristic.h" - -#include "mongo/db/query/optimizer/cascades/memo.h" -#include "mongo/db/query/optimizer/utils/ce_math.h" -#include "mongo/util/assert_util.h" - -namespace mongo::ce { -namespace { -namespace cascades = optimizer::cascades; -namespace properties = optimizer::properties; - -using ABT = optimizer::ABT; -using CEType = optimizer::CEType; -using LogicalProps = properties::LogicalProps; -using Memo = cascades::Memo; -using Metadata = optimizer::Metadata; - -// Invalid estimate - an arbitrary negative value used for initialization. -constexpr SelectivityType kInvalidSel = -1.0; - -constexpr SelectivityType kDefaultFilterSel = 0.1; -constexpr SelectivityType kDefaultExistsSel = 0.70; - -// The selectivities used in the piece-wise function for open-range intervals. -// Note that we assume a smaller input cardinality will result in a less selective range. -constexpr SelectivityType kSmallCardOpenRangeSel = 0.70; -constexpr SelectivityType kMediumCardOpenRangeSel = 0.45; -constexpr SelectivityType kLargeCardOpenRangeSel = 0.33; - -// The selectivities used in the piece-wise function for closed-range intervals. -// Note that we assume a smaller input cardinality will result in a less selective range. 
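// (Illustration, using the constants below: a closed range keeps 50% of a 9-document
// input but only 20% of a 10,000-document input.)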
-constexpr SelectivityType kSmallCardClosedRangeSel = 0.50;
-constexpr SelectivityType kMediumCardClosedRangeSel = 0.33;
-constexpr SelectivityType kLargeCardClosedRangeSel = 0.20;
-
-// Global and Local selectivity should multiply to the Complete selectivity.
-constexpr SelectivityType kDefaultCompleteGroupSel = 0.01;
-constexpr SelectivityType kDefaultLocalGroupSel = 0.02;
-constexpr SelectivityType kDefaultGlobalGroupSel = 0.5;
-
-// The following constants are the steps used in the piece-wise functions that select selectivities
-// based on input cardinality.
-constexpr CEType kSmallLimit = 20.0;
-constexpr CEType kMediumLimit = 100.0;
-
-// Assumed average number of elements in an array.
-constexpr CEType kDefaultAverageArraySize = 10.0;
-
-/**
- * Default selectivity of equalities. To avoid extremely small selectivities for small
- * cardinalities, which would result in zero cardinality for many small inputs, the
- * estimate is scaled as inputCard grows. The bigger inputCard, the smaller the
- * selectivity.
- */
-SelectivityType equalitySel(const CEType inputCard) {
-    uassert(6716604, "Zero cardinality must be handled by the caller.", inputCard > 0.0);
-    if (inputCard <= 1.0) {
-        // If the input has at most one value, a condition cannot reduce it any further.
-        return 1.0;
-    }
-    return std::sqrt(inputCard) / inputCard;
-}
-
-/**
- * Default selectivity of intervals with bounds on both ends. These intervals are
- * considered less selective than equalities.
- * Examples: (a > 'abc' AND a < 'hta'), (0 < b <= 13)
- */
-SelectivityType closedRangeSel(const CEType inputCard) {
-    SelectivityType sel = kInvalidSel;
-    if (inputCard < kSmallLimit) {
-        sel = kSmallCardClosedRangeSel;
-    } else if (inputCard < kMediumLimit) {
-        sel = kMediumCardClosedRangeSel;
-    } else {
-        sel = kLargeCardClosedRangeSel;
-    }
-    return sel;
-}
-
-/**
- * Default selectivity of intervals open on one end. These intervals are
- * considered less selective than those with both ends specified by the user query.
- * Examples: (a > 'xyz'), (b <= 13)
- */
-SelectivityType openRangeSel(const CEType inputCard) {
-    SelectivityType sel = kInvalidSel;
-    if (inputCard < kSmallLimit) {
-        sel = kSmallCardOpenRangeSel;
-    } else if (inputCard < kMediumLimit) {
-        sel = kMediumCardOpenRangeSel;
-    } else {
-        sel = kLargeCardOpenRangeSel;
-    }
-    return sel;
-}
-
-mongo::sbe::value::TypeTags constType(const Constant* constBoundPtr) {
-    if (constBoundPtr == nullptr) {
-        return mongo::sbe::value::TypeTags::Nothing;
-    }
-    const auto [tag, val] = constBoundPtr->get();
-    return tag;
-}
-
-mongo::sbe::value::TypeTags boundType(const BoundRequirement& bound) {
-    return constType(bound.getBound().cast<Constant>());
-}
-
-SelectivityType intervalSel(const IntervalRequirement& interval, const CEType inputCard) {
-    SelectivityType sel = kInvalidSel;
-    if (interval.isFullyOpen()) {
-        sel = 1.0;
-    } else if (interval.isEquality()) {
-        sel = equalitySel(inputCard);
-    } else if (interval.getHighBound().isPlusInf() || interval.getLowBound().isMinusInf() ||
-               boundType(interval.getLowBound()) != boundType(interval.getHighBound())) {
-        // The interval has an actual bound only on one of its ends if:
-        // - one of the bounds is infinite, or
-        // - the bounds are of different types - this is the case when, due to type bracketing,
-        //   one of the bounds is the lowest/highest value of the previous/next type.
-        // TODO: Notice that sometimes type bracketing uses a min/max value from the same type,
-        // so sometimes we may not detect an open-ended interval.
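        // Illustration (not normative), assuming the defaults above with inputCard = 1000:
        //   equality                    -> sqrt(1000)/1000 ~= 0.0316
        //   one-sided or type-bracketed -> kLargeCardOpenRangeSel   = 0.33
        //   closed, same-type bounds    -> kLargeCardClosedRangeSel = 0.20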
-        sel = openRangeSel(inputCard);
-    } else {
-        sel = closedRangeSel(inputCard);
-    }
-    uassert(6716603, "Invalid selectivity.", validSelectivity(sel));
-    return sel;
-}
-
-SelectivityType negationSel(SelectivityType sel) {
-    return 1.0 - sel;
-}
-
-SelectivityType operationSel(const Operations op, const CEType inputCard) {
-    switch (op) {
-        case Operations::Eq:
-            return equalitySel(inputCard);
-        case Operations::Neq:
-            return negationSel(equalitySel(inputCard));
-        case Operations::EqMember:
-            // Reached when the query has $in. We don't handle it yet.
-            return kDefaultFilterSel;
-        case Operations::Gt:
-        case Operations::Gte:
-        case Operations::Lt:
-        case Operations::Lte:
-            return openRangeSel(inputCard);
-        default:
-            MONGO_UNREACHABLE;
-    }
-}
-
-SelectivityType intervalSel(const PathCompare& left,
-                            const PathCompare& right,
-                            const CEType inputCard) {
-    if (left.op() == Operations::EqMember || right.op() == Operations::EqMember) {
-        // Reached when the query has $in. We don't handle it yet.
-        return kDefaultFilterSel;
-    }
-
-    bool lowBoundUnknown = false;
-    bool highBoundUnknown = false;
-    boost::optional<mongo::sbe::value::TypeTags> lowBoundType;
-    boost::optional<mongo::sbe::value::TypeTags> highBoundType;
-
-    for (const auto& compare : {left, right}) {
-        switch (compare.op()) {
-            case Operations::Eq: {
-                // This branch is reached when we have a conjunction of equalities on the same path.
-                uassert(6777601,
-                        "Expected conjunction of equalities.",
-                        left.op() == Operations::Eq && right.op() == Operations::Eq);
-
-                const auto leftConst = left.getVal().cast<Constant>();
-                const auto rightConst = right.getVal().cast<Constant>();
-                if (leftConst && rightConst && !(*leftConst == *rightConst)) {
-                    // Equality comparison on different constants is a contradiction.
-                    return 0.0;
-                }
-                // We can't tell if the equalities result in a contradiction or not, so we use the
-                // default equality selectivity.
-                return equalitySel(inputCard);
-            }
-            case Operations::Gt:
-            case Operations::Gte:
-                lowBoundUnknown = lowBoundUnknown || compare.getVal().is<Variable>();
-                lowBoundType = constType(compare.getVal().cast<Constant>());
-                break;
-            case Operations::Lt:
-            case Operations::Lte:
-                highBoundUnknown = highBoundUnknown || compare.getVal().is<Variable>();
-                highBoundType = constType(compare.getVal().cast<Constant>());
-                break;
-            default:
-                MONGO_UNREACHABLE;
-        }
-    }
-
-    if (lowBoundType && highBoundType &&
-        (lowBoundType == highBoundType || lowBoundUnknown || highBoundUnknown)) {
-        // The interval is closed only if:
-        // - it has both a low and a high bound, and
-        // - the bounds are of the same type.
-        //
-        // If the bounds are of different types, it implies that one bound is the
-        // lowest/highest value of the previous/next type and has been added for type bracketing
-        // purposes. We treat such bounds as infinity.
-        //
-        // If there are unknown boundaries (Variables), we assume that they are of the same type
-        // as the other bound.
-        //
-        // TODO: Notice that sometimes type bracketing uses a min/max value from the same type,
-        // so sometimes we may not detect an open-ended interval.
-        return closedRangeSel(inputCard);
-    }
-
-    if (lowBoundType || highBoundType) {
-        return openRangeSel(inputCard);
-    }
-
-    MONGO_UNREACHABLE;
-}
-
-/**
- * Heuristic selectivity estimation for EvalFilter nodes. Used for estimating cardinalities of
- * FilterNodes. The estimate is computed by traversing the tree bottom-up, applying default
- * selectivity functions to atomic predicates (comparisons), and combining child selectivities:
- * multiplication for conjunctions and inclusion-exclusion addition for disjunctions.
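 *
 * Worked example (illustrative, using the defaults above): over 10,000 input documents
 * each open-range comparison gets selectivity 0.33, so a conjunction of two such
 * comparisons estimates 0.33 * 0.33 * 10000 ~= 1089 documents, while the corresponding
 * disjunction estimates (0.33 + 0.33 - 0.33 * 0.33) * 10000 ~= 5511 documents.
 * A hypothetical caller, using only names defined in this file, would be:
 *
 *   const SelectivityType sel = EvalFilterSelectivityTransport::derive(inputCard, filter.ref());
 *   const CEType cardinality = sel * inputCard;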
- */
-class EvalFilterSelectivityTransport {
-public:
-    /**
-     * Helper class for holding values passed from child to parent nodes when traversing the tree.
-     */
-    struct EvalFilterSelectivityResult {
-        // Each item represents a field in a dotted path.
-        // Collected while traversing a path expression.
-        // Used for deciding whether a conjunction of comparisons is an interval or not.
-        FieldPathType path;
-        // When handling a PathComposeM, we need to access its child comparisons, which might be
-        // hidden under path expressions.
-        const PathCompare* compare;
-        // The selectivity estimate.
-        SelectivityType selectivity;
-    };
-
-    EvalFilterSelectivityResult transport(const EvalFilter& /*node*/,
-                                          CEType /*inputCard*/,
-                                          EvalFilterSelectivityResult pathResult,
-                                          EvalFilterSelectivityResult /*inputResult*/) {
-        return pathResult;
-    }
-
-    EvalFilterSelectivityResult transport(const PathGet& node,
-                                          CEType /*inputCard*/,
-                                          EvalFilterSelectivityResult childResult) {
-        childResult.path.push_back(node.name());
-        return childResult;
-    }
-
-    EvalFilterSelectivityResult transport(const PathTraverse& node,
-                                          CEType /*inputCard*/,
-                                          EvalFilterSelectivityResult childResult) {
-        return childResult;
-    }
-
-    EvalFilterSelectivityResult transport(const PathCompare& node,
-                                          CEType inputCard,
-                                          EvalFilterSelectivityResult /*childResult*/) {
-        // Note that the result will be ignored if this operation is part of an interval.
-        const SelectivityType sel = operationSel(node.op(), inputCard);
-        return {{}, &node, sel};
-    }
-
-    EvalFilterSelectivityResult transport(const PathComposeM& node,
-                                          CEType inputCard,
-                                          EvalFilterSelectivityResult leftChildResult,
-                                          EvalFilterSelectivityResult rightChildResult) {
-        const bool isInterval = leftChildResult.compare && rightChildResult.compare &&
-            leftChildResult.path == rightChildResult.path;
-
-        const SelectivityType sel = isInterval
-            ? intervalSel(*leftChildResult.compare, *rightChildResult.compare, inputCard)
-            : conjunctionSel(leftChildResult.selectivity, rightChildResult.selectivity);
-
-        return {{}, nullptr, sel};
-    }
-
-    EvalFilterSelectivityResult transport(const PathComposeA& node,
-                                          CEType /*inputCard*/,
-                                          EvalFilterSelectivityResult leftChildResult,
-                                          EvalFilterSelectivityResult rightChildResult) {
-        const SelectivityType sel =
-            disjunctionSel(leftChildResult.selectivity, rightChildResult.selectivity);
-
-        return {{}, nullptr, sel};
-    }
-
-    EvalFilterSelectivityResult transport(const UnaryOp& node,
-                                          CEType /*inputCard*/,
-                                          EvalFilterSelectivityResult childResult) {
-        switch (node.op()) {
-            case Operations::Not:
-                childResult.selectivity = negationSel(childResult.selectivity);
-                return childResult;
-            case Operations::Neg:
-                // If we see negation (-) in a UnaryOp, we ignore it for CE purposes.
-                return childResult;
-            default:
-                MONGO_UNREACHABLE;
-        }
-    }
-
-    EvalFilterSelectivityResult transport(const PathConstant& /*node*/,
-                                          CEType /*inputCard*/,
-                                          EvalFilterSelectivityResult childResult) {
-        return childResult;
-    }
-
-    EvalFilterSelectivityResult transport(const PathDefault& node,
-                                          CEType inputCard,
-                                          EvalFilterSelectivityResult childResult) {
-        if (node.getDefault() == Constant::boolean(false)) {
-            // We have a {$exists: true} predicate on this path if we have a Constant[false] child
-            // here. Note that {$exists: false} is handled by the presence of a negation expression
-            // higher in the ABT.
-            childResult.selectivity = kDefaultExistsSel;
-        }
-        return childResult;
-    }
-
-    template <typename T, typename... Ts>
-    EvalFilterSelectivityResult transport(const T& /*node*/, Ts&&...)
{ - return {{}, nullptr, kDefaultFilterSel}; - } - - static SelectivityType derive(const CEType inputCard, const ABT::reference_type ref) { - EvalFilterSelectivityTransport instance; - const auto result = algebra::transport(ref, instance, inputCard); - return result.selectivity; - } - -private: - SelectivityType negationSel(const SelectivityType in) { - return 1.0 - in; - } - - SelectivityType conjunctionSel(const SelectivityType left, const SelectivityType right) { - return left * right; - } - - SelectivityType disjunctionSel(const SelectivityType left, const SelectivityType right) { - // We sum the selectivities and subtract the overlapping part so that it's only counted - // once. - return left + right - left * right; - } -}; - -class CEHeuristicTransport { -public: - CEType transport(const ScanNode& node, CEType /*bindResult*/) { - // Default cardinality estimate. - const CEType metadataCE = _metadata._scanDefs.at(node.getScanDefName()).getCE(); - return (metadataCE < 0.0) ? kDefaultCard : metadataCE; - } - - CEType transport(const ValueScanNode& node, CEType /*bindResult*/) { - return node.getArraySize(); - } - - CEType transport(const MemoLogicalDelegatorNode& node) { - return properties::getPropertyConst( - _memo.getLogicalProps(node.getGroupId())) - .getEstimate(); - } - - CEType transport(const FilterNode& node, CEType childResult, CEType /*exprResult*/) { - if (childResult == 0.0) { - // Early out and return 0 since we don't expect to get more results. - return 0.0; - } - if (node.getFilter() == Constant::boolean(true)) { - // Trivially true filter. - return childResult; - } - if (node.getFilter() == Constant::boolean(false)) { - // Trivially false filter. - return 0.0; - } - - const SelectivityType sel = - EvalFilterSelectivityTransport::derive(childResult, node.getFilter().ref()); - - return std::max(sel * childResult, kMinCard); - } - - CEType transport(const EvaluationNode& node, CEType childResult, CEType /*exprResult*/) { - // Evaluations do not change cardinality. - return childResult; - } - - CEType transport(const SargableNode& node, - CEType childResult, - CEType /*bindsResult*/, - CEType /*refsResult*/) { - // Early out and return 0 since we don't expect to get more results. - if (childResult == 0.0) { - return 0.0; - } - - SelectivityType topLevelSel = 1.0; - std::vector topLevelSelectivities; - for (const auto& [key, req] : node.getReqMap()) { - if (req.getIsPerfOnly()) { - // Ignore perf-only requirements. - continue; - } - - SelectivityType disjSel = 1.0; - std::vector disjSelectivities; - // Intervals are in DNF. - const auto intervalDNF = req.getIntervals(); - const auto disjuncts = intervalDNF.cast()->nodes(); - for (const auto& disjunct : disjuncts) { - const auto& conjuncts = disjunct.cast()->nodes(); - SelectivityType conjSel = 1.0; - std::vector conjSelectivities; - for (const auto& conjunct : conjuncts) { - const auto& interval = conjunct.cast()->getExpr(); - const SelectivityType sel = intervalSel(interval, childResult); - conjSelectivities.push_back(sel); - } - conjSel = ce::conjExponentialBackoff(std::move(conjSelectivities)); - disjSelectivities.push_back(conjSel); - } - disjSel = ce::disjExponentialBackoff(std::move(disjSelectivities)); - topLevelSelectivities.push_back(disjSel); - } - - if (topLevelSelectivities.empty()) { - return 1.0; - } - // The elements of the PartialSchemaRequirements map represent an implicit conjunction. 
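        // As exercised by the unit tests in this patch, conjExponentialBackoff combines
        // the (ascending) selectivities as s0 * s1^(1/2) * s2^(1/4) * s3^(1/8), using at
        // most the four most selective terms; disjExponentialBackoff is assumed to apply
        // the same decay to the negated selectivities (see optimizer/utils/ce_math.h).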
- topLevelSel = ce::conjExponentialBackoff(std::move(topLevelSelectivities)); - CEType card = std::max(topLevelSel * childResult, kMinCard); - uassert(6716602, "Invalid cardinality.", mongo::ce::validCardinality(card)); - return card; - } - - CEType transport(const RIDIntersectNode& node, - CEType /*leftChildResult*/, - CEType /*rightChildResult*/) { - // CE for the group should already be derived via the underlying Filter or Evaluation - // logical nodes. - uasserted(6624038, "Should not be necessary to derive CE for RIDIntersectNode"); - } - - CEType transport(const RIDUnionNode& node, - CEType /*leftChildResult*/, - CEType /*rightChildResult*/) { - // CE for the group should already be derived via the underlying Filter or Evaluation - // logical nodes. - uasserted(7016301, "Should not be necessary to derive CE for RIDUnionNode"); - } - - CEType transport(const BinaryJoinNode& node, - CEType leftChildResult, - CEType rightChildResult, - CEType /*exprResult*/) { - const auto& filter = node.getFilter(); - - SelectivityType selectivity = kDefaultFilterSel; - if (filter == Constant::boolean(false)) { - selectivity = 0.0; - } else if (filter == Constant::boolean(true)) { - selectivity = 1.0; - } - return leftChildResult * rightChildResult * selectivity; - } - - CEType transport(const UnionNode& node, - std::vector childResults, - CEType /*bindResult*/, - CEType /*refsResult*/) { - // Combine the CE of each child. - CEType result = 0; - for (auto&& child : childResults) { - result += child; - } - return result; - } - - CEType transport(const GroupByNode& node, - CEType childResult, - CEType /*bindAggResult*/, - CEType /*refsAggResult*/, - CEType /*bindGbResult*/, - CEType /*refsGbResult*/) { - // TODO: estimate number of groups. - switch (node.getType()) { - case GroupNodeType::Complete: - return kDefaultCompleteGroupSel * childResult; - - // Global and Local selectivity should multiply to Complete selectivity. - case GroupNodeType::Global: - return kDefaultGlobalGroupSel * childResult; - case GroupNodeType::Local: - return kDefaultLocalGroupSel * childResult; - - default: - MONGO_UNREACHABLE; - } - } - - CEType transport(const UnwindNode& node, - CEType childResult, - CEType /*bindResult*/, - CEType /*refsResult*/) { - return kDefaultAverageArraySize * childResult; - } - - CEType transport(const CollationNode& node, CEType childResult, CEType /*refsResult*/) { - // Collations do not change cardinality. - return childResult; - } - - CEType transport(const LimitSkipNode& node, CEType childResult) { - const auto limit = node.getProperty().getLimit(); - const auto skip = node.getProperty().getSkip(); - const auto cardAfterSkip = std::max(childResult - skip, 0.0); - if (limit < cardAfterSkip) { - return limit; - } - return cardAfterSkip; - } - - CEType transport(const ExchangeNode& node, CEType childResult, CEType /*refsResult*/) { - // Exchanges do not change cardinality. - return childResult; - } - - CEType transport(const RootNode& node, CEType childResult, CEType /*refsResult*/) { - // Root node does not change cardinality. - return childResult; - } - - /** - * Other ABT types. - */ - template - CEType transport(const T& /*node*/, Ts&&...) 
{ - static_assert(!canBeLogicalNode(), "Logical node must implement its CE derivation."); - return 0.0; - } - - static CEType derive(const Metadata& metadata, - const Memo& memo, - const ABT::reference_type logicalNodeRef) { - CEHeuristicTransport instance(metadata, memo); - return algebra::transport(logicalNodeRef, instance); - } - -private: - CEHeuristicTransport(const Metadata& metadata, const Memo& memo) - : _metadata(metadata), _memo(memo) {} - - // We don't own this. - const Metadata& _metadata; - const Memo& _memo; -}; -} // namespace - -CEType HeuristicCE::deriveCE(const Metadata& metadata, - const Memo& memo, - const LogicalProps& /*logicalProps*/, - const ABT::reference_type logicalNodeRef) const { - CEType card = CEHeuristicTransport::derive(metadata, memo, logicalNodeRef); - return card; -} -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_heuristic.h b/src/mongo/db/query/ce/ce_heuristic.h deleted file mode 100644 index fd6e0672c54..00000000000 --- a/src/mongo/db/query/ce/ce_heuristic.h +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/query/optimizer/cascades/interfaces.h" - -namespace mongo::ce { - -/** - * Default cardinality estimation in the absence of statistics. - * Relies purely on heuristics. - * We currently do not use logical properties for heuristic ce. - */ -class HeuristicCE : public optimizer::cascades::CEInterface { -public: - optimizer::CEType deriveCE(const optimizer::Metadata& metadata, - const optimizer::cascades::Memo& memo, - const optimizer::properties::LogicalProps& /*logicalProps*/, - optimizer::ABT::reference_type logicalNodeRef) const override final; -}; - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_heuristic_test.cpp b/src/mongo/db/query/ce/ce_heuristic_test.cpp deleted file mode 100644 index c59361fb03c..00000000000 --- a/src/mongo/db/query/ce/ce_heuristic_test.cpp +++ /dev/null @@ -1,1009 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include - -#include "mongo/db/query/ce/ce_heuristic.h" -#include "mongo/db/query/ce/ce_test_utils.h" -#include "mongo/db/query/optimizer/cascades/logical_props_derivation.h" -#include "mongo/db/query/optimizer/cascades/memo.h" -#include "mongo/db/query/optimizer/defs.h" -#include "mongo/db/query/optimizer/explain.h" -#include "mongo/db/query/optimizer/metadata.h" -#include "mongo/db/query/optimizer/opt_phase_manager.h" -#include "mongo/db/query/optimizer/props.h" -#include "mongo/db/query/optimizer/utils/unit_test_utils.h" -#include "mongo/db/query/optimizer/utils/utils.h" -#include "mongo/unittest/unittest.h" - -namespace mongo::ce { -namespace { - -using namespace optimizer; -using namespace optimizer::cascades; - -constexpr double kCollCard = 10000.0; -const std::string collName = "test"; - -class HeuristicCETester : public CETester { -public: - HeuristicCETester( - std::string collName, - const optimizer::OptPhaseManager::PhaseSet& optPhases = kDefaultCETestPhaseSet) - : CETester(collName, kCollCard, optPhases) {} - -protected: - std::unique_ptr getCETransport() const override { - return std::make_unique(); - } -}; - -TEST(CEHeuristicTest, CEWithoutOptimizationGtLtNum) { - std::string query = "{a0 : {$gt : 14, $lt : 21}}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE(ht, query, 1089.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationEqNum) { - std::string query = "{a: 123}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0); - ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0); - ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0); - ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0); - ASSERT_MATCH_CE_CARD(ht, query, 100.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationEqStr) { - std::string query = "{a: 'foo'}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0); - ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0); - ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0); - ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0); - ASSERT_MATCH_CE_CARD(ht, query, 100.0, 
10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationGtNum) { - std::string query = "{a: {$gt: 44}}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationGtStr) { - std::string query = "{a: {$gt: 'foo'}}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationLtNum) { - std::string query = "{a: {$lt: 44}}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationDNF1pathSimple) { - std::string query = - "{$or: [" - "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}," - "{$and: [{a0: {$gt:40}}, {a0: {$lt: 44}}]}" - "]}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 6.6591, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 36.0354, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 205.941, 1000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj1) { - std::string query = - "{$or: [" - "{a: {$lt: 3}}," - "{$and: [{b: {$gt:5}}, {c: {$lt: 10}}]}" - "]}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 7.623, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 55.5761, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 402.963, 1000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj2) { - std::string query = - "{$and: [" - "{a: {$lt: 3}}," - "{$or: [{b: {$gt:5}}, {b: {$lt: 10}}]}" - "]}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 5.733, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 31.0736, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 181.863, 1000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj3) { - std::string query = - "{$and: [" - "{$and: [{a: {$gt: 5}}, {a: {$lt: 10}}]}," - "{$and: [" - " {b: {$gt: 15}}," - " {c: {$lt: 110}}," - " {$or: [{a1: 1}, {b1: 2}, {c1: 3}]}" - "]}" - "]}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 1.52063, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 4.15975, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 9.11877, 1000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj4) { - std::string query = - "{$or: [" - "{$or: [{a: {$gt: 5}}, {a: {$lt: 10}}]}," - "{$or: [" - " {b: {$gt: 15}}," - " {c: {$lt: 110}}," - " {$and: [{a1: 1}, {b1: 2}, {c1: 3}]}" - "]}" - "]}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 8.9298, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 89.9501, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 798.495, 1000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationTraverseSelectivityDoesNotAccumulate) { - std::string query = - "{$or: [" - "{a0: 1}," - "{a0: {$lt: -4}}," - "{b0: {$gt: 10}}" - "]}"; - std::string queryWithLongPaths = - "{$or: [" - "{'a0.a1.a2.a3.a4.a5.a6.a7.a8.a9': 1}," - 
"{'a0.a1.a2.a3.a4.a5.a6.a7.a8.a9': {$lt: -4}}," - "{'b0.b1.b3': {$gt: 10}}" - "]}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - auto ce1 = ht.getMatchCE(query); - auto ce2 = ht.getMatchCE(queryWithLongPaths); - ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithEqOnSameValue) { - std::string query = - "{$or: [" - "{a: 1}," - "{$and: [{a: 2}, {a: 2}]}" - "]}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 5.0, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 18.8997, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 62.2456, 1000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithEqOnDifferentValues) { - std::string query = - "{$or: [" - "{a: 1}," - "{$and: [{a: 2}, {a: 3}]}" - "]}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 3.0, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 9.94987, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 31.6228, 1000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationConjunctionWithIn) { - std::string query = - "{$or: [" - "{a: 1}," - "{$and: [{a: 2}, {a: {$in: [2, 3, 4]}}]}" - "]}"; - HeuristicCETester ht(collName, kNoOptPhaseSet); - // Estimation for $in is not implemented yet, so we assume it has the default filter selectivity - // of 0.1. - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 3.6, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 18.8549, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 128.46, 1000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationOneLowBoundWithoutTraverse) { - using namespace properties; - - ABT scanNode = make("test", "test"); - - ABT filterNode = make( - make(make("a", make(Operations::Gt, Constant::int64(42))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 2.1, 3.0); - ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); - ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); - ASSERT_CE_CARD(ht, rootNode, 33.0, 100.0); - ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationOneHighBoundWithoutTraverse) { - using namespace properties; - - ABT scanNode = make("test", "test"); - - ABT filterNode = make( - make(make("a", make(Operations::Lt, Constant::int64(42))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 2.1, 3.0); - ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); - ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); - ASSERT_CE_CARD(ht, rootNode, 33.0, 100.0); - ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationTwoLowBoundsWithoutTraverse) { - using namespace properties; - - ABT scanNode = make("test", "test"); - - ABT filterNode = make( - make(make("a", - make( - make(Operations::Gt, Constant::int64(5)), - make(Operations::Gt, Constant::int64(10)))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 
2.1, 3.0); - ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); - ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); - ASSERT_CE_CARD(ht, rootNode, 33.0, 100.0); - ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationTwoHighBoundsWithoutTraverse) { - using namespace properties; - - ABT scanNode = make("test", "test"); - - ABT filterNode = make( - make(make("a", - make( - make(Operations::Lt, Constant::int64(5)), - make(Operations::Lt, Constant::int64(10)))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 2.1, 3.0); - ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); - ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); - ASSERT_CE_CARD(ht, rootNode, 33.0, 100.0); - ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationClosedRangeWithoutTraverse) { - using namespace properties; - - ABT scanNode = make("test", "test"); - - ABT filterNode = make( - make(make("a", - make( - make(Operations::Gt, Constant::int64(7)), - make(Operations::Lt, Constant::int64(13)))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 1.5, 3.0); - ASSERT_CE_CARD(ht, rootNode, 3.5, 7.0); - ASSERT_CE_CARD(ht, rootNode, 5.0, 10.0); - ASSERT_CE_CARD(ht, rootNode, 20.0, 100.0); - ASSERT_CE_CARD(ht, rootNode, 2000.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithDifferentTypes) { - using namespace properties; - - ABT scanNode = make("test", "test"); - - ABT filterNode = make( - make( - make( - "a", - make(make(Operations::Gt, Constant::int64(5)), - make(Operations::Lt, Constant::str("foo")))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 2.1, 3.0); - ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); - ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); - ASSERT_CE_CARD(ht, rootNode, 33.0, 100.0); - ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationClosedRangeWithPathExpr) { - using namespace properties; - - ABT scanNode = make("test", "test"); - - ABT filterNode = make( - make( - make( - make( - "a0", - make( - make("a1", - make( - make(Operations::Gt, Constant::int64(5)), - PathTraverse::kSingleLevel)), - PathTraverse::kSingleLevel)), - make( - "a0", - make( - make("a1", - make( - make(Operations::Lt, Constant::int64(10)), - PathTraverse::kSingleLevel)), - PathTraverse::kSingleLevel))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 1.5, 3.0); - ASSERT_CE_CARD(ht, rootNode, 3.5, 7.0); - ASSERT_CE_CARD(ht, rootNode, 5.0, 10.0); - ASSERT_CE_CARD(ht, rootNode, 20.0, 100.0); - ASSERT_CE_CARD(ht, rootNode, 2000.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationClosedRangeWith1Variable) { - using namespace properties; - - ABT scanNode = make("test", 
"test"); - - ABT filterNode = make( - make( - make( - make( - "a0", - make( - make("a1", - make( - make(Operations::Gt, Constant::int64(5)), - PathTraverse::kSingleLevel)), - PathTraverse::kSingleLevel)), - make( - "a0", - make( - make("a1", - make( - make(Operations::Lt, make("test")), - PathTraverse::kSingleLevel)), - PathTraverse::kSingleLevel))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 1.5, 3.0); - ASSERT_CE_CARD(ht, rootNode, 3.5, 7.0); - ASSERT_CE_CARD(ht, rootNode, 5.0, 10.0); - ASSERT_CE_CARD(ht, rootNode, 20.0, 100.0); - ASSERT_CE_CARD(ht, rootNode, 2000.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationOpenRangeWith1Variable) { - using namespace properties; - - ABT scanNode = make("test", "test"); - - ABT filterNode = make( - make( - make( - make( - "a0", - make( - make("a1", - make( - make(Operations::Lt, Constant::int64(5)), - PathTraverse::kSingleLevel)), - PathTraverse::kSingleLevel)), - make( - "a0", - make( - make("a1", - make( - make(Operations::Lt, make("test")), - PathTraverse::kSingleLevel)), - PathTraverse::kSingleLevel))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 2.1, 3.0); - ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); - ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); - ASSERT_CE_CARD(ht, rootNode, 33.0, 100.0); - ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationConjunctionOfBoundsWithDifferentPaths) { - using namespace properties; - - ABT scanNode = make("test", "test"); - - ABT filterNode = make( - make( - make( - make( - "a0", - make( - make("a1", - make( - make(Operations::Gt, Constant::int64(5)), - PathTraverse::kSingleLevel)), - PathTraverse::kSingleLevel)), - make( - "b0", - make( - make("b1", - make( - make(Operations::Lt, Constant::int64(10)), - PathTraverse::kSingleLevel)), - PathTraverse::kSingleLevel))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 1.47, 3.0); - ASSERT_CE_CARD(ht, rootNode, 3.43, 7.0); - ASSERT_CE_CARD(ht, rootNode, 4.9, 10.0); - ASSERT_CE_CARD(ht, rootNode, 10.89, 100.0); - ASSERT_CE_CARD(ht, rootNode, 1089.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationDisjunctionOnSamePathWithoutTraverse) { - using namespace properties; - - ABT scanNode = make("test", "test"); - - ABT filterNode = make( - make( - make( - make( - "a0", - make("a1", make(Operations::Gt, Constant::int64(5)))), - make( - "a0", - make("a1", make(Operations::Eq, Constant::int64(100))))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 2.61962, 3.0); - ASSERT_CE_CARD(ht, rootNode, 5.69373, 7.0); - ASSERT_CE_CARD(ht, rootNode, 7.94868, 10.0); - ASSERT_CE_CARD(ht, rootNode, 39.7, 100.0); - ASSERT_CE_CARD(ht, rootNode, 3367.0, 
10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationDisjunctionOnDifferentPathsWithoutTraverse) { - using namespace properties; - - ABT scanNode = make("test", "test"); - - ABT filterNode = make( - make( - make( - make( - "a0", - make("a1", make(Operations::Gt, Constant::int64(5)))), - make( - "b0", - make("b1", make(Operations::Eq, Constant::int64(100))))), - make("test")), - std::move(scanNode)); - - ABT rootNode = - make(ProjectionRequirement{ProjectionNameVector{"test"}}, std::move(filterNode)); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); - ASSERT_CE_CARD(ht, rootNode, 2.61962, 3.0); - ASSERT_CE_CARD(ht, rootNode, 5.69373, 7.0); - ASSERT_CE_CARD(ht, rootNode, 7.94868, 10.0); - ASSERT_CE_CARD(ht, rootNode, 39.7, 100.0); - ASSERT_CE_CARD(ht, rootNode, 3367.0, 10000.0); -} - -TEST(CEHeuristicTest, CEWithoutOptimizationEquivalentConjunctions) { - using namespace properties; - - ABT rootNode1 = make( - ProjectionRequirement{ProjectionNameVector{"test"}}, - make( - make( - make( - make( - "a0", - make(make(Operations::Gt, Constant::int64(5)), - PathTraverse::kSingleLevel)), - make( - "b0", - make(make(Operations::Gt, Constant::int64(10)), - PathTraverse::kSingleLevel))), - make("test")), - make("test", "test"))); - - ABT rootNode2 = make( - ProjectionRequirement{ProjectionNameVector{"test"}}, - make( - make(make("a0", - make(make(Operations::Gt, - Constant::int64(5)), - PathTraverse::kSingleLevel)), - make("test")), - make( - make( - make( - "b0", - make(make(Operations::Gt, Constant::int64(10)), - PathTraverse::kSingleLevel)), - make("test")), - make("test", "test")))); - - HeuristicCETester ht(collName, kNoOptPhaseSet); - ht.setCollCard(kCollCard); - auto ce1 = ht.getCE(rootNode1); - auto ce2 = ht.getCE(rootNode2); - ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Eq) { - std::string query = "{a : 123}"; - HeuristicCETester ht(collName); - ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 0.1, 0.1); - ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0); - ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0); - ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0); - ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0); - ASSERT_MATCH_CE_CARD(ht, query, 100.0, 10000.0); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Gt) { - std::string query = "{a: {$gt: 44}}"; - HeuristicCETester ht(collName); - ASSERT_MATCH_CE_CARD(ht, query, 0.01, 0.0); - ASSERT_MATCH_CE_CARD(ht, query, 0.7, 1.0); - ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 330, 1000.0); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Gt_Lt) { - std::string query = "{a: {$gt: 44, $lt: 99}}"; - HeuristicCETester ht(collName); - ASSERT_MATCH_CE_CARD(ht, query, 0.585662, 1.0); - ASSERT_MATCH_CE_CARD(ht, query, 5.27096, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 29.885, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 189.571, 1000.0); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_AND2Eq) { - std::string query = "{a : 13, b : 42}"; - HeuristicCETester ht(collName); - ASSERT_MATCH_CE_CARD(ht, query, 1.31607, 3.0); - ASSERT_MATCH_CE_CARD(ht, query, 1.62658, 7.0); - ASSERT_MATCH_CE_CARD(ht, query, 1.77828, 10.0); - ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 100.0); - ASSERT_MATCH_CE_CARD(ht, query, 10.0, 10000.0); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_AND3Eq) { - std::string query = "{a : 13, b : 42, c : 69}"; - 
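    // Illustration: at card 10000 each equality has selectivity 0.01, and exponential
    // backoff combines the three as 0.01 * 0.01^(1/2) * 0.01^(1/4) ~= 3.16e-4, which
    // yields the ~3.16 estimate asserted below.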
HeuristicCETester ht(collName); - ASSERT_MATCH_CE_CARD(ht, query, 1.1472, 3.0); - ASSERT_MATCH_CE_CARD(ht, query, 1.27537, 7.0); - ASSERT_MATCH_CE_CARD(ht, query, 1.33352, 10.0); - ASSERT_MATCH_CE_CARD(ht, query, 1.77828, 100.0); - ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10000.0); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_OR1path) { - std::string query = "{$or: [{a0: {$gt: 44}}, {a0: {$lt: 9}}]}"; - HeuristicCETester ht(collName); - ASSERT_MATCH_CE_CARD(ht, query, 7.52115, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 58.6188, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 451.581, 1000.0); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_OR2paths) { - std::string query = "{$or: [{a0: {$gt:44}}, {b0: {$lt: 9}}]}"; - HeuristicCETester ht(collName, kOnlySubPhaseSet); - // Disjunctions on different paths are not SARGable. - ASSERT_MATCH_CE_CARD(ht, query, 8.19, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 69.0525, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 551.1, 1000.0); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathSimple) { - std::string query = - "{$or: [" - "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}," - "{$and: [{a0: {$gt:40}}, {a0: {$lt: 44}}]}" - "]}"; - HeuristicCETester ht(collName); - ASSERT_MATCH_CE_CARD(ht, query, 6.42792, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 37.0586, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 225.232, 1000.0); -} - - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathComplex) { - HeuristicCETester ht(collName, kOnlySubPhaseSet); - // Each disjunct has different number of conjuncts, - // so that its selectivity is different. We need 5 disjuncts to test exponential backoff which - // cuts off at the first 4. The conjuncts are in selectivity order. - std::string query1 = - "{$or: [" - "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}," - "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}, {a0: {$gt: 42}}]}," - "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}]}," - "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: " - "81}}]}," - "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: " - "81}}, {a0: {$lt: 77}}]}" - "]}"; - auto ce1 = ht.getMatchCE(query1); - // The conjuncts are in inverse selectivity order. - std::string query2 = - "{$or: [" - "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: " - "81}}, {a0: {$lt: 77}}]}," - "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: " - "81}}]}," - "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}]}," - "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}, {a0: {$gt: 42}}]}," - "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}" - "]}"; - auto ce2 = ht.getMatchCE(query2); - ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF2paths) { - std::string query = - "{$or: [" - "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}," - "{$and: [{b0: {$gt:40}}, {b0: {$lt: 44}}]}" - "]}"; - HeuristicCETester ht(collName, kOnlySubPhaseSet); - // Disjunctions on different paths are not SARGable. 
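    // Illustration: at card 9 each comparison is estimated as an open range (0.7), each
    // $and multiplies to 0.7 * 0.7 = 0.49, and the $or combines by inclusion-exclusion:
    // (0.49 + 0.49 - 0.49 * 0.49) * 9 ~= 6.6591, as asserted below.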
- ASSERT_MATCH_CE_CARD(ht, query, 6.6591, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 36.0354, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 205.941, 1000.0); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_CNF1path) { - std::string query = - "{$and : [" - "{$or : [ {a0 : {$gt : 11}}, {a0 : {$lt : 44}} ]}," - "{$or : [ {a0 : {$gt : 77}}, {a0 : {$eq : 51}} ]}" - "]}"; - HeuristicCETester ht(collName); - ASSERT_MATCH_CE_CARD(ht, query, 6.21212, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 36.4418, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 228.935, 1000.0); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_CNF2paths) { - std::string query = - "{$and : [" - "{$or : [ {a0 : {$gt : 11}}, {a0 : {$lt : 44}} ]}," - "{$or : [ {b0 : {$gt : 77}}, {b0 : {$eq : 51}} ]}" - "]}"; - HeuristicCETester ht(collName); - ASSERT_MATCH_CE_CARD(ht, query, 6.21212, 9.0); - ASSERT_MATCH_CE_CARD(ht, query, 36.4418, 99.0); - ASSERT_MATCH_CE_CARD(ht, query, 228.935, 1000.0); -} - -TEST(CEHeuristicTest, CEAfterMemoSubstitutionExplorationPhases) { - HeuristicCETester ht(collName); - ASSERT_MATCH_CE(ht, "{a : 13, b : 42}", 10.0); -} - -TEST(CEHeuristicTest, CENotEquality) { - double collCard = kCollCard; - HeuristicCETester opt(collName); - - // We avoid optimizing in order to verify heuristic estimate of FilterNode subtree. Note that we - // do not generate SargableNodes for $not predicates, but we do generate SargableNodes without - // it; for the purposes of this test, we want to demonstrate that $not returns the inverse of - // the FilterNode estimate. - HeuristicCETester noOpt(collName, kNoOptPhaseSet); - - // Equality selectivity is sqrt(kCollCard)/kCollCard = 0.01. When we see a UnaryOp [Not] above - // this subtree, we invert the selectivity 1.0 - 0.01 = 0.99. - double ce = 100.0; - double inverseCE = collCard - ce; - ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$eq: 1}}", ce); - ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$eq: 1}}}", inverseCE); - - // Update cardinality to 25. - collCard = 25; - opt.setCollCard(collCard); - noOpt.setCollCard(collCard); - - // Selectivity is sqrt(25)/25. - ce = 5.0; - inverseCE = collCard - ce; - ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$eq: 1}}", ce); - ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$eq: 1}}}", inverseCE); - - // Update cardinality to 9. - collCard = 9; - opt.setCollCard(collCard); - noOpt.setCollCard(collCard); - - // Selectivity is sqrt(3)/9. - ce = 3.0; - inverseCE = collCard - ce; - ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$eq: 1}}", ce); - ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$eq: 1}}}", inverseCE); -} - -TEST(CEHeuristicTest, CENotOpenRange) { - // Repeat the above test for open ranges; the $not cardinality estimate should add up with the - // non-$not estimate to the collection cardinality. - double collCard = kCollCard; - HeuristicCETester opt(collName); - HeuristicCETester noOpt(collName, kNoOptPhaseSet); - - // Expect open-range selectivity for input card > 100 (0.33). 
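    // i.e. 0.33 * 10000 = 3300 matching documents, leaving 10000 - 3300 = 6700 for $not.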
- double ce = 3300; - double inverseCE = collCard - ce; - - ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$lt: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$lte: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$lte: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gt: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gte: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 1}}", ce); - ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 1}}}", inverseCE); - - // Update cardinality to 25. - collCard = 25; - opt.setCollCard(collCard); - noOpt.setCollCard(collCard); - - // Expect open-range selectivity for input card in range (20, 100) (0.45). - ce = 11.25; - inverseCE = collCard - ce; - - ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$lt: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$lte: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$lte: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gt: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gte: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 1}}", ce); - ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 1}}}", inverseCE); - - // Update cardinality to 10. - collCard = 10.0; - opt.setCollCard(collCard); - noOpt.setCollCard(collCard); - - // Expect open-range selectivity for input card < 20 (0.70). - ce = 7.0; - inverseCE = collCard - ce; - - ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$lt: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$lte: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$lte: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gt: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gte: 1}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 1}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 1}}", ce); - ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 1}}}", inverseCE); -} - -TEST(CEHeuristicTest, CENotClosedRange) { - // Repeat the above test for closed ranges; the $not cardinality estimate should add up with the - // non-$not estimate to the collection cardinality. - double collCard = kCollCard; - double ce = 1089.0; - double inverseCE = collCard - ce; - HeuristicCETester opt(collName); - HeuristicCETester noOpt(collName, kNoOptPhaseSet); - - ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lt: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lt: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lt: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lt: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lte: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lte: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lte: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lte: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 10, $lt: 20}}", ce); - ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 10, $lt: 20}}}", inverseCE); - - /* - * Update cardinality to 25. Here we observe an interesting edge case where the estimated - * cardinality is not the inverse of the actual cardinality. 
- * - * Consider the predicate {a: {$gt: 10, $lt: 20}}. This generates two FilterNodes stacked on top - * of each other. However, the predicate {a: {$not: {$gt: 10, $lt: 20}}} generates just one - * FilterNode. - * - * We always use input cardinality to determine which interval selectivity we're going to use. - * However, we have a different input cardinality for the one FilterNode case (collCard) than - * for the two FilterNodes case: the first node gets collCard, and the second node gets a - * smaller value after the selectivity of the first filter is applied. - * - * Because we use a piecewise function to pick the selectivity, and because we go from inputCard - * < 100 to inputCard < 20, we choose different selectivities for the intervals in the second - * FilterNode (0.50) than in the first (0.33). - */ - collCard = 25; - ce = 7.875; - inverseCE = 19.9375; - opt.setCollCard(collCard); - noOpt.setCollCard(collCard); - - ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lt: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lt: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lt: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lt: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lte: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lte: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lte: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lte: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 10, $lt: 20}}", ce); - ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 10, $lt: 20}}}", inverseCE); - - // Update cardinality to 10. - collCard = 10.0; - ce = 4.9; - inverseCE = collCard - ce; - opt.setCollCard(collCard); - noOpt.setCollCard(collCard); - - ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lt: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lt: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lt: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lt: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lte: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lte: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lte: 20}}", ce); - ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lte: 20}}}", inverseCE); - ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 10, $lt: 20}}", ce); - ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 10, $lt: 20}}}", inverseCE); -} - -TEST(CEHeuristicTest, CEExists) { - HeuristicCETester noOpt(collName); - - // Test basic case + $not. - ASSERT_MATCH_CE(noOpt, "{a: {$exists: true}}", 7000); - ASSERT_MATCH_CE(noOpt, "{a: {$exists: false}}", 3000); - ASSERT_MATCH_CE(noOpt, "{a: {$not: {$exists: false}}}", 7000); - ASSERT_MATCH_CE(noOpt, "{a: {$not: {$exists: true}}}", 3000); - - // Test combinations of predicates. - ASSERT_MATCH_CE(noOpt, "{a: {$exists: true, $eq: 123}}", 70); - ASSERT_MATCH_CE(noOpt, "{a: {$exists: false, $eq: null}}", 30); - ASSERT_MATCH_CE(noOpt, "{a: {$exists: false}, b: {$eq: 123}}", 30); - ASSERT_MATCH_CE(noOpt, "{a: {$exists: true, $gt: 123}}", 2310); -} - -} // namespace -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_hinted.cpp b/src/mongo/db/query/ce/ce_hinted.cpp deleted file mode 100644 index 0ce71a69fe7..00000000000 --- a/src/mongo/db/query/ce/ce_hinted.cpp +++ /dev/null @@ -1,108 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/query/ce/ce_hinted.h" -#include "mongo/db/query/ce/ce_heuristic.h" - -namespace mongo::ce { -namespace cascades = optimizer::cascades; -namespace properties = optimizer::properties; - -using ABT = optimizer::ABT; -using CEType = optimizer::CEType; -using LogicalProps = properties::LogicalProps; -using Memo = cascades::Memo; -using Metadata = optimizer::Metadata; - -class CEHintedTransport { -public: - CEType transport(const ABT& n, - const optimizer::SargableNode& node, - CEType childResult, - CEType /*bindsResult*/, - CEType /*refsResult*/) { - CEType result = childResult; - for (const auto& [key, req] : node.getReqMap()) { - if (!isIntervalReqFullyOpenDNF(req.getIntervals())) { - auto it = _hints.find(key); - if (it != _hints.cend()) { - // Assume independence. - result *= it->second; - } - } - } - - return result; - } - - template - CEType transport(const ABT& n, const T& /*node*/, Ts&&...) { - if (optimizer::canBeLogicalNode()) { - return _heuristicCE.deriveCE(_metadata, _memo, _logicalProps, n.ref()); - } - return 0.0; - } - - static CEType derive(const Metadata& metadata, - const Memo& memo, - const PartialSchemaSelHints& hints, - const LogicalProps& logicalProps, - const ABT::reference_type logicalNodeRef) { - CEHintedTransport instance(metadata, memo, logicalProps, hints); - return optimizer::algebra::transport(logicalNodeRef, instance); - } - -private: - CEHintedTransport(const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - const PartialSchemaSelHints& hints) - : _heuristicCE(), - _metadata(metadata), - _memo(memo), - _logicalProps(logicalProps), - _hints(hints) {} - - HeuristicCE _heuristicCE; - - // We don't own this. 
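// [Editorial note, not part of the original patch.] Summary of the transport
// above: for a SargableNode, the selectivity hint of each PartialSchemaKey
// whose interval is not fully open is multiplied into the child estimate under
// an independence assumption; every other logical node defers to the heuristic
// estimator.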
- const Metadata& _metadata; - const Memo& _memo; - const LogicalProps& _logicalProps; - const PartialSchemaSelHints& _hints; -}; - -CEType HintedCE::deriveCE(const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - const ABT::reference_type logicalNodeRef) const { - return CEHintedTransport::derive(metadata, memo, _hints, logicalProps, logicalNodeRef); -} - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_hinted.h b/src/mongo/db/query/ce/ce_hinted.h deleted file mode 100644 index eacadc0ccfb..00000000000 --- a/src/mongo/db/query/ce/ce_hinted.h +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/query/optimizer/cascades/interfaces.h" - -namespace mongo::ce { - -using PartialSchemaSelHints = std::map; - -/** - * Estimation based on hints. The hints are organized in a PartialSchemaSelHints structure. - * SargableNodes are estimated based on the matching PartialSchemaKeys. - */ -class HintedCE : public optimizer::cascades::CEInterface { -public: - HintedCE(PartialSchemaSelHints hints) : _hints(std::move(hints)) {} - - optimizer::CEType deriveCE(const optimizer::Metadata& metadata, - const optimizer::cascades::Memo& memo, - const optimizer::properties::LogicalProps& logicalProps, - optimizer::ABT::reference_type logicalNodeRef) const override final; - -private: - // Selectivity hints per PartialSchemaKey. - PartialSchemaSelHints _hints; -}; - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_histogram.cpp b/src/mongo/db/query/ce/ce_histogram.cpp deleted file mode 100644 index c456d9227b6..00000000000 --- a/src/mongo/db/query/ce/ce_histogram.cpp +++ /dev/null @@ -1,289 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/exec/sbe/abt/abt_lower.h" - -#include "mongo/db/query/ce/ce_histogram.h" -#include "mongo/db/query/ce/collection_statistics_impl.h" -#include "mongo/db/query/ce/histogram_estimation.h" - -#include "mongo/db/query/optimizer/utils/abt_hash.h" -#include "mongo/db/query/optimizer/utils/ce_math.h" -#include "mongo/db/query/optimizer/utils/memo_utils.h" - -#include "mongo/db/pipeline/abt/utils.h" - -namespace mongo::ce { -namespace cascades = optimizer::cascades; -namespace properties = optimizer::properties; - -using ABT = optimizer::ABT; -using CEType = optimizer::CEType; -using LogicalProps = properties::LogicalProps; -using Memo = cascades::Memo; -using Metadata = optimizer::Metadata; - -namespace { - -/** - * This transport combines chains of PathGets and PathTraverses into an MQL-like string path. - */ -class PathDescribeTransport { -public: - std::string transport(const optimizer::PathTraverse& /*node*/, std::string childResult) { - return childResult; - } - - std::string transport(const optimizer::PathGet& node, std::string childResult) { - return str::stream() << node.name() << (childResult.length() > 0 ? "." : "") << childResult; - } - - std::string transport(const optimizer::EvalFilter& node, - std::string pathResult, - std::string inputResult) { - return pathResult; - } - - std::string transport(const optimizer::PathIdentity& node) { - return ""; - } - - template - std::string transport(const T& node, Ts&&... /* args */) { - uasserted(6903900, "Unexpected node in path serialization."); - } -}; - -std::string serializePath(const ABT& path) { - PathDescribeTransport pdt; - auto str = optimizer::algebra::transport(path, pdt); - return str; -} - -} // namespace - -class CEHistogramTransportImpl { -public: - CEHistogramTransportImpl(std::shared_ptr stats, - std::unique_ptr fallbackCE) - : _stats(stats), - _fallbackCE(std::move(fallbackCE)), - _arrayOnlyInterval(*defaultConvertPathToInterval(make())) {} - - ~CEHistogramTransportImpl() {} - - CEType transport(const ABT& n, - const optimizer::ScanNode& node, - const Memo& memo, - const LogicalProps& logicalProps, - CEType /*bindResult*/) { - return _stats->getCardinality(); - } - - /** - * This struct is used to track an intermediate representation of the intervals in the - * requirements map. In particular, grouping intervals along each path in the map allows us to - * determine which paths should be estimated as $elemMatches without relying on a particular - * order of entries in the requirements map. 
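 * [Editorial note, not part of the original patch.] Concretely, in the first
 * pass over the requirements map below: a PathArr-only entry for a path flips
 * that path's conjunct to array-only ($elemMatch) estimation by clearing
 * 'includeScalar', rather than contributing an interval of its own, while
 * every other entry for the same path appends its interval to 'intervals'.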
- */ - struct SargableConjunct { - bool includeScalar; - const ce::ArrayHistogram& histogram; - std::vector> intervals; - }; - - CEType transport(const ABT& n, - const SargableNode& node, - const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - CEType childResult, - CEType /*bindsResult*/, - CEType /*refsResult*/) { - // Early out and return 0 since we don't expect to get more results. - if (childResult == 0.0) { - return 0.0; - } - - // Initial first pass through the requirements map to extract information about each path. - std::map conjunctRequirements; - for (const auto& [key, req] : node.getReqMap()) { - if (req.getIsPerfOnly()) { - // Ignore perf-only requirements. - continue; - } - - const auto serializedPath = serializePath(key._path.ref()); - const auto& interval = req.getIntervals(); - const bool isPathArrInterval = - (_arrayOnlyInterval == interval) && !pathEndsInTraverse(key._path.ref()); - - // Check if we have already seen this path. - if (auto conjunctIt = conjunctRequirements.find({serializedPath}); - conjunctIt != conjunctRequirements.end()) { - auto& conjunctReq = conjunctIt->second; - if (isPathArrInterval) { - // We should estimate this path's intervals using $elemMatch semantics. - // Don't push back the interval for estimation; instead, we use it to change how - // we estimate other intervals along this path. - conjunctReq.includeScalar = false; - } else { - // We will need to estimate this interval. - conjunctReq.intervals.push_back(interval); - } - continue; - } - - // Fallback if there is no histogram. - auto histogram = _stats->getHistogram(serializedPath); - if (!histogram) { - // For now, because of the structure of SargableNode and the implementation of - // the fallback (currently HeuristicCE), we can't combine heuristic & histogram - // estimates. In this case, default to Heuristic if we don't have a histogram for - // any of the predicates. - return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); - } - - // Add this path to the map. If this is not a 'PathArr' interval, add it to the vector - // of intervals we will be estimating. - SargableConjunct sc{!isPathArrInterval, *histogram, {}}; - if (sc.includeScalar) { - sc.intervals.push_back(interval); - } - conjunctRequirements.emplace(serializedPath, std::move(sc)); - } - - std::vector topLevelSelectivities; - for (const auto& [_, conjunctReq] : conjunctRequirements) { - const CEType totalCard = _stats->getCardinality(); - - if (conjunctReq.intervals.empty() && !conjunctReq.includeScalar) { - // In this case there is a single 'PathArr' interval for this field. - // The selectivity of this interval is: (count of all arrays) / totalCard - double pathArrSel = conjunctReq.histogram.getArrayCount() / totalCard; - topLevelSelectivities.push_back(pathArrSel); - } - - // Intervals are in DNF. - for (const IntervalReqExpr::Node& intervalDNF : conjunctReq.intervals) { - std::vector disjSelectivities; - - const auto disjuncts = intervalDNF.cast()->nodes(); - for (const auto& disjunct : disjuncts) { - const auto& conjuncts = disjunct.cast()->nodes(); - - std::vector conjSelectivities; - for (const auto& conjunct : conjuncts) { - const auto& interval = conjunct.cast()->getExpr(); - auto cardinality = - ce::estimateIntervalCardinality(conjunctReq.histogram, - interval, - childResult, - conjunctReq.includeScalar); - - // We may still not have been able to estimate the interval using - // histograms, for instance if the interval bounds were non-Constant. 
In - // this case, we should fallback to heuristics. - if (cardinality < 0) { - return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); - } - - // We have to convert the cardinality to a selectivity. The histogram - // returns the cardinality for the entire collection; however, fewer records - // may be expected at the SargableNode. - conjSelectivities.push_back(cardinality / totalCard); - } - - auto backoff = ce::conjExponentialBackoff(std::move(conjSelectivities)); - disjSelectivities.push_back(backoff); - } - - auto backoff = ce::disjExponentialBackoff(std::move(disjSelectivities)); - topLevelSelectivities.push_back(backoff); - } - } - - // The elements of the PartialSchemaRequirements map represent an implicit conjunction. - if (!topLevelSelectivities.empty()) { - auto backoff = ce::conjExponentialBackoff(std::move(topLevelSelectivities)); - childResult *= backoff; - } - return childResult; - } - - CEType transport(const ABT& n, - const RootNode& node, - const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - CEType childResult, - CEType /*refsResult*/) { - // Root node does not change cardinality. - return childResult; - } - - /** - * Use fallback for other ABT types. - */ - template - CEType transport(const ABT& n, - const T& /*node*/, - const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - Ts&&...) { - if (canBeLogicalNode()) { - return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); - } - return 0.0; - } - -private: - std::shared_ptr _stats; - std::unique_ptr _fallbackCE; - - // This is a special interval indicating that we expect to use $elemMatch semantics when - // estimating the current path. - const IntervalReqExpr::Node _arrayOnlyInterval; -}; - -CEHistogramTransport::CEHistogramTransport(std::shared_ptr stats, - std::unique_ptr fallbackCE) - : _impl(std::make_unique(stats, std::move(fallbackCE))) {} - -CEHistogramTransport::~CEHistogramTransport() {} - -CEType CEHistogramTransport::deriveCE(const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - const ABT::reference_type logicalNodeRef) const { - return algebra::transport(logicalNodeRef, *this->_impl, metadata, memo, logicalProps); -} - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_histogram.h b/src/mongo/db/query/ce/ce_histogram.h deleted file mode 100644 index 1823bc211a5..00000000000 --- a/src/mongo/db/query/ce/ce_histogram.h +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. 
If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/query/ce/collection_statistics_impl.h" -#include "mongo/db/query/optimizer/cascades/interfaces.h" - -namespace mongo::ce { - -class CEHistogramTransportImpl; - -class CEHistogramTransport : public optimizer::cascades::CEInterface { -public: - CEHistogramTransport(std::shared_ptr stats, - std::unique_ptr fallbackCE); - ~CEHistogramTransport(); - - optimizer::CEType deriveCE(const optimizer::Metadata& metadata, - const optimizer::cascades::Memo& memo, - const optimizer::properties::LogicalProps& logicalProps, - optimizer::ABT::reference_type logicalNodeRef) const final; - -private: - std::unique_ptr _impl; -}; - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_histogram_test.cpp b/src/mongo/db/query/ce/ce_histogram_test.cpp deleted file mode 100644 index 3267ce4d89f..00000000000 --- a/src/mongo/db/query/ce/ce_histogram_test.cpp +++ /dev/null @@ -1,1156 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#include "mongo/db/query/ce/ce_histogram.h" -#include "mongo/db/query/ce/ce_test_utils.h" -#include "mongo/db/query/ce/collection_statistics_mock.h" -#include "mongo/db/query/ce/histogram_estimation.h" -#include "mongo/db/query/optimizer/utils/unit_test_utils.h" -#include "mongo/db/query/sbe_stage_builder_helpers.h" -#include "mongo/unittest/unittest.h" - -namespace mongo::ce { -namespace { - -using namespace optimizer; -using namespace cascades; - -std::string collName("test"); - -class CEHistogramTester : public CETester { -public: - CEHistogramTester(std::string collName, double numRecords) - : CETester(collName, numRecords), _stats{new CollectionStatisticsMock(numRecords)} {} - - void addHistogram(const std::string& path, std::shared_ptr histogram) { - _stats->addHistogram(path, histogram); - } - -protected: - std::unique_ptr getCETransport() const override { - // making a copy of CollecitonStatistics to override - return std::make_unique(_stats, makeHeuristicCE()); - } - -private: - std::shared_ptr _stats; -}; - -struct TestBucket { - Value val; - double equalFreq; - double rangeFreq = 0.0; - double ndv = 1.0; /* ndv including bucket boundary*/ -}; -using TestBuckets = std::vector; - -ScalarHistogram getHistogramFromData(TestBuckets testBuckets) { - sbe::value::Array bounds; - std::vector buckets; - - double cumulativeFreq = 0.0; - double cumulativeNDV = 0.0; - for (const auto& b : testBuckets) { - // Add bucket boundary value to bounds. - auto [tag, val] = stage_builder::makeValue(b.val); - bounds.push_back(tag, val); - - cumulativeFreq += b.equalFreq + b.rangeFreq; - cumulativeNDV += b.ndv; - - // Create a histogram bucket. - buckets.emplace_back(b.equalFreq, - b.rangeFreq, - cumulativeFreq, - b.ndv - 1, /* ndv excluding bucket boundary*/ - cumulativeNDV); - } - - return ScalarHistogram(std::move(bounds), std::move(buckets)); -} - -TypeCounts getTypeCountsFromData(TestBuckets testBuckets) { - TypeCounts typeCounts; - for (const auto& b : testBuckets) { - // Add bucket boundary value to bounds. - auto sbeVal = stage_builder::makeValue(b.val); - auto [tag, val] = sbeVal; - - // Increment count of values for each type tag. - if (auto it = typeCounts.find(tag); it != typeCounts.end()) { - it->second += b.equalFreq + b.rangeFreq; - } else { - typeCounts[tag] = b.equalFreq + b.rangeFreq; - } - } - return typeCounts; -} - -std::unique_ptr getArrayHistogramFromData(TestBuckets testBuckets, - TypeCounts additionalScalarData = {}) { - TypeCounts dataTypeCounts = getTypeCountsFromData(testBuckets); - dataTypeCounts.merge(additionalScalarData); - return std::make_unique(getHistogramFromData(testBuckets), - std::move(dataTypeCounts)); -} - -std::unique_ptr getArrayHistogramFromData(TestBuckets scalarBuckets, - TestBuckets arrayUniqueBuckets, - TestBuckets arrayMinBuckets, - TestBuckets arrayMaxBuckets, - TypeCounts arrayTypeCounts, - double totalArrayCount, - double emptyArrayCount = 0, - TypeCounts additionalScalarData = {}) { - - // Set up scalar type counts. - TypeCounts dataTypeCounts = getTypeCountsFromData(scalarBuckets); - dataTypeCounts[value::TypeTags::Array] = totalArrayCount; - dataTypeCounts.merge(additionalScalarData); - - // Set up histograms. 
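// [Editorial note, not part of the original patch.] For reference, the helper
// above tracks cumulative statistics per bucket: e.g. the TestBuckets
// {10: eq 5} followed by {20: eq 1, rangeFreq 45, ndv 10} used in
// TestOneBoundIntRangeHistogram below produce a second bucket with cumulative
// frequency 5 + 1 + 45 = 51 and a stored range NDV of 10 - 1 = 9 (the boundary
// value is excluded), numbers the interpolation tests rely on.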
- auto arrayMinHist = getHistogramFromData(arrayMinBuckets); - auto arrayMaxHist = getHistogramFromData(arrayMaxBuckets); - return std::make_unique(getHistogramFromData(scalarBuckets), - std::move(dataTypeCounts), - getHistogramFromData(arrayUniqueBuckets), - std::move(arrayMinHist), - std::move(arrayMaxHist), - std::move(arrayTypeCounts), - emptyArrayCount); -} - -TEST(CEHistogramTest, AssertSmallMaxDiffHistogramEstimatesAtomicPredicates) { - constexpr auto kCollCard = 8; - CEHistogramTester t(collName, kCollCard); - - // Construct a histogram with two buckets: one for 3 ints equal to 1, another for 5 strings - // equal to "ing". - const std::string& str = "ing"; - t.addHistogram("a", - getArrayHistogramFromData({ - {Value(1), 3 /* frequency */}, - {Value(str), 5 /* frequency */}, - })); - - // Test $eq. - ASSERT_MATCH_CE(t, "{a: {$eq: 1}}", 3.0); - ASSERT_MATCH_CE(t, "{a: {$eq: 2}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$eq: \"ing\"}}", 5.0); - ASSERT_MATCH_CE(t, "{a: {$eq: \"foo\"}}", 0.0); - - // Test case when field doesn't match fieldpath of histogram. This falls back to heuristics. - ASSERT_MATCH_CE(t, "{b: {$eq: 1}}", 2.82843); - - // Test $gt. - ASSERT_MATCH_CE(t, "{a: {$gt: 3}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$gt: 1}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$gt: 0}}", 3.0); - ASSERT_MATCH_CE(t, "{a: {$gt: \"bar\"}}", 5.0); - ASSERT_MATCH_CE(t, "{a: {$gt: \"ing\"}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$gt: \"zap\"}}", 0.0); - - // Test $lt. - ASSERT_MATCH_CE(t, "{a: {$lt: 3}}", 3.0); - ASSERT_MATCH_CE(t, "{a: {$lt: 1}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$lt: 0}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$lt: \"bar\"}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$lt: \"ing\"}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$lt: \"zap\"}}", 5.0); - - // Test $gte. - ASSERT_MATCH_CE(t, "{a: {$gte: 3}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$gte: 1}}", 3.0); - ASSERT_MATCH_CE(t, "{a: {$gte: 0}}", 3.0); - ASSERT_MATCH_CE(t, "{a: {$gte: \"bar\"}}", 5.0); - ASSERT_MATCH_CE(t, "{a: {$gte: \"ing\"}}", 5.0); - ASSERT_MATCH_CE(t, "{a: {$gte: \"zap\"}}", 0.0); - - // Test $lte. - ASSERT_MATCH_CE(t, "{a: {$lte: 3}}", 3.0); - ASSERT_MATCH_CE(t, "{a: {$lte: 1}}", 3.0); - ASSERT_MATCH_CE(t, "{a: {$lte: 0}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$lte: \"bar\"}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$lte: \"ing\"}}", 5.0); - ASSERT_MATCH_CE(t, "{a: {$lte: \"zap\"}}", 5.0); -} - -TEST(CEHistogramTest, AssertSmallHistogramEstimatesComplexPredicates) { - constexpr auto kCollCard = 9; - CEHistogramTester t(collName, kCollCard); - - // Construct a histogram with three int buckets for field 'a'. - t.addHistogram("a", - getArrayHistogramFromData({ - {Value(1), 3 /* frequency */}, - {Value(2), 5 /* frequency */}, - {Value(3), 1 /* frequency */}, - })); - - // Construct a histogram with two int buckets for field 'b'. - t.addHistogram("b", - getArrayHistogramFromData({ - {Value(22), 3 /* frequency */}, - {Value(33), 6 /* frequency */}, - })); - - - // Test simple conjunctions on one field. Note the first example: the range we expect to see - // here is (1, 3); however, the structure in the SargableNode gives us a conjunction of two - // intervals instead: (1, "") ^ (nan, 3) This is then estimated using exponential backoff to - // give us a less accurate result. The correct cardinality here would be 5. 
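    // [Editorial sketch, not part of the original patch.] Assuming conjunctive
    // exponential backoff multiplies the selectivities, sorted ascending, with
    // halving exponents (s1 * s2^(1/2) * s3^(1/4) * ...), the first estimate
    // below follows from the histogram: sel($gt: 1) = 6/9 and sel($lt: 3) = 8/9,
    // so CE = 9 * (6/9) * sqrt(8/9) ~= 5.66 rather than the exact 5.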
- ASSERT_MATCH_CE(t, "{a: {$gt: 1}, a: {$lt: 3}}", 5.66); - ASSERT_MATCH_CE(t, "{a: {$gt: 1}, a: {$lte: 3}}", 6.0); - ASSERT_MATCH_CE(t, "{a: {$gte: 1}, a: {$lt: 3}}", 8.0); - ASSERT_MATCH_CE(t, "{a: {$gte: 1}, a: {$lte: 3}}", 9.0); - - // Test ranges which exclude each other. - ASSERT_MATCH_CE(t, "{a: {$lt: 1}, a: {$gt: 3}}", 0.0); - - // Test overlapping ranges. This is a similar case to {a: {$gt: 1}, a: {$lt: 3}} above: we - // expect to see the range [2, 2]; instead, we see the range [nan, 2] ^ [2, ""). - ASSERT_MATCH_CE(t, "{a: {$lte: 2}, a: {$gte: 2}}", 5.66); - - // Test conjunctions over multiple fields for which we have histograms. Here we expect a - // cardinality estimated by exponential backoff. - ASSERT_MATCH_CE(t, "{a: {$eq: 2}, b: {$eq: 22}}", 2.24); - ASSERT_MATCH_CE(t, "{a: {$eq: 11}, b: {$eq: 22}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$gt: 11}, a: {$lte: 100}, b: {$eq: 22}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$lt: 3}, a: {$gte: 1}, b: {$lt: 100}, b: {$gt: 30}}", 5.66); - - // Test conjunctions over multiple fields for which we may not have histograms. This falls back - // to heuristic estimation. - ASSERT_MATCH_CE(t, "{a: {$eq: 2}, c: {$eq: 1}}", 1.73205); - ASSERT_MATCH_CE(t, "{c: {$eq: 2}, d: {$eq: 22}}", 1.73205); -} - -TEST(CEHistogramTest, SanityTestEmptyHistogram) { - constexpr auto kCollCard = 0; - CEHistogramTester t(collName, kCollCard); - t.addHistogram("empty", std::make_unique()); - - ASSERT_MATCH_CE(t, "{empty: {$eq: 1.0}}", 0.0); - ASSERT_MATCH_CE(t, "{empty: {$lt: 1.0}, empty: {$gt: 0.0}}", 0.0); - ASSERT_MATCH_CE(t, "{empty: {$eq: 1.0}, other: {$eq: \"anything\"}}", 0.0); - ASSERT_MATCH_CE(t, "{other: {$eq: \"anything\"}, empty: {$eq: 1.0}}", 0.0); -} - -TEST(CEHistogramTest, TestOneBucketOneIntHistogram) { - constexpr auto kCollCard = 50; - CEHistogramTester t(collName, kCollCard); - - // Create a histogram with a single bucket that contains exactly one int (42) with a frequency - // of 50 (equal to the collection cardinality). - t.addHistogram("soloInt", - getArrayHistogramFromData({ - {Value(42), kCollCard /* frequency */}, - })); - - // Check against a variety of intervals that include 42 as a bound. - ASSERT_MATCH_CE(t, "{soloInt: {$eq: 42}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$lt: 42}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$lte: 42}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lt: 42}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lte: 42}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lt: 42}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lte: 42}}", kCollCard); - - // Check against a variety of intervals that include 42 only as one bound. - ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lt: 43}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lte: 43}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lt: 43}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lte: 43}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lt: 42}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lte: 42}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lt: 42}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lte: 42}}", kCollCard); - - // Check against a variety of intervals close to 42 using a lower bound of 41 and a higher bound - // of 43. 
- ASSERT_MATCH_CE(t, "{soloInt: {$eq: 41}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$eq: 43}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$lt: 43}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$lte: 43}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lt: 43}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lt: 43}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lte: 43}}", kCollCard); - ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lte: 43}}", kCollCard); - - // Check against different types. - ASSERT_MATCH_CE(t, "{soloInt: {$eq: \"42\"}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$lt: \"42\"}}", 0.0); - ASSERT_MATCH_CE(t, "{soloInt: {$lt: 42.1}}", kCollCard); -} - -TEST(CEHistogramTest, TestOneBoundIntRangeHistogram) { - constexpr auto kCollCard = 51; - CEHistogramTester t(collName, kCollCard); - t.addHistogram("intRange", - getArrayHistogramFromData({ - {Value(10), 5 /* frequency */}, - {Value(20), 1 /* frequency */, 45 /* range frequency */, 10 /* ndv */}, - })); - - // Test ranges that overlap only with the lower bound. - // Note: 5 values equal 10. - ASSERT_MATCH_CE(t, "{intRange: {$eq: 10}}", 5.0); - ASSERT_MATCH_CE(t, "{intRange: {$lte: 10}}", 5.0); - ASSERT_MATCH_CE(t, "{intRange: {$lte: 10}, intRange: {$gte: 10}}", 5.0); - - // Test ranges that overlap only with the upper bound. - ASSERT_MATCH_CE(t, "{intRange: {$eq: 11}}", 5.0); - ASSERT_MATCH_CE(t, "{intRange: {$eq: 15}}", 5.0); - ASSERT_MATCH_CE(t, "{intRange: {$eq: 15.5}}", 5.0); - ASSERT_MATCH_CE(t, "{intRange: {$eq: 20}}", 1.0); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 20}}", 1.0); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 10}}", 46.0); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 15}}", 28.5); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 15}}", 23.5); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 11}, intRange: {$lte: 20}}", 41.5); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 11}, intRange: {$lte: 20}}", 41.5); - - // Test ranges that partially overlap with the entire histogram. - ASSERT_MATCH_CE(t, "{intRange: {$lt: 11}}", 9.5); - ASSERT_MATCH_CE(t, "{intRange: {$lt: 15}}", 22.5); - ASSERT_MATCH_CE(t, "{intRange: {$lte: 15}}", 27.5); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 8}, intRange: {$lte: 15}}", 27.5); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 8}, intRange: {$lte: 15}}", 27.5); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 8}, intRange: {$lt: 15}}", 22.5); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 8}, intRange: {$lte: 15}}", 27.5); - - // Test ranges that include all values in the histogram. - ASSERT_MATCH_CE(t, "{intRange: {$gte: 10}, intRange: {$lte: 20}}", kCollCard); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 1}, intRange: {$lte: 30}}", kCollCard); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 1}, intRange: {$lt: 30}}", kCollCard); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 1}, intRange: {$lte: 30}}", kCollCard); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 1}, intRange: {$lt: 30}}", kCollCard); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 0}}", kCollCard); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 0}}", kCollCard); - ASSERT_MATCH_CE(t, "{intRange: {$lt: 100}}", kCollCard); - ASSERT_MATCH_CE(t, "{intRange: {$lte: 100}}", kCollCard); - - // Test ranges that are fully included in the histogram. 
- ASSERT_MATCH_CE(t, "{intRange: {$eq: 10.5}}", 5.0); - ASSERT_MATCH_CE(t, "{intRange: {$eq: 12.5}}", 5.0); - ASSERT_MATCH_CE(t, "{intRange: {$eq: 19.36}}", 5.0); - - // Test ranges that don't overlap with the histogram. - ASSERT_MATCH_CE(t, "{intRange: {$lt: 10}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$lt: 5}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$lte: 5}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$eq: 20.1}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$eq: 21}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 21}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 20}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 100}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 30}, intRange: {$lte: 50}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 30}, intRange: {$lt: 50}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 30}, intRange: {$lt: 50}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 30}, intRange: {$lte: 50}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 0}, intRange: {$lte: 5}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 0}, intRange: {$lt: 5}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 0}, intRange: {$lt: 5}}", 0.0); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 0}, intRange: {$lte: 5}}", 0.0); - - // Because we don't specify any indexes here, these intervals do not go through simplification. - // This means that instead of having one key in the requirements map of the generated sargable - // node corresponding to the path "intRange", we have two keys and two ranges, both - // corresponding to the same path. As a consequence, we combine the estimates for the intervals - // using exponential backoff, which results in an overestimate. - ASSERT_MATCH_CE(t, "{intRange: {$gte: 11}, intRange: {$lt: 20}}", 41.09); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 11}, intRange: {$lt: 20}}", 41.09); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 12}, intRange: {$lt: 15}}", 19.16); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 12}, intRange: {$lt: 15}}", 20.42); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 12}, intRange: {$lte: 15}}", 23.42); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 12}, intRange: {$lte: 15}}", 24.96); - ASSERT_MATCH_CE(t, "{intRange: {$lt: 19}, intRange: {$gt: 11}}", 36.53); - - // When we specify that there is a non-multikey index on 'intRange', we expect to see interval - // simplification occurring, which should provide a better estimate for the following ranges. - t.setIndexes( - {{"intRangeIndex", - makeIndexDefinition("intRange", CollationOp::Ascending, /* isMultiKey */ false)}}); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 11}, intRange: {$lt: 20}}", 40.5); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 11}, intRange: {$lt: 20}}", 40.5); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 12}, intRange: {$lt: 15}}", 8.5); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 12}, intRange: {$lt: 15}}", 13.5); - ASSERT_MATCH_CE(t, "{intRange: {$gt: 12}, intRange: {$lte: 15}}", 13.5); - ASSERT_MATCH_CE(t, "{intRange: {$gte: 12}, intRange: {$lte: 15}}", 18.5); - ASSERT_MATCH_CE(t, "{intRange: {$lt: 19}, intRange: {$gt: 11}}", 31.0); -} - -TEST(CEHistogramTest, TestHistogramOnNestedPaths) { - constexpr auto kCollCard = 50; - CEHistogramTester t(collName, kCollCard); - - // Create a histogram with a single bucket that contains exactly one int (42) with a frequency - // of 50 (equal to the collection cardinality). 
- t.addHistogram("path", - getArrayHistogramFromData({ - {Value(42), kCollCard /* frequency */}, - })); - t.addHistogram("a.histogram.path", - getArrayHistogramFromData({ - {Value(42), kCollCard /* frequency */}, - })); - - ASSERT_MATCH_CE(t, "{\"not.a.histogram.path\": {$eq: 42}}", 7.071 /* heuristic */); - ASSERT_MATCH_CE(t, "{\"a.histogram.path\": {$eq: 42}}", kCollCard); - ASSERT_MATCH_CE( - t, "{\"a.histogram.path.with.no.histogram\": {$eq: 42}}", 7.071 /* heuristic */); - - // When a predicate can't be precisely translated to a SargableNode (such as $elemMatch on a - // dotted path), we may still be able to translate an over-approximation. We generate a - // SargableNode with all predicates marked perfOnly, and keep the original Filter. The Filter - // ensures the results are correct, while the SargableNode hopefully will be answerable by an - // index. - // - // On the logical level, perfOnly predicates don't do anything, so we don't consider them in - // cardinality estimates. But when we split a SargableNode into an indexed part and a fetch - // part, we remove the perfOnly flag from the indexed part, and we should consider them to - // estimate how many index keys are returned. - // - // In this test, we want to exercise the histogram estimate for the SargableNode generated by - // $elemMatch on a dotted path. So we create an index on this field to ensure the SargableNode - // is split, and the predicates marked non-perfOnly. - // - // We also mark the index multikey, to prevent non-CE rewrites from removing the predicate - // entirely. (This scenario could happen if you remove all the arrays, and refresh the - // statistics.) - IndexDefinition ix{ - IndexCollationSpec{ - IndexCollationEntry{ - makeIndexPath({"a", "histogram", "path"}), - CollationOp::Ascending, - }, - }, - true /* isMultiKey */, - }; - t.setIndexes({{"a_histogram_path_1", std::move(ix)}}); - ASSERT_MATCH_CE_NODE(t, "{\"a.histogram.path\": {$elemMatch: {$eq: 42}}}", 0.0, isSargable2); -} - -TEST(CEHistogramTest, TestArrayHistogramOnAtomicPredicates) { - constexpr auto kCollCard = 6; - CEHistogramTester t(collName, kCollCard); - t.addHistogram( - "a", - // Generate a histogram for this data: - // {a: 1}, {a: 2}, {a: [1, 2, 3, 2, 2]}, {a: [10]}, {a: [2, 3, 3, 4, 5, 5, 6]}, {a: []} - // - scalars: [1, 2] - // - unique values: [1, 2, 3], [10], [2, 3, 4, 5, 6] - // -> [1, 2, 2, 3, 3, 4, 5, 6, 10] - // - min values: [1], [10], [2] -> [1, 1, 2, 2, 10] - // - max values: [3], [10], [6] -> [1, 2, 3, 6, 10] - getArrayHistogramFromData( - {// Scalar buckets. - {Value(1), 1 /* frequency */}, - {Value(2), 1 /* frequency */}}, - { - // Array unique buckets. - {Value(1), 1 /* frequency */}, - {Value(2), 2 /* frequency */}, - {Value(3), 2 /* frequency */}, - {Value(4), 1 /* frequency */}, - {Value(5), 1 /* frequency */}, - {Value(6), 1 /* frequency */}, - {Value(10), 1 /* frequency */}, - }, - { - // Array min buckets. - {Value(1), 1 /* frequency */}, - {Value(2), 1 /* frequency */}, - {Value(10), 1 /* frequency */}, - }, - { - // Array max buckets. - {Value(3), 1 /* frequency */}, - {Value(6), 1 /* frequency */}, - {Value(10), 1 /* frequency */}, - }, - {{sbe::value::TypeTags::NumberInt32, 13}}, // Array type counts. - 3, // 3 arrays total. - 1 // 1 empty array. - )); - - // Test simple predicates against 'a'. Note: in the $elemMatch case, we exclude scalar - // estimates. Without $elemMatch, we add the array histogram and scalar histogram estimates - // together. - - // Test equality predicates. 
- ASSERT_EQ_ELEMMATCH_CE(t, 0.0 /* CE */, 0.0 /* $elemMatch CE */, "a", "{$eq: 0}"); - ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$eq: 1}"); - ASSERT_EQ_ELEMMATCH_CE(t, 3.0 /* CE */, 2.0 /* $elemMatch CE */, "a", "{$eq: 2}"); - ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 2.0 /* $elemMatch CE */, "a", "{$eq: 3}"); - ASSERT_EQ_ELEMMATCH_CE(t, 1.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$eq: 4}"); - ASSERT_EQ_ELEMMATCH_CE(t, 1.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$eq: 5}"); - ASSERT_EQ_ELEMMATCH_CE(t, 1.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$eq: 6}"); - ASSERT_EQ_ELEMMATCH_CE(t, 1.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$eq: 10}"); - ASSERT_EQ_ELEMMATCH_CE(t, 0.0 /* CE */, 0.0 /* $elemMatch CE */, "a", "{$eq: 11}"); - - // Test histogram boundary values. - ASSERT_EQ_ELEMMATCH_CE(t, 0.0 /* CE */, 0.0 /* $elemMatch CE */, "a", "{$lt: 1}"); - ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$lte: 1}"); - ASSERT_EQ_ELEMMATCH_CE(t, 0.0 /* CE */, 0.0 /* $elemMatch CE */, "a", "{$gt: 10}"); - ASSERT_EQ_ELEMMATCH_CE(t, 1.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$gte: 10}"); - - ASSERT_EQ_ELEMMATCH_CE(t, 5.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lte: 10}"); - ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lt: 10}"); - ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gt: 1}"); - ASSERT_EQ_ELEMMATCH_CE(t, 5.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gte: 1}"); - - ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lte: 5}"); - ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lt: 5}"); - ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 2.0 /* $elemMatch CE */, "a", "{$gt: 5}"); - ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 2.40822 /* $elemMatch CE */, "a", "{$gte: 5}"); - - ASSERT_EQ_ELEMMATCH_CE(t, 2.45 /* CE */, 2.40822 /* $elemMatch CE */, "a", "{$gt: 2, $lt: 5}"); - ASSERT_EQ_ELEMMATCH_CE(t, 3.27 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gte: 2, $lt: 5}"); - ASSERT_EQ_ELEMMATCH_CE(t, 2.45 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gt: 2, $lte: 5}"); - ASSERT_EQ_ELEMMATCH_CE(t, 3.27 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gte: 2, $lte: 5}"); -} - -TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) { - constexpr auto kCollCard = 175; - CEHistogramTester t(collName, kCollCard); - - // A scalar histogram with values in the range [1,10], most of which are in the middle bucket. - t.addHistogram("scalar", - getArrayHistogramFromData({ - {Value(1), 10 /* frequency */}, - {Value(2), 10 /* frequency */}, - {Value(3), 20 /* frequency */, 120 /* range frequency */, 5 /* ndv */}, - {Value(8), 5 /* frequency */, 10 /* range frequency */, 3 /* ndv */}, - })); - - // An array histogram built on the following arrays with 35 occurrences of each: - // [{[1, 2, 3]: 35}, {[5, 5, 5, 5, 5]: 35}, {[6]: 35}, {[]: 35}, {[8, 9, 10]: 35}] - t.addHistogram( - "array", - getArrayHistogramFromData( - {/* No scalar buckets. */}, - { - // Array unique buckets. - {Value(2), 35 /* frequency */, 35 /* range frequency */, 2 /* ndv */}, - {Value(5), 35 /* frequency */, 35 /* range frequency */, 2 /* ndv */}, - {Value(6), 35 /* frequency */}, - {Value(10), 35 /* frequency */, 105 /* range frequency */, 3 /* ndv */}, - }, - { - // Array min buckets. - {Value(1), 35 /* frequency */}, - {Value(5), 35 /* frequency */}, - {Value(6), 35 /* frequency */}, - {Value(8), 35 /* frequency */}, - }, - { - // Array max buckets. 
- {Value(3), 35 /* frequency */}, - {Value(5), 35 /* frequency */}, - {Value(6), 35 /* frequency */}, - {Value(10), 35 /* frequency */}, - }, - {{sbe::value::TypeTags::NumberInt32, 420}}, // Array type count = 3*35+5*35+1*35+3*35. - kCollCard, // kCollCard arrays total. - 35 // 35 empty arrays - )); - - t.addHistogram( - "mixed", - // The mixed histogram has 87 scalars that follow approximately the same distribution as - // in the pure scalar case, and 88 arrays with the following distribution: - // [{[1, 2, 3]: 17}, {[5, 5, 5, 5, 5]: 17}, {[6]: 17}, {[]: 20}, {[8, 9, 10]: 17}] - getArrayHistogramFromData( - { - // Scalar buckets. These are half the number of values from the "scalar" histogram. - {Value(1), 5 /* frequency */}, - {Value(2), 5 /* frequency */}, - {Value(3), 10 /* frequency */, 60 /* range frequency */, 5 /* ndv */}, - {Value(8), 2 /* frequency */, 5 /* range frequency */, 3 /* ndv */}, - }, - { - // Array unique buckets. - {Value(2), 17 /* frequency */, 17 /* range frequency */, 2 /* ndv */}, - {Value(5), 17 /* frequency */, 17 /* range frequency */, 2 /* ndv */}, - {Value(6), 17 /* frequency */}, - {Value(10), 17 /* frequency */, 34 /* range frequency */, 3 /* ndv */}, - }, - { - // Array min buckets. - {Value(1), 17 /* frequency */}, - {Value(5), 17 /* frequency */}, - {Value(6), 17 /* frequency */}, - {Value(8), 17 /* frequency */}, - }, - { - // Array max buckets. - {Value(3), 17 /* frequency */}, - {Value(5), 17 /* frequency */}, - {Value(6), 17 /* frequency */}, - {Value(10), 17 /* frequency */}, - }, - {{sbe::value::TypeTags::NumberInt32, 289}}, // Array type count = 3*17+5*17+6*17+3*17 - 88, // kCollCard arrays total. - 20 // 20 empty arrays. - )); - - // Test cardinality of individual predicates. - ASSERT_EQ_ELEMMATCH_CE(t, 5.0 /* CE */, 0.0 /* $elemMatch CE */, "scalar", "{$eq: 5}"); - ASSERT_EQ_ELEMMATCH_CE(t, 35.0 /* CE */, 35.0 /* $elemMatch CE */, "array", "{$eq: 5}"); - ASSERT_EQ_ELEMMATCH_CE(t, 19.5 /* CE */, 17.0 /* $elemMatch CE */, "mixed", "{$eq: 5}"); - - // Test cardinality of predicate combinations; the following tests make sure we correctly track - // which paths have $elemMatches and which don't. Some notes: - // - Whenever we use 'scalar' + $elemMatch, we expect an estimate of 0 because $elemMatch never - // returns documents on non-array paths. - // - Whenever we use 'mixed' + $elemMatch, we expect the estimate to decrease because we omit - // scalar values in 'mixed' from our estimate. - // - We do not expect the estimate on 'array' to be affected by the presence of $elemMatch, - // since we only have array values for this field. - - // Composite predicate on 'scalar' and 'array' fields. - ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, array: {$eq: 5}}", 2.236); - ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, array: {$elemMatch: {$eq: 5}}}", 2.236); - ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 0.0); - - // Composite predicate on 'mixed' and 'array' fields. - ASSERT_MATCH_CE(t, "{mixed: {$eq: 5}, array: {$eq: 5}}", 8.721); - ASSERT_MATCH_CE(t, "{mixed: {$eq: 5}, array: {$elemMatch: {$eq: 5}}}", 8.721); - ASSERT_MATCH_CE(t, "{mixed: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 7.603); - - // Composite predicate on 'scalar' and 'mixed' fields. 
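    // [Editorial sketch, not part of the original patch.] These combine the
    // single-field estimates above with conjunctive exponential backoff, e.g.
    // for {scalar: {$eq: 5}, mixed: {$eq: 5}}: sel(scalar) = 5/175 and
    // sel(mixed) = 19.5/175, so CE = 175 * (5/175) * sqrt(19.5/175) ~= 1.669.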
- ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, mixed: {$eq: 5}}", 1.669); - ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, mixed: {$elemMatch: {$eq: 5}}}", 1.559); - ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$eq: 5}}", 0.0); - - // Composite predicate on all three fields without '$elemMatch' on 'array'. - ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, mixed: {$eq: 5}, array: {$eq: 5}}", 1.116); - ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, mixed: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 1.042); - ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$eq: 5}, array: {$eq: 5}}", 0.0); - - // Composite predicate on all three fields with '$elemMatch' on 'array' (same expected results - // as above). - ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, mixed: {$eq: 5}, array: {$elemMatch: {$eq: 5}}}", 1.116); - - // Test case where the same path has both $match and $elemMatch (same as $elemMatch case). - ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, scalar: {$eq: 5}}", 0.0); - ASSERT_MATCH_CE(t, "{mixed: {$elemMatch: {$eq: 5}}, mixed: {$eq: 5}}", 17.0); - ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 35.0); - - // Test case with multiple predicates and ranges. - ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$lt: 5}}", 70.2156); - ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$gt: 5}}", 28.4848); - - // Test multiple $elemMatches. - ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, array: {$elemMatch: {$eq: 5}}}", 0.0); - ASSERT_MATCH_CE(t, "{mixed: {$elemMatch: {$eq: 5}}, array: {$elemMatch: {$eq: 5}}}", 7.603); - ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$elemMatch: {$eq: 5}}}", 0.0); - ASSERT_MATCH_CE( - t, "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 0.0); - ASSERT_MATCH_CE( - t, - "{scalar: {$eq: 5}, mixed: {$elemMatch: {$eq: 5}}, array: {$elemMatch: {$eq: 5}}}", - 1.042); - ASSERT_MATCH_CE( - t, "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$eq: 5}, array: {$elemMatch: {$eq: 5}}}", 0.0); - ASSERT_MATCH_CE(t, - "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$elemMatch: {$eq: 5}}, array: " - "{$elemMatch: {$eq: 5}}}", - 0.0); - ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$lt: 5}}}", 34.1434); - ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$gt: 5}}}", 45.5246); - - // Verify that we still return an estimate of 0.0 for any $elemMatch predicate on a scalar - // field when we have a non-multikey index. - t.setIndexes({{"aScalarIndex", - makeIndexDefinition("scalar", CollationOp::Ascending, /* isMultiKey */ false)}}); - ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}}", 0.0); - ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$gt: 1, $lt: 10}}}", 0.0); - - // Test how we estimate singular PathArr sargable predicate. - ASSERT_MATCH_CE_NODE(t, "{array: {$elemMatch: {}}}", 175.0, isSargable); - ASSERT_MATCH_CE_NODE(t, "{mixed: {$elemMatch: {}}}", 88.0, isSargable); - - // Take into account both empty and non-empty arrays. - auto makePathArrABT = [&](const FieldNameType& fieldName) { - const ProjectionName scanProjection{"scan_0"}; - auto scanNode = make(scanProjection, collName); - auto filterNode = - make(make(make(std::move(fieldName), make()), - make(scanProjection)), - std::move(scanNode)); - return make( - properties::ProjectionRequirement{ProjectionNameVector{scanProjection}}, - std::move(filterNode)); - }; - - // There are no arrays in the 'scalar' field. 
- ABT scalarABT = makePathArrABT("scalar"); - ASSERT_CE(t, scalarABT, 0.0); - - // About half the values of this field are arrays. - ABT mixedABT = makePathArrABT("mixed"); - ASSERT_CE(t, mixedABT, 88.0); - - // This field is always an array. - ABT arrayABT = makePathArrABT("array"); - ASSERT_CE(t, arrayABT, kCollCard); -} - -TEST(CEHistogramTest, TestMixedElemMatchAndNonElemMatch) { - constexpr auto kCollCard = 1; - CEHistogramTester t(collName, kCollCard); - - // A very simple histogram encoding a collection with one document {a: [3, 10]}. - t.addHistogram("a", - getArrayHistogramFromData({/* No scalar buckets. */}, - { - // Array unique buckets. - {Value(3), 1 /* frequency */}, - {Value(10), 1 /* frequency */}, - }, - { - // Array min buckets. - {Value(3), 1 /* frequency */}, - }, - { - // Array max buckets. - {Value(10), 1 /* frequency */}, - }, - {{sbe::value::TypeTags::NumberInt32, 2}}, - // Array type counts. - 1, - 0)); - - // Tests without indexes. - ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$gt: 3, $lt: 10}}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10}}", 1.0); - ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$eq: 3}, $gt: 3, $lt: 10}}", 1.0); - ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10, $elemMatch: {$eq: 3}}}", 1.0); - ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10, $elemMatch: {$gt: 3, $lt: 10}}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$gt: 3, $lt: 10}, $gt: 3, $lt: 10}}", 0.0); - - // Tests with multikey index (note that the index on "a" must be multikey due to arrays). - t.setIndexes( - {{"anIndex", makeIndexDefinition("a", CollationOp::Ascending, /* isMultiKey */ true)}}); - ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$gt: 3, $lt: 10}}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10}}", 1.0); - ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$eq: 3}, $gt: 3, $lt: 10}}", 1.0); - ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10, $elemMatch: {$eq: 3}}}", 1.0); - ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10, $elemMatch: {$gt: 3, $lt: 10}}}", 0.0); - ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$gt: 3, $lt: 10}, $gt: 3, $lt: 10}}", 0.0); -} - -TEST(CEHistogramTest, TestTypeCounters) { - constexpr double kCollCard = 1000.0; - CEHistogramTester t(collName, kCollCard); - - // This test is designed such that for each document, we have the following fields: - // 1. scalar: Scalar histogram with no buckets, only type-counted data. - // 2. array: Array histogram with no buckets, only type-counted data inside of arrays. - // 3. mixed: Mixed histogram with no buckets, only type-counted data, both scalars and arrays. - constexpr double kNumObj = 200.0; - constexpr double kNumNull = 300.0; - t.addHistogram("scalar", - getArrayHistogramFromData({/* No histogram data. */}, - {{sbe::value::TypeTags::Object, kNumObj}, - {sbe::value::TypeTags::Null, kNumNull}})); - t.addHistogram("array", - getArrayHistogramFromData({/* No scalar buckets. */}, - {/* No array unique buckets. */}, - {/* No array min buckets. */}, - {/* No array max buckets. */}, - {{sbe::value::TypeTags::Object, kNumObj}, - {sbe::value::TypeTags::Null, kNumNull}}, - kCollCard)); - - // Count of each type in array type counters for field "mixed". - constexpr double kNumObjMA = 50.0; - constexpr double kNumNullMA = 100.0; - // For the purposes of this test, we have one array of each value of a non-histogrammable type. - constexpr double kNumArr = kNumObjMA + kNumNullMA; - const TypeCounts mixedArrayTC{{sbe::value::TypeTags::Object, kNumObjMA}, - {sbe::value::TypeTags::Null, kNumNullMA}}; - - // Count of each type in scalar type counters for field "mixed". 
- constexpr double kNumObjMS = 150.0;
- constexpr double kNumNullMS = 200.0;
- const TypeCounts mixedScalarTC{{sbe::value::TypeTags::Object, kNumObjMS},
- {sbe::value::TypeTags::Null, kNumNullMS}};
-
- // Quick sanity check of test setup for the "mixed" histogram. The idea is that we want a
- // portion of objects inside arrays, and the rest as scalars, but we want the total count of
- // objects to be the same as for the other two fields.
- ASSERT_EQ(kNumObjMA + kNumObjMS, kNumObj);
- ASSERT_EQ(kNumNullMA + kNumNullMS, kNumNull);
-
- t.addHistogram("mixed",
- getArrayHistogramFromData({/* No scalar buckets. */},
- {/* No array unique buckets. */},
- {/* No array min buckets. */},
- {/* No array max buckets. */},
- mixedArrayTC,
- kNumArr,
- 0 /* Empty array count. */,
- mixedScalarTC));
-
- // Set up indexes.
- t.setIndexes({{"scalarIndex",
- makeIndexDefinition("scalar", CollationOp::Ascending, /* isMultiKey */ false)}});
- t.setIndexes({{"arrayIndex",
- makeIndexDefinition("array", CollationOp::Ascending, /* isMultiKey */ true)}});
- t.setIndexes({{"mixedIndex",
- makeIndexDefinition("mixed", CollationOp::Ascending, /* isMultiKey */ true)}});
-
- // Tests for scalar type counts only.
- // For object-only intervals in a scalar histogram, we always return object count, no matter
- // what the bounds are. Since we have a scalar histogram for "scalar", we expect all $elemMatch
- // queries to have a cardinality of 0.
-
- // Test object equality.
- ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$eq: {}}");
- ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$eq: {a: 1}}");
- ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$eq: {b: 2, c: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$lt: {b: 2, c: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$gt: {b: 2, c: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$gte: {b: 2, c: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$lte: {b: 2, c: 3}}");
-
- // Test intervals including the empty object. Note that range queries on objects do not generate
- // point equalities, so these fall back onto logic in interval estimation that identifies that
- // the generated intervals are subsets of the object type interval. Note: we don't even generate
- // a SargableNode for the first case. The generated bounds are:
- // [{}, {}) because {} is the "minimum" value for the object type.
- ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "scalar", "{$lt: {}}");
- ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$gt: {}}");
- ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$gte: {}}");
- ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$lte: {}}");
-
- // Rather than combining the intervals together, in the following cases we generate two
- // object-only intervals in the requirements map with the following bounds. Each individual
- // interval is estimated as having a cardinality of 'kNumObj', before we apply conjunctive
- // exponential backoff to combine them.
- constexpr double k2ObjCard = 89.4427; // == 200/1000 * sqrt(200/1000) * 1000
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {}, $lte: {b: 2, c: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {}, $lte: {b: 2, c: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {}, $lt: {b: 2, c: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {}, $lt: {b: 2, c: 3}}");
-
- // Test intervals including {a: 1}. Similar to the above case, we have two intervals in the
- // requirements map.
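    // [Editorial note, not part of the original patch.] The bounds never
    // narrow these estimates: an object-to-object interval is recognized only
    // as a subset of the Object type bracket, so each of the two intervals is
    // estimated at the full object count before backoff combines them.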
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {a: 1}, $lte: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {a: 1}, $lte: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {a: 1}, $lt: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {a: 1}, $lt: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {a: 1}, $lte: {a: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {a: 1}, $lte: {a: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {a: 1}, $lt: {a: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {a: 1}, $lt: {a: 3}}"); - - // Test that for null, we always return null count. - // Note that for ranges including null (e.g. {$lt: null}) we don't generate any SargableNodes. - ASSERT_EQ_ELEMMATCH_CE(t, kNumNull, 0.0, "scalar", "{$eq: null}"); - - // TODO SERVER-70936: Add tests for booleans. - // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, 0.0, "scalar", "{$eq: true}"); - // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, 0.0, "scalar", "{$eq: false}"); - - // Tests for array type counts only. - // For object-only intervals in an array histogram, if we're using $elemMatch on an object-only - // interval, we always return object count. While we have no scalar type counts for "array", - // non-$elemMatch queries should also match objects embedded in arrays, so we still return - // object count in that case. - - // Test object equality. - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$eq: {}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$eq: {a: 1}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$eq: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$lt: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$gt: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$gte: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$lte: {b: 2, c: 3}}"); - - // Test intervals including the empty object. - // Note: we don't even generate a SargableNode for the first case. The generated bounds are: - // [{}, {}) because {} is the "minimum" value for the object type. - ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "array", "{$lt: {}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$gt: {}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$gte: {}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$lte: {}}"); - - // Similar to above, here we have two object intervals for non-$elemMatch queries. However, for - // $elemMatch queries, we have the following intervals in the requirements map: - // 1. [[], BinData(0, )) with CE 1000 - // 2. The actual object interval, e.g. ({}, {b: 2, c: 3}] with CE 200 - constexpr double kArrEMCard = kNumObj; // == 200/1000 * sqrt(1000/1000) * 1000 - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {}, $lte: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {}, $lte: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {}, $lt: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {}, $lt: {b: 2, c: 3}}"); - - // Test intervals including {a: 1}; similar to above, we have two object intervals. 
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {a: 1}, $lte: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {a: 1}, $lte: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {a: 1}, $lt: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {a: 1}, $lt: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {a: 1}, $lte: {a: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {a: 1}, $lte: {a: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {a: 1}, $lt: {a: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {a: 1}, $lt: {a: 3}}"); - - // Test that for null, we always return null count. - // Note that for ranges including null (e.g. {$lt: null}) we don't generate any SargableNodes. - ASSERT_EQ_ELEMMATCH_CE(t, kNumNull, kNumNull, "array", "{$eq: null}"); - - // TODO SERVER-70936: Add tests for booleans. - // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, kNumBool, "array", "{$eq: true}"); - // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, kNumBool, "array", "{$eq: false}"); - - // Tests for mixed type counts only. Regular match predicates should be estimated as the sum of - // the scalar and array counts (e.g. for objects, 'kNumObj'), while elemMatch predicates - // should be estimated without scalars, returning the array type count (for objects this is - // 'kNumObjMA'). - - // Test object equality. - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$eq: {}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$eq: {a: 1}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$eq: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$lt: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$gt: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$gte: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$lte: {b: 2, c: 3}}"); - - // Test intervals including the empty object. - // Note: we don't even generate a SargableNode for the first case. The generated bounds are: - // [{}, {}) because {} is the "minimum" value for the object type. - ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "mixed", "{$lt: {}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$gt: {}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$gte: {}}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$lte: {}}"); - - // Similar to above, here we have two object intervals for non-$elemMatch queries. However, for - // $elemMatch queries, we have the following intervals in the requirements map: - // 1. [[], BinData(0, )) with CE 1000 - // 2. The actual object interval, e.g. ({}, {b: 2, c: 3}] with CE 50 - constexpr double kMixEMCard = kNumObjMA; // == 50/1000 * sqrt(1000/1000) * 1000 - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {}, $lte: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {}, $lte: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {}, $lt: {b: 2, c: 3}}"); - ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {}, $lt: {b: 2, c: 3}}"); - - // Test intervals including {a: 1}; similar to above, we have two object intervals. 
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {a: 1}, $lte: {b: 2, c: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {a: 1}, $lte: {b: 2, c: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {a: 1}, $lt: {b: 2, c: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {a: 1}, $lt: {b: 2, c: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {a: 1}, $lte: {a: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {a: 1}, $lte: {a: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {a: 1}, $lt: {a: 3}}");
- ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {a: 1}, $lt: {a: 3}}");
-
- // Test that for null, we always return null count.
- // Note that for ranges including null (e.g. {$lt: null}) we don't generate any SargableNodes.
- ASSERT_EQ_ELEMMATCH_CE(t, kNumNull, kNumNullMA, "mixed", "{$eq: null}");
-
- // TODO SERVER-70936: Add tests for booleans.
- // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, kNumBoolMA, "mixed", "{$eq: true}");
- // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, kNumBoolMA, "mixed", "{$eq: false}");
-
- // Test combinations of the three fields/type counters.
- constexpr double k3ObjCard =
- 59.814; // == 200/1000 * sqrt(200/1000) * sqrt(sqrt(200/1000)) * 1000
- constexpr double k4ObjCard =
- 48.914; // == 200/1000 * sqrt(200/1000) * sqrt(sqrt(200/1000)) * sqrt(sqrt(sqrt(200/1000))) * 1000
- ASSERT_MATCH_CE_NODE(t,
- "{scalar: {$eq: {a: 1}}, mixed: {$eq: {b: 1}}, array: {$eq: {c: 1}}}",
- k3ObjCard,
- isSargable3);
- ASSERT_MATCH_CE_NODE(
- t,
- "{scalar: {$eq: {}}, mixed: {$lt: {b: 1}}, array: {$gt: {a: 1}, $lte: {a: 2, b: 4, c: 3}}}",
- k4ObjCard,
- isSargable4);
-
- // Should always get a 0.0 cardinality for an $elemMatch on a scalar predicate.
- ASSERT_MATCH_CE(t,
- "{scalar: {$elemMatch: {$eq: {a: 1}}}, mixed: {$elemMatch: {$eq: {b: 1}}},"
- " array: {$elemMatch: {$eq: {c: 1}}}}",
- 0.0);
- ASSERT_MATCH_CE(t,
- "{scalar: {$elemMatch: {$eq: {}}}, mixed: {$elemMatch: {$lt: {b: 1}}},"
- " array: {$elemMatch: {$gt: {a: 1}, $lte: {a: 2, b: 4, c: 3}}}}",
- 0.0);
-
- // The 'mixed' interval estimate is 50, while the 'array' interval estimate is 200.
- constexpr double kArrMixObjEMCard = 22.3607; // == 50/1000 * sqrt(200/1000) * 1000
- ASSERT_MATCH_CE_NODE(t,
- "{mixed: {$elemMatch: {$eq: {b: 1}}}, array: {$elemMatch: {$eq: {c: 1}}}}",
- kArrMixObjEMCard,
- isSargable4);
- ASSERT_MATCH_CE_NODE(t,
- "{mixed: {$elemMatch: {$lt: {b: 1}}},"
- " array: {$elemMatch: {$gt: {a: 1}, $lte: {a: 2, b: 4, c: 3}}}}",
- kArrMixObjEMCard,
- isSargable4);
-}
-
-TEST(CEHistogramTest, TestNestedArrayTypeCounterPredicates) {
- // This test validates the correct behaviour of the nested-array type counter, as well as
- // combinations of type counters and histogram estimates.
- constexpr double kCollCard = 1000.0;
- constexpr double kNumArr = 600.0; // Total number of arrays.
- constexpr double kNumNestArr = 500.0; // Frequency of nested arrays, e.g. [[1, 2, 3]].
- constexpr double kNumNonNestArr = 100.0;
- constexpr double kNum1 = 2.0; // Frequency of 1.
- constexpr double kNum2 = 3.0; // Frequency of 2.
- constexpr double kNum3 = 5.0; // Frequency of 3.
- constexpr double kNumArr1 = 20.0; // Frequency of [1].
- constexpr double kNumArr2 = 30.0; // Frequency of [2].
- constexpr double kNumArr3 = 50.0; // Frequency of [3].
- constexpr double kNumObj = 390.0; // Total number of scalar objects.
-
- // Sanity test numbers.
- ASSERT_EQ(kNumArr1 + kNumArr2, kNumArr3); - ASSERT_EQ(kNumNonNestArr + kNumNestArr, kNumArr); - ASSERT_EQ(kNumObj + kNumArr + kNum1 + kNum2 + kNum3, kCollCard); - - // Define histogram buckets. - TestBuckets scalarBuckets{{Value(1), kNum1}, {Value(2), kNum2}, {Value(3), kNum3}}; - TestBuckets arrUniqueBuckets{{Value(1), kNumArr1}, {Value(2), kNumArr2}, {Value(3), kNumArr3}}; - TestBuckets arrMinBuckets{{Value(1), kNumArr1}, {Value(2), kNumArr2}, {Value(3), kNumArr3}}; - TestBuckets arrMaxBuckets{{Value(1), kNumArr1}, {Value(2), kNumArr2}, {Value(3), kNumArr3}}; - - // Define type counts. - TypeCounts arrayTypeCounts{{sbe::value::TypeTags::Array, kNumNestArr}, - {sbe::value::TypeTags::NumberInt32, kNumNonNestArr}}; - TypeCounts scalarTypeCounts{{sbe::value::TypeTags::Object, kNumObj}}; - - CEHistogramTester t(collName, kCollCard); - t.addHistogram("na", - getArrayHistogramFromData(std::move(scalarBuckets), - std::move(arrUniqueBuckets), - std::move(arrMinBuckets), - std::move(arrMaxBuckets), - std::move(arrayTypeCounts), - kNumArr, - 0 /* Empty array count. */, - std::move(scalarTypeCounts))); - t.setIndexes( - {{"index", makeIndexDefinition("na", CollationOp::Ascending, /* isMultiKey */ true)}}); - - // Some equality tests on types that are not present in the type counters should return 0.0. - // TODO SERVER-70936: Add tests for booleans. - // ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "na", "{$eq: false}"); - // ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "na", "{$eq: true}"); - ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "na", "{$eq: null}"); - // We don't have any objects in arrays, so don't count them. - ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "na", "{$eq: {a: 1}}"); - - // Quick equality test to see if regular array histogram estimation still works as expected. - ASSERT_EQ_ELEMMATCH_CE(t, kNumArr1 + kNum1, kNumArr1, "na", "{$eq: 1}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumArr2 + kNum2, kNumArr2, "na", "{$eq: 2}"); - ASSERT_EQ_ELEMMATCH_CE(t, kNumArr3 + kNum3, kNumArr3, "na", "{$eq: 3}"); - - // Test a range predicate. - // - For simple $lt, we correctly return both scalar and array counts that could match. - // - For $elemMatch + $lt, we have two entries in the requirements map. - // - The PathArr interval, estimated correctly as 'kNumArr'. - // - The interval {$lt: 3}, estimated as an array histogram range interval. - // We then combine the estimates for the two using conjunctive exponential backoff. - constexpr double elemMatchRange = 71.5485; - ASSERT_EQ_ELEMMATCH_CE( - t, kNumArr1 + kNum1 + kNumArr2 + kNum2, elemMatchRange, "na", "{$lt: 3}"); - ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "na", "{$lt: 1}"); - - // Test equality to arrays. - // - $elemMatch, estimation, as expected, will return the count of nested arrays. - // - For the case where we see equality to the array, we have a disjunction of intervals in the - // same entry of the SargableNode requirements map. For the case of {$eq: [1]}, for example, we - // have: [[1], [1]] U [1, 1]. As a result, we estimate each point interval separately: - // - [[1], [1]]: We estimate the nested array interval as 'kNumNestArr'. - // - [1, 1]: We estimate the regular point interval as 'kNumArr1' + 'kNum1'. - // We then combine the results by exponential backoff. Note that we will NOT match {na: 1}; - // however, because of the way the interval is defined, our estimate suggests that we would. - // TODO: is there a way to know this on the CE side? 
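-
- // The union of intervals above is combined with the disjunctive flavor of
- // exponential backoff. A minimal sketch of that arithmetic follows; the
- // 'disjExpBackoff2' helper is hypothetical (not part of the estimator API) and
- // assumes <cmath> is available. It reproduces the formulas quoted next to the
- // constants below.
- const auto disjExpBackoff2 = [](double s1, double s2, double card) {
- return (1.0 - (1.0 - s1) * std::sqrt(1.0 - s2)) * card;
- };
- ASSERT_APPROX_EQUAL(505.531, disjExpBackoff2(500.0 / 1000, 22.0 / 1000, 1000.0), kMaxCEError);
-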
- constexpr double kArr1EqCard = 505.531; // (1 - (1 - 500.0/1000) * sqrt(1 - 22.0/1000)) * 1000
- constexpr double kArr2EqCard = 508.319; // (1 - (1 - 500.0/1000) * sqrt(1 - 33.0/1000)) * 1000
- constexpr double kArr3EqCard = 513.944; // (1 - (1 - 500.0/1000) * sqrt(1 - 55.0/1000)) * 1000
- ASSERT_EQ_ELEMMATCH_CE_NODE(t, kArr1EqCard, kNumNestArr, "na", "{$eq: [1]}", isSargable);
- ASSERT_EQ_ELEMMATCH_CE_NODE(t, kArr2EqCard, kNumNestArr, "na", "{$eq: [2]}", isSargable);
- ASSERT_EQ_ELEMMATCH_CE_NODE(t, kArr3EqCard, kNumNestArr, "na", "{$eq: [3]}", isSargable);
- // For the last case, we have the interval [[1, 2, 3], [1, 2, 3]] U [1, 1].
- // TODO: is this interval semantically correct?
- ASSERT_EQ_ELEMMATCH_CE_NODE(t, kArr1EqCard, kNumNestArr, "na", "{$eq: [1, 2, 3]}", isSargable);
-
- // Now, we test the case of nested arrays.
- // - $elemMatch, once again, returns the number of nested arrays.
- // - Simple equality generates two intervals. We estimate both intervals using the nested array
- // type count. For {$eq: [[1, 2, 3]]}, we get:
- // - [[1, 2, 3], [1, 2, 3]] U [[[1, 2, 3]], [[1, 2, 3]]]
- constexpr double kNestedEqCard =
- 646.447; // (1 - (1 - 500.0/1000) * sqrt(1 - 500.0/1000)) * 1000
- ASSERT_EQ_ELEMMATCH_CE_NODE(
- t, kNestedEqCard, kNumNestArr, "na", "{$eq: [[1, 2, 3]]}", isSargable);
- ASSERT_EQ_ELEMMATCH_CE_NODE(t, kNestedEqCard, kNumNestArr, "na", "{$eq: [[1]]}", isSargable);
- ASSERT_EQ_ELEMMATCH_CE_NODE(t, kNestedEqCard, kNumNestArr, "na", "{$eq: [[2]]}", isSargable);
- ASSERT_EQ_ELEMMATCH_CE_NODE(t, kNestedEqCard, kNumNestArr, "na", "{$eq: [[3]]}", isSargable);
-
- // Note: we can't convert range queries on arrays to SargableNodes yet. If we ever can, we
- // should add some more tests here.
-}
-
-TEST(CEHistogramTest, TestFallbackForNonConstIntervals) {
- // This is a sanity test to validate fallback for an interval with non-const bounds.
- IntervalRequirement intervalLowNonConst{
- BoundRequirement(true /*inclusive*/, make<Variable>("v1")),
- BoundRequirement::makePlusInf()};
-
- IntervalRequirement intervalHighNonConst{
- BoundRequirement::makeMinusInf(),
- BoundRequirement(true /*inclusive*/, make<Variable>("v2"))};
-
- IntervalRequirement intervalEqNonConst{
- BoundRequirement(true /*inclusive*/, make<Variable>("v3")),
- BoundRequirement(true /*inclusive*/, make<Variable>("v3"))};
-
- const auto estInterval = [](const auto& interval) {
- ArrayHistogram ah;
- return estimateIntervalCardinality(
- ah, interval, 100 /* inputCardinality */, true /* includeScalar */);
- };
-
- ASSERT_EQ(estInterval(intervalLowNonConst), -1.0);
- ASSERT_EQ(estInterval(intervalHighNonConst), -1.0);
- ASSERT_EQ(estInterval(intervalEqNonConst), -1.0);
-}
-} // namespace
-} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_interpolation_test.cpp b/src/mongo/db/query/ce/ce_interpolation_test.cpp
deleted file mode 100644
index 6d9d52b347d..00000000000
--- a/src/mongo/db/query/ce/ce_interpolation_test.cpp
+++ /dev/null
@@ -1,505 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program.
If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/query/ce/array_histogram.h" -#include "mongo/db/query/ce/ce_test_utils.h" -#include "mongo/db/query/ce/histogram_estimation.h" -#include "mongo/db/query/sbe_stage_builder_helpers.h" -#include "mongo/unittest/unittest.h" - -namespace mongo::ce { -namespace { - -using namespace sbe; - -TEST(EstimatorTest, ManualHistogram) { - std::vector data{{0, 1.0, 1.0, 1.0}, - {10, 1.0, 10.0, 5.0}, - {20, 3.0, 15.0, 3.0}, - {30, 1.0, 10.0, 4.0}, - {40, 2.0, 0.0, 0.0}, - {50, 1.0, 10.0, 5.0}}; - const ScalarHistogram hist = createHistogram(data); - - ASSERT_EQ(55.0, getTotals(hist).card); - - ASSERT_EQ(1.0, estimateIntValCard(hist, 0, EstimationType::kEqual)); - ASSERT_EQ(2.0, estimateIntValCard(hist, 5, EstimationType::kEqual)); - ASSERT_EQ(0.0, estimateIntValCard(hist, 35, EstimationType::kEqual)); - - ASSERT_EQ(15.5, estimateIntValCard(hist, 15, EstimationType::kLess)); - ASSERT_EQ(20.5, estimateIntValCard(hist, 15, EstimationType::kLessOrEqual)); - ASSERT_EQ(28, estimateIntValCard(hist, 20, EstimationType::kLess)); - ASSERT_EQ(31.0, estimateIntValCard(hist, 20, EstimationType::kLessOrEqual)); - - ASSERT_EQ(42, estimateIntValCard(hist, 10, EstimationType::kGreater)); - ASSERT_EQ(43, estimateIntValCard(hist, 10, EstimationType::kGreaterOrEqual)); - ASSERT_EQ(19, estimateIntValCard(hist, 25, EstimationType::kGreater)); - ASSERT_EQ(21.5, estimateIntValCard(hist, 25, EstimationType::kGreaterOrEqual)); -} - -TEST(EstimatorTest, UniformIntEstimate) { - // This hard-codes a maxdiff histogram with 10 buckets built off a uniform int distribution with - // a minimum of 0, a maximum of 1000, and 70 distinct values. - std::vector data{{2, 1, 0, 0}, - {57, 3, 2, 1}, - {179, 5, 10, 6}, - {317, 5, 9, 6}, - {344, 3, 0, 0}, - {558, 4, 19, 12}, - {656, 2, 4, 3}, - {798, 3, 7, 4}, - {951, 5, 17, 7}, - {986, 1, 0, 0}}; - const ScalarHistogram hist = createHistogram(data); - - // Predicates over bucket bound. - double expectedCard = estimateIntValCard(hist, 558, EstimationType::kEqual); - ASSERT_EQ(4.0, expectedCard); - expectedCard = estimateIntValCard(hist, 558, EstimationType::kLess); - ASSERT_EQ(57.0, expectedCard); - expectedCard = estimateIntValCard(hist, 558, EstimationType::kLessOrEqual); - ASSERT_EQ(61.0, expectedCard); - - // Predicates over value inside of a bucket. - - // Query: [{$match: {a: {$eq: 530}}}]. - expectedCard = estimateIntValCard(hist, 530, EstimationType::kEqual); - ASSERT_APPROX_EQUAL(1.6, expectedCard, 0.1); // Actual: 1. - - // Query: [{$match: {a: {$lt: 530}}}]. - expectedCard = estimateIntValCard(hist, 530, EstimationType::kLess); - ASSERT_APPROX_EQUAL(52.9, expectedCard, 0.1); // Actual: 50. - - // Query: [{$match: {a: {$lte: 530}}}]. 
- expectedCard = estimateIntValCard(hist, 530, EstimationType::kLessOrEqual); - ASSERT_APPROX_EQUAL(54.5, expectedCard, 0.1); // Actual: 51. - - // Query: [{$match: {a: {$eq: 400}}}]. - expectedCard = estimateIntValCard(hist, 400, EstimationType::kEqual); - ASSERT_APPROX_EQUAL(1.6, expectedCard, 0.1); // Actual: 1. - - // Query: [{$match: {a: {$lt: 400}}}]. - expectedCard = estimateIntValCard(hist, 400, EstimationType::kLess); - ASSERT_APPROX_EQUAL(41.3, expectedCard, 0.1); // Actual: 39. - - // Query: [{$match: {a: {$lte: 400}}}]. - expectedCard = estimateIntValCard(hist, 400, EstimationType::kLessOrEqual); - ASSERT_APPROX_EQUAL(43.0, expectedCard, 0.1); // Actual: 40. -} - -TEST(EstimatorTest, NormalIntEstimate) { - // This hard-codes a maxdiff histogram with 10 buckets built off a normal int distribution with - // a minimum of 0, a maximum of 1000, and 70 distinct values. - std::vector data{{2, 1, 0, 0}, - {317, 8, 20, 15}, - {344, 2, 0, 0}, - {388, 3, 0, 0}, - {423, 4, 2, 2}, - {579, 4, 12, 8}, - {632, 3, 2, 1}, - {696, 3, 5, 3}, - {790, 5, 4, 2}, - {993, 1, 21, 9}}; - const ScalarHistogram hist = createHistogram(data); - - // Predicates over bucket bound. - double expectedCard = estimateIntValCard(hist, 696, EstimationType::kEqual); - ASSERT_EQ(3.0, expectedCard); - expectedCard = estimateIntValCard(hist, 696, EstimationType::kLess); - ASSERT_EQ(66.0, expectedCard); - expectedCard = estimateIntValCard(hist, 696, EstimationType::kLessOrEqual); - ASSERT_EQ(69.0, expectedCard); - - // Predicates over value inside of a bucket. - - // Query: [{$match: {a: {$eq: 150}}}]. - expectedCard = estimateIntValCard(hist, 150, EstimationType::kEqual); - ASSERT_APPROX_EQUAL(1.3, expectedCard, 0.1); // Actual: 1. - - // Query: [{$match: {a: {$lt: 150}}}]. - expectedCard = estimateIntValCard(hist, 150, EstimationType::kLess); - ASSERT_APPROX_EQUAL(9.1, expectedCard, 0.1); // Actual: 9. - - // Query: [{$match: {a: {$lte: 150}}}]. - expectedCard = estimateIntValCard(hist, 150, EstimationType::kLessOrEqual); - ASSERT_APPROX_EQUAL(10.4, expectedCard, 0.1); // Actual: 10. -} - -TEST(EstimatorTest, UniformStrEstimate) { - // This hard-codes a maxdiff histogram with 10 buckets built off a uniform string distribution - // with a minimum length of 3, a maximum length of 5, and 80 distinct values. - std::vector data{{{"0ejz", 2, 0, 0}, - {"8DCaq", 3, 4, 4}, - {"Cy5Kw", 3, 3, 3}, - {"WXX7w", 3, 31, 20}, - {"YtzS", 2, 0, 0}, - {"fuK", 5, 13, 7}, - {"gLkp", 3, 0, 0}, - {"ixmVx", 2, 6, 2}, - {"qou", 1, 9, 6}, - {"z2b", 1, 9, 6}}}; - const ScalarHistogram hist = createHistogram(data); - - // Predicates over value inside of a bucket. - const auto [tag, value] = value::makeNewString("TTV"_sd); - value::ValueGuard vg(tag, value); - - // Query: [{$match: {a: {$eq: 'TTV'}}}]. - double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(1.55, expectedCard, 0.1); // Actual: 2. - - // Query: [{$match: {a: {$lt: 'TTV'}}}]. - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_APPROX_EQUAL(39.8, expectedCard, 0.1); // Actual: 39. - - // Query: [{$match: {a: {$lte: 'TTV'}}}]. - expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; - ASSERT_APPROX_EQUAL(41.3, expectedCard, 0.1); // Actual: 41. -} - -TEST(EstimatorTest, NormalStrEstimate) { - // This hard-codes a maxdiff histogram with 10 buckets built off a normal string distribution - // with a minimum length of 3, a maximum length of 5, and 80 distinct values. 
- std::vector data{{ - {"0ejz", 1, 0, 0}, - {"4FGjc", 3, 5, 3}, - {"9bU3", 2, 3, 2}, - {"Cy5Kw", 3, 3, 3}, - {"Lm4U", 2, 11, 5}, - {"TTV", 5, 14, 8}, - {"YtzS", 2, 3, 2}, - {"o9cD4", 6, 26, 16}, - {"qfmnP", 1, 4, 2}, - {"xqbi", 2, 4, 4}, - }}; - const ScalarHistogram hist = createHistogram(data); - - // Predicates over bucket bound. - auto [tag, value] = value::makeNewString("TTV"_sd); - value::ValueGuard vg(tag, value); - - // Query: [{$match: {a: {$eq: 'TTV'}}}]. - double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(5.0, expectedCard, 0.1); // Actual: 5. - - // Query: [{$match: {a: {$lt: 'TTV'}}}]. - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_APPROX_EQUAL(47.0, expectedCard, 0.1); // Actual: 47. - - // Query: [{$match: {a: {$lte: 'TTV'}}}]. - expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; - ASSERT_APPROX_EQUAL(52.0, expectedCard, 0.1); // Actual: 52. - - // Predicates over value inside of a bucket. - std::tie(tag, value) = value::makeNewString("Pfa"_sd); - - // Query: [{$match: {a: {$eq: 'Pfa'}}}]. - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(1.75, expectedCard, 0.1); // Actual: 2. - - // Query: [{$match: {a: {$lt: 'Pfa'}}}]. - expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; - ASSERT_APPROX_EQUAL(38.3, expectedCard, 0.1); // Actual: 35. - - // Query: [{$match: {a: {$lte: 'Pfa'}}}]. - expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; - ASSERT_APPROX_EQUAL(40.0, expectedCard, 0.1); // Actual: 37. -} - -TEST(EstimatorTest, UniformIntStrEstimate) { - // This hard-codes a maxdiff histogram with 20 buckets built off of a uniform distribution with - // two types occurring with equal probability: - // - 100 distinct ints between 0 and 1000, and - // - 100 distinct strings of length between 2 and 5. - std::vector data{{ - {2, 3, 0, 0}, {19, 4, 1, 1}, {226, 2, 49, 20}, {301, 5, 12, 4}, - {317, 3, 0, 0}, {344, 2, 3, 1}, {423, 5, 18, 6}, {445, 3, 0, 0}, - {495, 3, 4, 2}, {542, 5, 9, 3}, {696, 3, 44, 19}, {773, 4, 11, 5}, - {805, 2, 8, 4}, {931, 5, 21, 8}, {998, 4, 21, 3}, {"8N4", 5, 31, 14}, - {"MIb", 5, 45, 17}, {"Zgi", 3, 55, 22}, {"pZ", 6, 62, 25}, {"yUwxz", 5, 29, 12}, - }}; - const ScalarHistogram hist = createHistogram(data); - const ArrayHistogram arrHist( - hist, TypeCounts{{value::TypeTags::NumberInt64, 254}, {value::TypeTags::StringSmall, 246}}); - - // Predicates over value inside of the last numeric bucket. - - // Query: [{$match: {a: {$eq: 993}}}]. - double expectedCard = estimateIntValCard(hist, 993, EstimationType::kEqual); - ASSERT_APPROX_EQUAL(7.0, expectedCard, 0.1); // Actual: 9. - - // Query: [{$match: {a: {$lt: 993}}}]. - expectedCard = estimateIntValCard(hist, 993, EstimationType::kLess); - ASSERT_APPROX_EQUAL(241.4, expectedCard, 0.1); // Actual: 241. - - // Query: [{$match: {a: {$lte: 993}}}]. - expectedCard = estimateIntValCard(hist, 993, EstimationType::kLessOrEqual); - ASSERT_APPROX_EQUAL(248.4, expectedCard, 0.1); // Actual: 250. - - // Predicates over value inside of the first string bucket. - auto [tag, value] = value::makeNewString("04e"_sd); - value::ValueGuard vg(tag, value); - - // Query: [{$match: {a: {$eq: '04e'}}}]. - expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; - ASSERT_APPROX_EQUAL(2.2, expectedCard, 0.1); // Actual: 3. 
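-
- // The two in-bucket equality estimates above are consistent with interpolating as
- // rangeFreq / ndv inside a bucket: 21 / 3 == 7.0 for 993 in the {998, 4, 21, 3}
- // bucket, and 31 / 14 =~ 2.2 for '04e' in the {"8N4", 5, 31, 14} bucket. A minimal
- // sketch with a hypothetical helper (not part of the estimator API):
- const auto inBucketEqEstimate = [](double rangeFreq, double ndv) { return rangeFreq / ndv; };
- ASSERT_APPROX_EQUAL(7.0, inBucketEqEstimate(21.0, 3.0), 0.1);
- ASSERT_APPROX_EQUAL(2.2, inBucketEqEstimate(31.0, 14.0), 0.1);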
- - value::TypeTags lowTag = value::TypeTags::NumberInt64; - value::Value lowVal = 100000000; - - // Type bracketing: low value of different type than the bucket bound. - // Query: [{$match: {a: {$eq: 100000000}}}]. - expectedCard = estimateCardEq(arrHist, lowTag, lowVal, true /* includeScalar */); - ASSERT_APPROX_EQUAL(0.0, expectedCard, 0.1); // Actual: 0. - - // No interpolation for inequality to values inside the first string bucket, fallback to half of - // the bucket frequency. - - // Query: [{$match: {a: {$lt: '04e'}}}]. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - lowTag, - lowVal, - false /* highInclusive */, - tag, - value, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(13.3, expectedCard, 0.1); // Actual: 0. - - // Query: [{$match: {a: {$lte: '04e'}}}]. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - lowTag, - lowVal, - true /* highInclusive */, - tag, - value, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(15.5, expectedCard, 0.1); // Actual: 3. - - // Value towards the end of the bucket gets the same half bucket estimate. - std::tie(tag, value) = value::makeNewString("8B5"_sd); - - // Query: [{$match: {a: {$lt: '8B5'}}}]. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - lowTag, - lowVal, - false /* highInclusive */, - tag, - value, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(13.3, expectedCard, 0.1); // Actual: 24. - - // Query: [{$match: {a: {$lte: '8B5'}}}]. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - lowTag, - lowVal, - true /* highInclusive */, - tag, - value, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(15.5, expectedCard, 0.1); // Actual: 29. -} - -TEST(EstimatorTest, UniformIntArrayOnlyEstimate) { - // This hard-codes a maxdiff histogram with 10 buckets built off of an array distribution with - // arrays between 3 and 5 elements long, each containing 100 distinct ints uniformly distributed - // between 0 and 1000. There are no scalar elements. 
- std::vector scalarData{{}}; - const ScalarHistogram scalarHist = createHistogram(scalarData); - - std::vector minData{{ - {5, 3, 0, 0}, {19, 5, 2, 1}, {57, 4, 4, 3}, {116, 7, 13, 7}, {198, 3, 15, 6}, - {228, 2, 3, 2}, {254, 4, 0, 0}, {280, 2, 2, 1}, {335, 3, 5, 3}, {344, 2, 0, 0}, - {388, 3, 0, 0}, {420, 2, 0, 0}, {454, 1, 6, 3}, {488, 2, 1, 1}, {530, 1, 0, 0}, - {561, 1, 0, 0}, {609, 1, 0, 0}, {685, 1, 0, 0}, {713, 1, 0, 0}, {758, 1, 0, 0}, - }}; - const ScalarHistogram minHist = createHistogram(minData); - - std::vector maxData{{ - {301, 1, 0, 0}, {408, 2, 0, 0}, {445, 1, 0, 0}, {605, 2, 0, 0}, {620, 1, 0, 0}, - {665, 1, 1, 1}, {687, 3, 0, 0}, {704, 2, 6, 2}, {718, 2, 2, 1}, {741, 2, 1, 1}, - {752, 2, 0, 0}, {823, 7, 3, 3}, {827, 1, 0, 0}, {852, 3, 0, 0}, {864, 5, 0, 0}, - {909, 7, 12, 5}, {931, 2, 3, 1}, {939, 3, 0, 0}, {970, 2, 12, 4}, {998, 1, 10, 4}, - }}; - const ScalarHistogram maxHist = createHistogram(maxData); - - std::vector uniqueData{{ - {5, 3, 0, 0}, {19, 6, 2, 1}, {57, 4, 4, 3}, {116, 7, 15, 8}, {228, 2, 38, 13}, - {254, 7, 0, 0}, {269, 10, 0, 0}, {280, 7, 3, 1}, {306, 4, 1, 1}, {317, 4, 0, 0}, - {344, 2, 19, 5}, {423, 2, 27, 8}, {507, 2, 22, 13}, {704, 8, 72, 34}, {718, 6, 3, 1}, - {758, 3, 13, 4}, {864, 7, 35, 14}, {883, 4, 0, 0}, {939, 5, 32, 10}, {998, 1, 24, 9}, - }}; - const ScalarHistogram uniqueHist = createHistogram(uniqueData); - - const ArrayHistogram arrHist(scalarHist, - TypeCounts{{value::TypeTags::Array, 100}}, - uniqueHist, - minHist, - maxHist, - TypeCounts{}, - 0); - - // Query in the middle of the domain: estimate from ArrayUnique histogram. - value::TypeTags lowTag = value::TypeTags::NumberInt64; - value::Value lowVal = 500; - value::TypeTags highTag = value::TypeTags::NumberInt64; - value::Value highVal = 600; - - // Test interpolation for query: [{$match: {a: {$elemMatch: {$gt: 500, $lt: 600}}}}]. - double expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - lowTag, - lowVal, - false /* highInclusive */, - highTag, - highVal, - false /* includeScalar */); - ASSERT_APPROX_EQUAL(27.0, expectedCard, 0.1); // actual 21. - - // Test interpolation for query: [{$match: {a: {$gt: 500, $lt: 600}}}]. - // Note: although there are no scalars, the estimate is different than the - // above since we use different formulas. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - lowTag, - lowVal, - false /* highInclusive */, - highTag, - highVal, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(92.0, expectedCard, 0.1); // actual 92. - - // Query at the end of the domain: more precise estimates from ArrayMin, ArrayMax histograms. - lowVal = 10; - highVal = 110; - - // Test interpolation for query: [{$match: {a: {$elemMatch: {$gt: 10, $lt: 110}}}}]. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - lowTag, - lowVal, - false /* highInclusive */, - highTag, - highVal, - false /* includeScalar */); - ASSERT_APPROX_EQUAL(24.1, expectedCard, 0.1); // actual 29. - - // Test interpolation for query: [{$match: {a: {$gt: 10, $lt: 110}}}]. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - lowTag, - lowVal, - false /* highInclusive */, - highTag, - highVal, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(27.8, expectedCard, 0.1); // actual 31. 
-} - -TEST(EstimatorTest, UniformIntMixedArrayEstimate) { - // This hard-codes a maxdiff histogram with 20 buckets built off of a mixed distribution split - // with equal probability between: - // - an array distribution between 3 and 5 elements long, each containing 80 distinct ints - // uniformly distributed between 0 and 1000, and - // - a uniform int distribution with 80 distinct ints between 0 and 1000. - std::vector scalarData{{ - {25, 1, 0, 0}, {41, 2, 0, 0}, {142, 2, 3, 3}, {209, 3, 3, 1}, {243, 1, 2, 1}, - {296, 3, 4, 3}, {321, 5, 4, 2}, {480, 3, 9, 8}, {513, 3, 3, 2}, {554, 1, 0, 0}, - {637, 3, 3, 2}, {666, 2, 1, 1}, {697, 2, 2, 1}, {750, 3, 3, 2}, {768, 4, 0, 0}, - {791, 4, 3, 3}, {851, 2, 2, 2}, {927, 2, 10, 6}, {958, 3, 2, 1}, {980, 3, 0, 0}, - }}; - const ScalarHistogram scalarHist = createHistogram(scalarData); - - std::vector minData{{ - {3, 3, 0, 0}, {5, 8, 0, 0}, {9, 3, 0, 0}, {19, 2, 0, 0}, {49, 7, 4, 2}, - {69, 6, 0, 0}, {115, 3, 5, 3}, {125, 2, 0, 0}, {146, 1, 2, 1}, {198, 2, 4, 3}, - {214, 2, 0, 0}, {228, 3, 0, 0}, {260, 3, 4, 1}, {280, 1, 2, 2}, {330, 2, 2, 1}, - {344, 6, 0, 0}, {388, 2, 0, 0}, {420, 2, 0, 0}, {461, 2, 8, 4}, {696, 1, 2, 1}, - }}; - const ScalarHistogram minHist = createHistogram(minData); - - std::vector maxData{{ - {301, 1, 0, 0}, {445, 1, 0, 0}, {491, 1, 0, 0}, {533, 3, 0, 0}, {605, 3, 0, 0}, - {620, 2, 0, 0}, {647, 3, 0, 0}, {665, 4, 0, 0}, {713, 3, 10, 4}, {741, 3, 0, 0}, - {814, 3, 2, 2}, {839, 2, 1, 1}, {864, 1, 2, 2}, {883, 3, 0, 0}, {893, 7, 0, 0}, - {898, 5, 0, 0}, {909, 1, 12, 3}, {931, 2, 2, 1}, {953, 6, 3, 2}, {993, 1, 7, 5}, - }}; - const ScalarHistogram maxHist = createHistogram(maxData); - - std::vector uniqueData{{ - {3, 3, 0, 0}, {19, 5, 11, 2}, {49, 7, 5, 3}, {69, 8, 0, 0}, {75, 3, 0, 0}, - {125, 2, 10, 5}, {228, 3, 27, 14}, {260, 4, 5, 1}, {344, 6, 36, 13}, {423, 4, 20, 8}, - {605, 4, 61, 28}, {665, 8, 12, 6}, {758, 4, 41, 16}, {768, 5, 0, 0}, {776, 3, 0, 0}, - {864, 3, 15, 10}, {883, 8, 0, 0}, {911, 2, 28, 6}, {953, 6, 8, 4}, {993, 1, 7, 5}, - }}; - const ScalarHistogram uniqueHist = createHistogram(uniqueData); - - TypeCounts typeCounts{{value::TypeTags::NumberInt64, 106}, {value::TypeTags::Array, 94}}; - const ArrayHistogram arrHist(scalarHist, - typeCounts, - uniqueHist, - minHist, - maxHist, - TypeCounts{{value::TypeTags::NumberInt64, 375}}, - 0); - - value::TypeTags lowTag = value::TypeTags::NumberInt64; - value::Value lowVal = 500; - value::TypeTags highTag = value::TypeTags::NumberInt64; - value::Value highVal = 550; - - // Test interpolation for query: [{$match: {a: {$gt: 500, $lt: 550}}}]. - double expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - lowTag, - lowVal, - false /* highInclusive */, - highTag, - highVal, - true /* includeScalar */); - ASSERT_APPROX_EQUAL(92.9, expectedCard, 0.1); // Actual: 94. - - // Test interpolation for query: [{$match: {a: {$elemMatch: {$gt: 500, $lt: 550}}}}]. - expectedCard = estimateCardRange(arrHist, - false /* lowInclusive */, - lowTag, - lowVal, - false /* highInclusive */, - highTag, - highVal, - false /* includeScalar */); - ASSERT_APPROX_EQUAL(11.0, expectedCard, 0.1); // Actual: 8. -} - -} // namespace -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_sampling.cpp b/src/mongo/db/query/ce/ce_sampling.cpp deleted file mode 100644 index ce31ae842e2..00000000000 --- a/src/mongo/db/query/ce/ce_sampling.cpp +++ /dev/null @@ -1,362 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/query/ce/ce_sampling.h" - -#include "mongo/db/exec/sbe/abt/abt_lower.h" -#include "mongo/db/query/cqf_command_utils.h" -#include "mongo/db/query/optimizer/explain.h" -#include "mongo/db/query/optimizer/index_bounds.h" -#include "mongo/db/query/optimizer/props.h" -#include "mongo/db/query/optimizer/utils/abt_hash.h" -#include "mongo/db/query/optimizer/utils/memo_utils.h" -#include "mongo/logv2/log.h" - -#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery - -namespace mongo::ce { -namespace cascades = optimizer::cascades; -namespace properties = optimizer::properties; - -using ABT = optimizer::ABT; -using CEType = optimizer::CEType; -using LogicalProps = properties::LogicalProps; -using OptPhaseManager = optimizer::OptPhaseManager; -using Memo = cascades::Memo; -using Metadata = optimizer::Metadata; - -class SamplingPlanExtractor { -public: - SamplingPlanExtractor(const Memo& memo, - const OptPhaseManager& phaseManager, - const size_t sampleSize) - : _memo(memo), _sampleSize(sampleSize), _phaseManager(phaseManager) {} - - void transport(ABT& n, const optimizer::MemoLogicalDelegatorNode& node) { - n = extract(_memo.getLogicalNodes(node.getGroupId()).front()); - } - - void transport(ABT& n, const optimizer::ScanNode& /*node*/, ABT& /*binder*/) { - // We will lower the scan node in a sampling context here. - // TODO: for now just return the documents in random order. - n = optimizer::make( - properties::LimitSkipRequirement(_sampleSize, 0), std::move(n)); - } - - void transport(ABT& n, - const optimizer::FilterNode& /*node*/, - ABT& childResult, - ABT& /*exprResult*/) { - // Skip over filters. - n = childResult; - } - - void transport(ABT& /*n*/, - const optimizer::EvaluationNode& /*node*/, - ABT& /*childResult*/, - ABT& /*exprResult*/) { - // Keep Eval nodes. - } - - void transport( - ABT& n, const optimizer::SargableNode& node, ABT& childResult, ABT& refs, ABT& binds) { - ABT result = childResult; - // Retain only output bindings without applying filters. 
- for (const auto& [key, req] : node.getReqMap()) { - if (const auto& boundProjName = req.getBoundProjectionName()) { - optimizer::lowerPartialSchemaRequirement( - key, - optimizer::PartialSchemaRequirement{ - boundProjName, - optimizer::IntervalReqExpr::makeSingularDNF(), - req.getIsPerfOnly()}, - result, - _phaseManager.getPathToInterval()); - } - } - std::swap(n, result); - } - - void transport(ABT& n, const optimizer::CollationNode& /*node*/, ABT& childResult, ABT& refs) { - // Skip over collation nodes. - n = childResult; - } - - template - void transport(ABT& /*n*/, const T& /*node*/, Ts&&...) { - if constexpr (std::is_base_of_v) { - uasserted(6624242, "Should not be seeing other types of nodes here."); - } - } - - ABT extract(ABT node) { - optimizer::algebra::transport(node, *this); - return node; - } - -private: - const Memo& _memo; - const size_t _sampleSize; - const OptPhaseManager& _phaseManager; -}; - -class CESamplingTransportImpl { - static constexpr size_t kMaxSampleSize = 1000; - -public: - CESamplingTransportImpl(OperationContext* opCtx, - OptPhaseManager phaseManager, - const int64_t numRecords, - std::unique_ptr fallbackCE) - : _phaseManager(std::move(phaseManager)), - _opCtx(opCtx), - _sampleSize(std::min(numRecords, kMaxSampleSize)), - _fallbackCE(std::move(fallbackCE)) {} - - CEType transport(const ABT& n, - const optimizer::FilterNode& node, - const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - CEType childResult, - CEType /*exprResult*/) { - if (!properties::hasProperty(logicalProps)) { - return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); - } - - SamplingPlanExtractor planExtractor(memo, _phaseManager, _sampleSize); - // Create a plan with all eval nodes so far and the filter last. - ABT abtTree = - optimizer::make(node.getFilter(), planExtractor.extract(n)); - - return estimateFilterCE(metadata, memo, logicalProps, n, std::move(abtTree), childResult); - } - - CEType transport(const ABT& n, - const optimizer::SargableNode& node, - const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - CEType childResult, - CEType /*bindResult*/, - CEType /*refsResult*/) { - if (!properties::hasProperty(logicalProps)) { - return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); - } - - SamplingPlanExtractor planExtractor(memo, _phaseManager, _sampleSize); - ABT extracted = planExtractor.extract(n); - - // Estimate individual requirements separately by potentially re-using cached results. - // Here we assume that each requirement is independent. - // TODO: consider estimating together the entire set of requirements (but caching!) - CEType result = childResult; - for (const auto& [key, req] : node.getReqMap()) { - if (req.getIsPerfOnly()) { - // Ignore perf-only requirements. - continue; - } - - if (!isIntervalReqFullyOpenDNF(req.getIntervals())) { - ABT lowered = extracted; - // Lower requirement without an output binding. - lowerPartialSchemaRequirement( - key, - optimizer::PartialSchemaRequirement{boost::none /*boundProjectionName*/, - req.getIntervals(), - req.getIsPerfOnly()}, - lowered, - _phaseManager.getPathToInterval()); - uassert(6624243, "Expected a filter node", lowered.is()); - result = - estimateFilterCE(metadata, memo, logicalProps, n, std::move(lowered), result); - } - } - - return result; - } - - /** - * Other ABT types. 
- */ - template - CEType transport(const ABT& n, - const T& /*node*/, - const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - Ts&&...) { - if (optimizer::canBeLogicalNode()) { - return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); - } - return 0.0; - } - - CEType derive(const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - const ABT::reference_type logicalNodeRef) { - return optimizer::algebra::transport( - logicalNodeRef, *this, metadata, memo, logicalProps); - } - -private: - CEType estimateFilterCE(const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - const ABT& n, - ABT abtTree, - CEType childResult) { - auto it = _selectivityCacheMap.find(abtTree); - if (it != _selectivityCacheMap.cend()) { - // Cache hit. - return it->second * childResult; - } - - const auto [success, selectivity] = estimateSelectivity(abtTree); - if (!success) { - return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); - } - - _selectivityCacheMap.emplace(std::move(abtTree), selectivity); - - OPTIMIZER_DEBUG_LOG(6264805, - 5, - "CE sampling estimated filter selectivity", - "selectivity"_attr = selectivity); - return selectivity * childResult; - } - - std::pair estimateSelectivity(ABT abtTree) { - // Add a group by to count number of documents. - const optimizer::ProjectionName sampleSumProjection = "sum"; - abtTree = optimizer::make( - optimizer::ProjectionNameVector{}, - optimizer::ProjectionNameVector{sampleSumProjection}, - optimizer::makeSeq(optimizer::make( - "$sum", makeSeq(optimizer::Constant::int64(1)))), - std::move(abtTree)); - abtTree = optimizer::make( - properties::ProjectionRequirement{optimizer::ProjectionNameVector{sampleSumProjection}}, - std::move(abtTree)); - - - OPTIMIZER_DEBUG_LOG(6264806, - 5, - "Estimate selectivity ABT", - "explain"_attr = optimizer::ExplainGenerator::explainV2(abtTree)); - - _phaseManager.optimize(abtTree); - - auto env = optimizer::VariableEnvironment::build(abtTree); - optimizer::SlotVarMap slotMap; - boost::optional ridSlot; - sbe::value::SlotIdGenerator ids; - optimizer::SBENodeLowering g{env, - slotMap, - ridSlot, - ids, - _phaseManager.getMetadata(), - _phaseManager.getNodeToGroupPropsMap(), - _phaseManager.getRIDProjections(), - true /*randomScan*/}; - auto sbePlan = g.optimize(abtTree); - tassert(6624261, "Unexpected rid slot", !ridSlot); - - // TODO: return errors instead of exceptions? - uassert(6624244, "Lowering failed", sbePlan != nullptr); - uassert(6624245, "Invalid slot map size", slotMap.size() == 1); - - sbePlan->attachToOperationContext(_opCtx); - sbe::CompileCtx ctx(std::make_unique()); - sbePlan->prepare(ctx); - - std::vector accessors; - for (auto& [name, slot] : slotMap) { - accessors.emplace_back(sbePlan->getAccessor(ctx, slot)); - } - - sbePlan->open(false); - ON_BLOCK_EXIT([&] { sbePlan->close(); }); - - while (sbePlan->getNext() != sbe::PlanState::IS_EOF) { - const auto [tag, value] = accessors.at(0)->getViewOfValue(); - if (tag == sbe::value::TypeTags::NumberInt64) { - // TODO: check if we get exactly one result from the groupby? - return {true, static_cast(value) / _sampleSize}; - } - return {false, {}}; - }; - - // If nothing passes the filter, estimate 0.0 selectivity. HashGroup will return 0 results. 
- return {true, 0.0}; - } - - struct NodeRefHash { - size_t operator()(const ABT& node) const { - return optimizer::ABTHashGenerator::generate(node); - } - }; - - struct NodeRefCompare { - bool operator()(const ABT& left, const ABT& right) const { - return left == right; - } - }; - - // Cache a logical node reference to computed selectivity. Used for Filter and Sargable nodes. - optimizer::opt::unordered_map - _selectivityCacheMap; - - OptPhaseManager _phaseManager; - - // We don't own this. - OperationContext* _opCtx; - - const int64_t _sampleSize; - std::unique_ptr _fallbackCE; -}; - -CESamplingTransport::CESamplingTransport(OperationContext* opCtx, - OptPhaseManager phaseManager, - const int64_t numRecords, - std::unique_ptr fallbackCE) - : _impl(std::make_unique( - opCtx, std::move(phaseManager), numRecords, std::move(fallbackCE))) {} - -CESamplingTransport::~CESamplingTransport() {} - -CEType CESamplingTransport::deriveCE(const Metadata& metadata, - const Memo& memo, - const LogicalProps& logicalProps, - const ABT::reference_type logicalNodeRef) const { - return _impl->derive(metadata, memo, logicalProps, logicalNodeRef); -} - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_sampling.h b/src/mongo/db/query/ce/ce_sampling.h deleted file mode 100644 index 9e13abb5d13..00000000000 --- a/src/mongo/db/query/ce/ce_sampling.h +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#pragma once - -#include "mongo/db/query/optimizer/cascades/interfaces.h" -#include "mongo/db/query/optimizer/opt_phase_manager.h" - -namespace mongo::ce { - -class CESamplingTransportImpl; - -class CESamplingTransport : public optimizer::cascades::CEInterface { -public: - CESamplingTransport(OperationContext* opCtx, - optimizer::OptPhaseManager phaseManager, - int64_t numRecords, - std::unique_ptr fallbackCE); - ~CESamplingTransport(); - - optimizer::CEType deriveCE(const optimizer::Metadata& metadata, - const optimizer::cascades::Memo& memo, - const optimizer::properties::LogicalProps& logicalProps, - optimizer::ABT::reference_type logicalNodeRef) const final; - -private: - std::unique_ptr _impl; -}; - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_test_utils.cpp b/src/mongo/db/query/ce/ce_test_utils.cpp deleted file mode 100644 index 5212c48ab00..00000000000 --- a/src/mongo/db/query/ce/ce_test_utils.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#include - -#include "mongo/db/query/ce/ce_test_utils.h" - -#include "mongo/db/pipeline/abt/utils.h" -#include "mongo/db/query/optimizer/explain.h" -#include "mongo/db/query/optimizer/metadata_factory.h" -#include "mongo/db/query/optimizer/opt_phase_manager.h" -#include "mongo/db/query/optimizer/rewrites/const_eval.h" -#include "mongo/db/query/optimizer/utils/unit_test_pipeline_utils.h" -#include "mongo/db/query/optimizer/utils/unit_test_utils.h" -#include "mongo/db/query/sbe_stage_builder_helpers.h" -#include "mongo/unittest/unittest.h" - -namespace mongo::ce { - -using namespace optimizer; -using namespace cascades; - -CETester::CETester(std::string collName, - double collCard, - const optimizer::OptPhaseManager::PhaseSet& optPhases) - : _optPhases(optPhases), _hints(), _metadata({}), _collName(collName) { - addCollection(collName, collCard); -} - -optimizer::CEType CETester::getMatchCE(const std::string& queryPredicate, - std::function nodePredicate) const { - return getCE("[{$match: " + queryPredicate + "}]", nodePredicate); -} - -optimizer::CEType CETester::getCE(const std::string& pipeline, - std::function nodePredicate) const { - if constexpr (kCETestLogOnly) { - std::cout << "\n\nQuery: " << pipeline << "\n"; - } - - // Construct ABT from pipeline and optimize. - ABT abt = translatePipeline(pipeline, _collName); - - // Get cardinality estimate. - return getCE(abt, nodePredicate); -} - -optimizer::CEType CETester::getCE(ABT& abt, std::function nodePredicate) const { - if constexpr (kCETestLogOnly) { - std::cout << ExplainGenerator::explainV2(abt) << std::endl; - } - - OptPhaseManager phaseManager{_optPhases, - _prefixId, - false /*requireRID*/, - _metadata, - getCETransport(), - makeHeuristicCE(), - makeCosting(), - defaultConvertPathToInterval, - ConstEval::constFold, - DebugInfo::kDefaultForTests, - _hints}; - phaseManager.optimize(abt); - - const auto& memo = phaseManager.getMemo(); - if constexpr (kCETestLogOnly) { - std::cout << ExplainGenerator::explainMemo(memo) << std::endl; - } - - auto cht = getCETransport(); - - // If we are running no optimization phases, we are ensuring that we get the correct estimate on - // the original ABT (usually testing the CE for FilterNodes). The memo won't have any groups for - // us to estimate directly yet. - if (_optPhases.empty()) { - auto card = cht->deriveCE(_metadata, memo, {}, abt.ref()); - - if constexpr (kCETestLogOnly) { - std::cout << "CE: " << card << std::endl; - } - - return card; - } - - CEType outCard = kInvalidCardinality; - for (size_t groupId = 0; groupId < memo.getGroupCount(); groupId++) { - // Note that we always verify CE for MemoLogicalDelegatorNodes when calling getCE(). - - // If the 'optPhases' either ends with the MemoSubstitutionPhase or the - // MemoImplementationPhase, we should have exactly one logical node per group. However, if - // we have indexes, or a $group, we may have multiple logical nodes. In this case, we still - // want to pick the first node. - const auto& node = memo.getLogicalNodes(groupId).front(); - - // This gets the cardinality estimate actually produced during optimization. - const auto& logicalProps = memo.getLogicalProps(groupId); - auto memoCE = properties::getPropertyConst(logicalProps) - .getEstimate(); - - // Conversely, here we call deriveCE() on the ABT produced by the optimization phases, which - // has all its delegators dereferenced. 
- auto card = cht->deriveCE(_metadata, memo, logicalProps, node.ref()); - - if constexpr (!kCETestLogOnly) { - // Ensure that the CE stored for the logical nodes of each group is what we would expect - // when estimating that node directly. Note that this check will fail if we are testing - // histogram estimation and only using the MemoSubstitutionPhase because the memo always - // uses heuristic estimation in this case. - ASSERT_APPROX_EQUAL(card, memoCE, kMaxCEError); - } else { - if (std::abs(memoCE - card) > kMaxCEError) { - std::cout << "ERROR: CE Group(" << groupId << ") " << card << " vs. " << memoCE - << std::endl; - std::cout << ExplainGenerator::explainV2(node) << std::endl; - } - } - - if (nodePredicate(node)) { - // We want to return the cardinality for the memo group matching the 'nodePredicate'. - outCard = memoCE; - } - } - - ASSERT_NOT_EQUALS(outCard, kInvalidCardinality); - - if constexpr (kCETestLogOnly) { - std::cout << "CE: " << outCard << std::endl; - } - - return outCard; -} - -ScanDefinition& CETester::getCollScanDefinition() { - auto it = _metadata._scanDefs.find(_collName); - invariant(it != _metadata._scanDefs.end()); - return it->second; -} - - -void CETester::setCollCard(double card) { - auto& scanDef = getCollScanDefinition(); - addCollection(_collName, card, scanDef.getIndexDefs()); -} - -void CETester::setIndexes(opt::unordered_map indexes) { - auto& scanDef = getCollScanDefinition(); - addCollection(_collName, scanDef.getCE(), indexes); -} - -void CETester::addCollection(std::string collName, - double numRecords, - opt::unordered_map indexes) { - _metadata._scanDefs.insert_or_assign(collName, - createScanDef({}, - indexes, - ConstEval::constFold, - {DistributionType::Centralized}, - true /*exists*/, - numRecords)); -} - -ScalarHistogram createHistogram(const std::vector& data) { - value::Array bounds; - std::vector buckets; - - double cumulativeFreq = 0.0; - double cumulativeNDV = 0.0; - - for (size_t i = 0; i < data.size(); i++) { - const auto& item = data.at(i); - const auto [tag, val] = stage_builder::makeValue(item._v); - bounds.push_back(tag, val); - - cumulativeFreq += item._equalFreq + item._rangeFreq; - cumulativeNDV += item._ndv + 1.0; - buckets.emplace_back( - item._equalFreq, item._rangeFreq, cumulativeFreq, item._ndv, cumulativeNDV); - } - - return {std::move(bounds), std::move(buckets)}; -} - -double estimateIntValCard(const ScalarHistogram& hist, const int v, const EstimationType type) { - const auto [tag, val] = - std::make_pair(value::TypeTags::NumberInt64, value::bitcastFrom(v)); - return estimate(hist, tag, val, type).card; -}; - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_test_utils.h b/src/mongo/db/query/ce/ce_test_utils.h deleted file mode 100644 index 0894501bc2a..00000000000 --- a/src/mongo/db/query/ce/ce_test_utils.h +++ /dev/null @@ -1,250 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . 
- * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include -#include - -#include "mongo/db/query/ce/histogram_estimation.h" -#include "mongo/db/query/ce/scalar_histogram.h" -#include "mongo/db/query/optimizer/cascades/interfaces.h" -#include "mongo/db/query/optimizer/opt_phase_manager.h" - -namespace mongo { - -namespace optimizer { -namespace cascades { - -// Forward declaration. -class CEInterface; - -} // namespace cascades -} // namespace optimizer - -namespace ce { - -using namespace optimizer; -using namespace sbe; - -// Enable this flag to log all estimates, and let all tests pass. -constexpr bool kCETestLogOnly = false; - -const double kMaxCEError = 0.01; -const CEType kInvalidCardinality = -1.0; - -const OptPhaseManager::PhaseSet kDefaultCETestPhaseSet{OptPhase::MemoSubstitutionPhase, - OptPhase::MemoExplorationPhase, - OptPhase::MemoImplementationPhase}; - -const OptPhaseManager::PhaseSet kOnlySubPhaseSet{OptPhase::MemoSubstitutionPhase}; - -const OptPhaseManager::PhaseSet kNoOptPhaseSet{}; - -/** - * Helpful macros for asserting that the CE of a $match predicate is approximately what we were - * expecting. - */ - -#define _ASSERT_CE(estimatedCE, expectedCE) \ - if constexpr (kCETestLogOnly) { \ - if (std::abs(estimatedCE - expectedCE) > kMaxCEError) { \ - std::cout << "ERROR: expected " << expectedCE << std::endl; \ - } \ - ASSERT_APPROX_EQUAL(1.0, 1.0, kMaxCEError); \ - } else { \ - ASSERT_APPROX_EQUAL(estimatedCE, expectedCE, kMaxCEError); \ - } -#define _PREDICATE(field, predicate) (str::stream() << "{" << field << ": " << predicate "}") -#define _ELEMMATCH_PREDICATE(field, predicate) \ - (str::stream() << "{" << field << ": {$elemMatch: " << predicate << "}}") - -// This macro verifies the cardinality of a pipeline or an input ABT. -#define ASSERT_CE(ce, pipeline, expectedCE) _ASSERT_CE(ce.getCE(pipeline), (expectedCE)) - -// This macro does the same as above but also sets the collection cardinality. -#define ASSERT_CE_CARD(ce, pipeline, expectedCE, collCard) \ - ce.setCollCard(collCard); \ - ASSERT_CE(ce, pipeline, expectedCE) - -// This macro verifies the cardinality of a pipeline with a single $match predicate. -#define ASSERT_MATCH_CE(ce, predicate, expectedCE) \ - _ASSERT_CE(ce.getMatchCE(predicate), (expectedCE)) - -#define ASSERT_MATCH_CE_NODE(ce, queryPredicate, expectedCE, nodePredicate) \ - _ASSERT_CE(ce.getMatchCE(queryPredicate, nodePredicate), (expectedCE)) - -// This macro does the same as above but also sets the collection cardinality. -#define ASSERT_MATCH_CE_CARD(ce, predicate, expectedCE, collCard) \ - ce.setCollCard(collCard); \ - ASSERT_MATCH_CE(ce, predicate, expectedCE) - -// This macro tests cardinality of two versions of the predicate; with and without $elemMatch. 
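-// For example, a call such as the following (the numbers are illustrative, 't' is a CETester):
-//
-//     ASSERT_EQ_ELEMMATCH_CE(t, 10.0, 5.0, "a", "{$eq: 1}");
-//
-// expands into one ASSERT_MATCH_CE against {a: {$eq: 1}} expecting CE 10.0 and one against
-// {a: {$elemMatch: {$eq: 1}}} expecting CE 5.0.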
-#define ASSERT_EQ_ELEMMATCH_CE(tester, expectedCE, elemMatchExpectedCE, field, predicate) \ - ASSERT_MATCH_CE(tester, _PREDICATE(field, predicate), expectedCE); \ - ASSERT_MATCH_CE(tester, _ELEMMATCH_PREDICATE(field, predicate), elemMatchExpectedCE) - -#define ASSERT_EQ_ELEMMATCH_CE_NODE(tester, expectedCE, elemMatchExpectedCE, field, predicate, n) \ - ASSERT_MATCH_CE_NODE(tester, _PREDICATE(field, predicate), expectedCE, n); \ - ASSERT_MATCH_CE_NODE(tester, _ELEMMATCH_PREDICATE(field, predicate), elemMatchExpectedCE, n) - -// Some commonly used functions for picking nodes in the memo for testing estimation. -template -bool isSargableNode(const ABT& n) { - if constexpr (NumReq == 0) { - return n.is(); - } - - // Sometimes SargableNodes get split and placed into different memo groups, but we are looking - // for a SargableNode with a specific number of predicates. For tests, we only care about - // verifying the cardinality of that one. - if (auto* sargable = n.cast()) { - return sargable->getReqMap().size() == NumReq; - } - return false; -} -const auto isSargable = isSargableNode<0>; -const auto isSargable1 = isSargableNode<1>; -const auto isSargable2 = isSargableNode<2>; -const auto isSargable3 = isSargableNode<3>; -const auto isSargable4 = isSargableNode<4>; -const auto isRoot = [](const ABT& n) -> bool { return n.is(); }; - -/** - * A test utility class for helping verify the cardinality of CE transports on a given $match - * predicate. - */ -class CETester { -public: - /** - * The tester initializes at least one collection with the name 'collName' and the cardinality - * 'numRecords' in the metadata. - */ - CETester(std::string collName, - double numRecords, - const OptPhaseManager::PhaseSet& optPhases = kDefaultCETestPhaseSet); - - /** - * Returns the estimated cardinality of a given 'matchPredicate'. - * - * 'nodePredicate' identifies the node in the memo we want to estimate. - */ - CEType getMatchCE(const std::string& matchPredicate, - std::function nodePredicate = isRoot) const; - - /** - * Returns the estimated cardinality of a given 'pipeline'. - * - * 'nodePredicate' identifies the node in the memo we want to estimate. - */ - CEType getCE(const std::string& pipeline, - std::function nodePredicate = isRoot) const; - - /** - * Returns the estimated cardinality of a given 'abt'. - * - * 'nodePredicate' identifies the node in the memo we want to estimate. - */ - CEType getCE(ABT& abt, std::function nodePredicate = isRoot) const; - - /** - * Updates the cardinality of the collection '_collName'. - */ - void setCollCard(double card); - - /** - * Updates the indexes used by the collection '_collName'. - */ - void setIndexes(opt::unordered_map indexes); - - /** - * Adds a ScanDefinition for an additional collection for the test. - */ - void addCollection(std::string collName, - double numRecords, - opt::unordered_map indexes = {}); - - /** - * Prevents the optimizer from generating collection scan plans. - */ - void setDisableScan(bool disableScan) { - _hints._disableScan = disableScan; - } - -protected: - /** - * Subclasses need to override this method to initialize the transports they are testing. - */ - virtual std::unique_ptr getCETransport() const = 0; - -private: - /** - * Helper to find the ScanDefinition of '_collName' in _metadata. - */ - ScanDefinition& getCollScanDefinition(); - - // Phases to use when optimizing an input query. - const OptPhaseManager::PhaseSet& _optPhases; - - // Used to initialize the OptPhaseManager. 
- mutable PrefixId _prefixId; - - // Allows us to pass hints to the optimizer. - QueryHints _hints; - - // Stores the ScanDefinitions for all collections defined in the test. - Metadata _metadata; - - // Name of the collection tests will be executed against. - std::string _collName; -}; - -/** - * Test utility for helping with creation of manual histograms in the unit tests. - */ -struct BucketData { - Value _v; - double _equalFreq; - double _rangeFreq; - double _ndv; - - BucketData(Value v, double equalFreq, double rangeFreq, double ndv) - : _v(v), _equalFreq(equalFreq), _rangeFreq(rangeFreq), _ndv(ndv) {} - BucketData(const std::string& v, double equalFreq, double rangeFreq, double ndv) - : BucketData(Value(v), equalFreq, rangeFreq, ndv) {} - BucketData(int v, double equalFreq, double rangeFreq, double ndv) - : BucketData(Value(v), equalFreq, rangeFreq, ndv) {} -}; - -ScalarHistogram createHistogram(const std::vector& data); - -double estimateIntValCard(const ScalarHistogram& hist, int v, EstimationType type); - -} // namespace ce -} // namespace mongo diff --git a/src/mongo/db/query/ce/collection_statistics.h b/src/mongo/db/query/ce/collection_statistics.h deleted file mode 100644 index 5949215b448..00000000000 --- a/src/mongo/db/query/ce/collection_statistics.h +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/namespace_string.h" -#include "mongo/db/query/ce/array_histogram.h" - -namespace mongo::ce { - -using Histograms = std::map>; - -class CollectionStatistics { -public: - /** - * Returns the cardinality of the given collection. - */ - virtual double getCardinality() const = 0; - - /** - * Returns the histogram for the given field path, or nullptr if none exists. - */ - virtual const ArrayHistogram* getHistogram(const std::string& path) const = 0; - - /** - * Adds a histogram along the given path. 
- */ - virtual void addHistogram(const std::string& path, - std::shared_ptr histogram) const = 0; - - virtual ~CollectionStatistics() = default; -}; - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/collection_statistics_impl.cpp b/src/mongo/db/query/ce/collection_statistics_impl.cpp deleted file mode 100644 index 7bf6b4e7a11..00000000000 --- a/src/mongo/db/query/ce/collection_statistics_impl.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#include "mongo/db/query/ce/collection_statistics_impl.h" -#include "mongo/db/client.h" -#include "mongo/db/query/ce/stats_catalog.h" - -namespace mongo::ce { - -CollectionStatisticsImpl::CollectionStatisticsImpl(double cardinality, const NamespaceString& nss) - : _cardinality{cardinality}, _histograms{}, _nss{nss} {}; - -double CollectionStatisticsImpl::getCardinality() const { - return _cardinality; -} - -void CollectionStatisticsImpl::addHistogram(const std::string& path, - std::shared_ptr histogram) const { - _histograms[path] = histogram; -} - -const ArrayHistogram* CollectionStatisticsImpl::getHistogram(const std::string& path) const { - if (auto mapIt = _histograms.find(path); mapIt != _histograms.end()) { - return mapIt->second.get(); - } else { - uassert(8423368, "no current client", Client::getCurrent()); - auto opCtx = Client::getCurrent()->getOperationContext(); - uassert(8423367, "no operation context", opCtx); - StatsCatalog& statsCatalog = StatsCatalog::get(opCtx); - const auto swHistogram = statsCatalog.getHistogram(opCtx, _nss, path); - if (!swHistogram.isOK()) { - if (swHistogram != ErrorCodes::NamespaceNotFound) { - uasserted(swHistogram.getStatus().code(), - str::stream() << "Error getting histograms for path " << _nss << " : " - << path << swHistogram.getStatus().reason()); - } - return nullptr; - } - const auto histogram = std::move(swHistogram.getValue()); - addHistogram(path, histogram); - return histogram.get(); - } -} - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/collection_statistics_impl.h b/src/mongo/db/query/ce/collection_statistics_impl.h deleted file mode 100644 index 11b2c9630ce..00000000000 --- a/src/mongo/db/query/ce/collection_statistics_impl.h +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#pragma once - -#include "mongo/db/query/ce/array_histogram.h" -#include "mongo/db/query/ce/collection_statistics.h" - -namespace mongo::ce { - -using Histograms = std::map>; - -class CollectionStatisticsImpl : public CollectionStatistics { -public: - CollectionStatisticsImpl(double cardinality, const NamespaceString& nss); - - /** - * Returns the cardinality of the given collection. - */ - double getCardinality() const override; - - /** - * Returns the histogram for the given field path, or nullptr if none exists. - */ - const ArrayHistogram* getHistogram(const std::string& path) const override; - - /** - * Adds a histogram along the given path. - */ - void addHistogram(const std::string& path, - std::shared_ptr histogram) const override; - - ~CollectionStatisticsImpl() = default; - -private: - double _cardinality; - mutable Histograms _histograms; - const NamespaceString _nss; -}; - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/collection_statistics_mock.cpp b/src/mongo/db/query/ce/collection_statistics_mock.cpp deleted file mode 100644 index d8faa285e20..00000000000 --- a/src/mongo/db/query/ce/collection_statistics_mock.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#include "mongo/db/query/ce/collection_statistics_mock.h" - -namespace mongo::ce { - -CollectionStatisticsMock::CollectionStatisticsMock(double cardinality) - : _cardinality{cardinality}, _histograms{} {}; - -double CollectionStatisticsMock::getCardinality() const { - return _cardinality; -} - -void CollectionStatisticsMock::addHistogram(const std::string& path, - std::shared_ptr histogram) const { - _histograms[path] = histogram; -} - -const ArrayHistogram* CollectionStatisticsMock::getHistogram(const std::string& path) const { - if (auto mapIt = _histograms.find(path); mapIt != _histograms.end()) { - return mapIt->second.get(); - } - return nullptr; -} - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/collection_statistics_mock.h b/src/mongo/db/query/ce/collection_statistics_mock.h deleted file mode 100644 index a93964cd701..00000000000 --- a/src/mongo/db/query/ce/collection_statistics_mock.h +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/namespace_string.h" -#include "mongo/db/query/ce/collection_statistics.h" - -namespace mongo::ce { - -class CollectionStatisticsMock : public CollectionStatistics { -public: - CollectionStatisticsMock(double cardinality); - - /** - * Returns the cardinality of the given collection. - */ - double getCardinality() const override; - - /** - * Adds a histogram along the given path. - */ - void addHistogram(const std::string& path, - std::shared_ptr histogram) const override; - - /** - * Returns the histogram for the given field path, or nullptr if none exists. - */ - const ArrayHistogram* getHistogram(const std::string& path) const override; - - ~CollectionStatisticsMock() = default; - -private: - double _cardinality; - mutable Histograms _histograms; -}; - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/generated_histograms_test.cpp b/src/mongo/db/query/ce/generated_histograms_test.cpp new file mode 100644 index 00000000000..3f5ce361584 --- /dev/null +++ b/src/mongo/db/query/ce/generated_histograms_test.cpp @@ -0,0 +1,366 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include +#include + +#include "mongo/db/exec/sbe/values/value.h" +#include "mongo/db/query/ce/histogram_predicate_estimation.h" +#include "mongo/db/query/ce/test_utils.h" +#include "mongo/db/query/stats/array_histogram.h" +#include "mongo/unittest/unittest.h" + +namespace mongo::optimizer::ce { +namespace { +namespace value = sbe::value; + +using stats::ArrayHistogram; +using stats::ScalarHistogram; +using stats::TypeCounts; + +constexpr double kErrorBound = 0.1; + +TEST(EstimatorTest, UniformIntStrEstimate) { + /* The code in this comment generates a dataset and creates the histogram used in this test. To + recreate the data set and the histogram, place this code in a unit test which uses the utilities + from rand_utils_new.cpp. 
+ + constexpr int minLen = 3, maxLen = 5; + constexpr int minVal = 0, maxVal = 1000; + constexpr size_t dataSize = 1000; + constexpr size_t nBuckets = std::min(20UL, dataSize); + + MixedDistributionDescriptor dd{{DistrType::kUniform, 1.0}}; + TypeDistrVector td; + td.emplace_back(std::make_unique(dd, 0.5, 250, minVal, maxVal)); + td.emplace_back(std::make_unique(dd, 0.5, 250, minLen, maxLen)); + + std::mt19937_64 gen(0); + DatasetDescriptorNew desc{std::move(td), gen}; + + std::vector dataset; + dataset = desc.genRandomDataset(dataSize); + + const ScalarHistogram& hist = makeHistogram(dataset, nBuckets); + */ + + std::vector data{ + {2, 5, 0, 0}, {57, 4, 21, 12}, {159, 4, 59, 24}, {172, 5, 0, 0}, + {184, 4, 2, 2}, {344, 4, 73, 32}, {363, 4, 1, 1}, {420, 3, 16, 10}, + {516, 2, 49, 23}, {758, 4, 113, 54}, {931, 5, 104, 41}, {998, 4, 29, 12}, + {"3vL", 6, 30, 11}, {"9WUk", 1, 59, 24}, {"HraK", 4, 56, 26}, {"Zujbu", 1, 130, 64}, + {"kEr", 5, 80, 40}, {"rupc", 6, 44, 21}, {"up1O", 5, 16, 7}, {"ztf", 5, 37, 17}}; + + const ScalarHistogram hist = createHistogram(data); + const ArrayHistogram arrHist( + hist, TypeCounts{{value::TypeTags::NumberInt64, 515}, {value::TypeTags::StringSmall, 485}}); + + const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd); + value::ValueGuard vgLowStr(tagLowStr, valLowStr); + const auto [tagAbc, valAbc] = value::makeNewString("abc"_sd); + value::ValueGuard vg(tagAbc, valAbc); + auto [tagObj, valObj] = value::makeNewObject(); + value::ValueGuard vgObj(tagObj, valObj); + + // Predicates over bucket bound. + // Actual cardinality {$eq: 804} = 2. + double expectedCard = estimateIntValCard(hist, 804, EstimationType::kEqual); + ASSERT_APPROX_EQUAL(2.5, expectedCard, kErrorBound); + + // Actual cardinality {$lt: 100} = 40. + expectedCard = estimateIntValCard(hist, 100, EstimationType::kLess); + ASSERT_APPROX_EQUAL(52.4, expectedCard, kErrorBound); + + // Range query crossing the type brackets. + // Actual cardinality {$gt: 100} = 475. + expectedCard = estimateCardRange(arrHist, + false /* lowInclusive */, + value::TypeTags::NumberInt64, + value::bitcastFrom(100), + false /* highInclusive */, + tagLowStr, + valLowStr, + true /* includeScalar */); + ASSERT_APPROX_EQUAL(460.1, expectedCard, kErrorBound); + + // Actual cardinality {$lt: 'abc'} = 291. + expectedCard = estimateCardRange(arrHist, + true /* lowInclusive */, + tagLowStr, + valLowStr, + true /* highInclusive */, + tagAbc, + valAbc, + true /* includeScalar */); + ASSERT_APPROX_EQUAL(319.9, expectedCard, kErrorBound); + + // Actual cardinality {$gte: 'abc'} = 194. + expectedCard = estimateCardRange(arrHist, + true /* lowInclusive */, + tagAbc, + valAbc, + false /* highInclusive */, + tagObj, + valObj, + true /* includeScalar */); + ASSERT_APPROX_EQUAL(167.0, expectedCard, kErrorBound); + + // Queries over the low string bound. + // Actual cardinality {$eq: ''} = 0. + expectedCard = estimateCardEq(arrHist, tagLowStr, valLowStr, true); + ASSERT_APPROX_EQUAL(2.727, expectedCard, 0.001); + + // Actual cardinality {$gt: ''} = 485. + expectedCard = estimateCardRange(arrHist, + false /* lowInclusive */, + tagLowStr, + valLowStr, + false /* highInclusive */, + tagObj, + valObj, + true /* includeScalar */); + ASSERT_APPROX_EQUAL(485, expectedCard, 0.001); +} + +TEST(EstimatorTest, IntStrArrayEstimate) { + /* The code in this comment generates a dataset of 1000 integers, strings and arrays of integers + and strings and creates the histogram used in this test. 
To recreate the data set and the + histogram, place this code in a unit test which uses the utilities from rand_utils_new.cpp. + + constexpr int minLen = 2, maxLen = 5; + constexpr int minVal = 0, maxVal = 1000; + constexpr size_t dataSize = 1000; + constexpr size_t nBuckets = std::min(20UL, dataSize); + + MixedDistributionDescriptor dd{{DistrType::kUniform, 1.0}}; + TypeDistrVector td1; + td1.emplace_back(std::make_unique(dd, 0.7, 200, minVal, maxVal)); + td1.emplace_back(std::make_unique(dd, 0.3, 100, minLen, maxLen)); + + std::mt19937_64 gen(5); + auto desc1 = std::make_unique(std::move(td1), gen); + + TypeDistrVector td2; + td2.emplace_back(std::make_unique(dd, 0.4, 200, minVal, maxVal)); + td2.emplace_back(std::make_unique(dd, 0.3, 200, minLen, maxLen)); + td2.emplace_back(std::make_unique(dd, 0.3, 200, 2, 6, std::move(desc1), + 0.0)); + + DatasetDescriptorNew desc{std::move(td2), gen}; + std::vector dataset; + dataset = desc.genRandomDataset(dataSize); + + const ScalarHistogram& hist = makeHistogram(dataset, nBuckets); + */ + + std::vector scalarData{ + {10, 1, 0, 0}, {11, 4, 0, 0}, {44, 2, 5, 2}, {213, 3, 40, 20}, + {256, 5, 13, 6}, {270, 3, 9, 2}, {407, 3, 56, 28}, {510, 3, 32, 16}, + {524, 3, 0, 0}, {561, 5, 16, 8}, {583, 3, 4, 3}, {599, 3, 1, 1}, + {663, 5, 19, 9}, {681, 5, 6, 2}, {873, 5, 75, 37}, {909, 4, 16, 7}, + {994, 3, 36, 14}, {"9TcY", 4, 44, 23}, {"Zow00", 5, 134, 67}, {"zsS", 2, 130, 66}, + }; + + const ScalarHistogram scalarHist = createHistogram(scalarData); + + std::vector minData{ + {12, 5, 0, 0}, {17, 8, 0, 0}, {28, 7, 7, 1}, {55, 5, 22, 5}, + {110, 5, 45, 11}, {225, 4, 43, 15}, {563, 3, 98, 36}, {643, 4, 3, 2}, + {701, 4, 9, 5}, {845, 1, 6, 4}, {921, 2, 0, 0}, {980, 1, 0, 0}, + {"1l", 9, 16, 4}, {"8YN", 4, 19, 5}, {"PE2OO", 2, 41, 15}, {"WdJ", 8, 25, 7}, + {"dKb7", 9, 17, 6}, {"msdP", 12, 25, 10}, {"t7wmp", 5, 15, 6}, {"yx", 2, 13, 4}, + }; + + const ScalarHistogram minHist = createHistogram(minData); + + std::vector maxData{ + {26, 2, 0, 0}, {79, 3, 0, 0}, {147, 1, 0, 0}, {207, 2, 0, 0}, + {362, 6, 7, 5}, {563, 3, 47, 19}, {603, 9, 2, 1}, {676, 6, 21, 10}, + {702, 6, 9, 4}, {712, 6, 0, 0}, {759, 8, 4, 1}, {774, 6, 3, 1}, + {831, 9, 28, 9}, {948, 7, 51, 15}, {981, 3, 33, 8}, {"9Iey", 4, 20, 8}, + {"Ji", 3, 21, 8}, {"WdJ", 9, 26, 10}, {"msdP", 9, 59, 20}, {"zbI", 3, 68, 16}, + }; + + const ScalarHistogram maxHist = createHistogram(maxData); + + std::vector uniqueData{ + {12, 5, 0, 0}, {28, 8, 15, 2}, {55, 8, 23, 5}, {110, 5, 59, 12}, + {225, 8, 79, 18}, {362, 8, 88, 20}, {507, 10, 165, 36}, {572, 5, 25, 6}, + {603, 12, 25, 3}, {712, 6, 106, 19}, {759, 11, 17, 4}, {774, 6, 3, 1}, + {831, 14, 50, 13}, {981, 3, 105, 25}, {"547DP", 4, 43, 9}, {"9Iey", 4, 8, 1}, + {"WdJ", 9, 85, 26}, {"ZGYcw", 2, 14, 4}, {"msdP", 14, 80, 21}, {"zbI", 3, 74, 17}, + }; + + const ScalarHistogram uniqueHist = createHistogram(uniqueData); + + TypeCounts typeCounts{{value::TypeTags::NumberInt64, 388}, + {value::TypeTags::StringSmall, 319}, + {value::TypeTags::Array, 293}}; + TypeCounts arrayTypeCounts{{value::TypeTags::NumberInt64, 874}, + {value::TypeTags::StringSmall, 340}}; + const ArrayHistogram arrHist(scalarHist, + typeCounts, + uniqueHist, + minHist, + maxHist, + arrayTypeCounts, + 0 /* No empty arrays */); + + const auto [tagLowDbl, valLowDbl] = + std::make_pair(value::TypeTags::NumberDouble, + value::bitcastFrom(std::numeric_limits::quiet_NaN())); + const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd); + value::ValueGuard vgLowStr(tagLowStr, valLowStr); + + // Actual 
cardinality {$lt: 100} = 115. + double expectedCard = estimateCardRange(arrHist, + false /* lowInclusive */, + tagLowDbl, + valLowDbl, + false /* highInclusive */, + value::TypeTags::NumberInt64, + value::bitcastFrom(100), + true /* includeScalar */); + ASSERT_APPROX_EQUAL(109.9, expectedCard, kErrorBound); + + // Actual cardinality {$gt: 502} = 434. + expectedCard = estimateCardRange(arrHist, + false /* lowInclusive */, + value::TypeTags::NumberInt64, + value::bitcastFrom(500), + false /* highInclusive */, + tagLowStr, + valLowStr, + true /* includeScalar */); + ASSERT_APPROX_EQUAL(443.8, expectedCard, kErrorBound); + + // Actual cardinality {$gte: 502} = 437. + expectedCard = estimateCardRange(arrHist, + true /* lowInclusive */, + value::TypeTags::NumberInt64, + value::bitcastFrom(500), + false /* highInclusive */, + tagLowStr, + valLowStr, + true /* includeScalar */); + ASSERT_APPROX_EQUAL(448.3, expectedCard, kErrorBound); + + // Actual cardinality {$eq: ''} = 0. + expectedCard = estimateCardEq(arrHist, tagLowStr, valLowStr, true /* includeScalar */); + ASSERT_APPROX_EQUAL(6.69, expectedCard, 0.001); + + // Actual cardinality {$eq: 'DD2'} = 2. + auto [tagStr, valStr] = value::makeNewString("DD2"_sd); + value::ValueGuard vg(tagStr, valStr); + expectedCard = estimateCardEq(arrHist, tagStr, valStr, true /* includeScalar */); + ASSERT_APPROX_EQUAL(5.27, expectedCard, kErrorBound); + + // Actual cardinality {$lte: 'DD2'} = 120. + expectedCard = estimateCardRange(arrHist, + true /* lowInclusive */, + tagLowStr, + valLowStr, + true /* highInclusive */, + tagStr, + valStr, + true /* includeScalar */); + ASSERT_APPROX_EQUAL(160.6, expectedCard, kErrorBound); + + // Actual cardinality {$gt: 'DD2'} = 450. + auto [tagObj, valObj] = value::makeNewObject(); + value::ValueGuard vgObj(tagObj, valObj); + expectedCard = estimateCardRange(arrHist, + false /* lowInclusive */, + tagStr, + valStr, + false /* highInclusive */, + tagObj, + valObj, + true /* includeScalar */); + ASSERT_APPROX_EQUAL(411.2, expectedCard, kErrorBound); + + // Queries with $elemMatch. + const auto [tagInt, valInt] = + std::make_pair(value::TypeTags::NumberInt64, value::bitcastFrom(603)); + + // Actual cardinality {$match: {a: {$elemMatch: {$eq: 603}}}} = 12. + expectedCard = estimateCardEq(arrHist, tagInt, valInt, false /* includeScalar */); + ASSERT_APPROX_EQUAL(12.0, expectedCard, kErrorBound); + + // Actual cardinality {$match: {a: {$elemMatch: {$lte: 603}}}} = 252. + expectedCard = estimateCardRange(arrHist, + false /* lowInclusive */, + tagLowDbl, + valLowDbl, + true /* highInclusive */, + tagInt, + valInt, + false /* includeScalar */); + ASSERT_APPROX_EQUAL(293.0, expectedCard, kErrorBound); + + // Actual cardinality {$match: {a: {$elemMatch: {$gte: 603}}}} = 200. + expectedCard = estimateCardRange(arrHist, + true /* lowInclusive */, + tagInt, + valInt, + false /* highInclusive */, + tagLowStr, + valLowStr, + false /* includeScalar */); + ASSERT_APPROX_EQUAL(250.8, expectedCard, kErrorBound); + + // Actual cardinality {$match: {a: {$elemMatch: {$eq: 'cu'}}}} = 7. + std::tie(tagStr, valStr) = value::makeNewString("cu"_sd); + expectedCard = estimateCardEq(arrHist, tagStr, valStr, false /* includeScalar */); + ASSERT_APPROX_EQUAL(3.8, expectedCard, kErrorBound); + + // Actual cardinality {$match: {a: {$elemMatch: {$gte: 'cu'}}}} = 125. 
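+    // (This range, like the cross-type ranges above, leans on the BSON type order: int64 values
+    //  sort before all strings and strings sort before all objects, so the empty string serves
+    //  as the low string bound and the empty object as "+infinity" for strings.)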
+ expectedCard = estimateCardRange(arrHist, + true /* lowInclusive */, + tagStr, + valStr, + false /* highInclusive */, + tagObj, + valObj, + false /* includeScalar */); + ASSERT_APPROX_EQUAL(109.7, expectedCard, kErrorBound); + + // Actual cardinality {$match: {a: {$elemMatch: {$lte: 'cu'}}}} = 141. + expectedCard = estimateCardRange(arrHist, + true /* lowInclusive */, + tagLowStr, + valLowStr, + true /* highInclusive */, + tagStr, + valStr, + false /* includeScalar */); + ASSERT_APPROX_EQUAL(156.1, expectedCard, kErrorBound); +} +} // namespace +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/heuristic_dataflow_nodes_test.cpp b/src/mongo/db/query/ce/heuristic_dataflow_nodes_test.cpp new file mode 100644 index 00000000000..7efe1a974ba --- /dev/null +++ b/src/mongo/db/query/ce/heuristic_dataflow_nodes_test.cpp @@ -0,0 +1,221 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/ce/heuristic_estimator.h" +#include "mongo/db/query/ce/test_utils.h" +#include "mongo/db/query/optimizer/props.h" +#include "mongo/db/query/optimizer/utils/unit_test_utils.h" +#include "mongo/db/query/optimizer/utils/utils.h" +#include "mongo/unittest/unittest.h" + +namespace mongo::optimizer::ce { +namespace { +constexpr double kCollCard = 1000.0; +const std::string kCollName = "test"; + +constexpr double kOtherCollCard = 200.0; +const std::string kOtherCollName = "otherTest"; + +constexpr double kThirdCollCard = 50.0; +const std::string kThirdCollName = "thirdTest"; + +class DataflowCETester : public CETester { +public: + DataflowCETester() : CETester(kCollName, kCollCard, kDefaultCETestPhaseSet) {} + +protected: + std::unique_ptr getEstimator() const override { + return std::make_unique(); + } +}; + +bool isRootNodeFn(const ABT& node) { + return node.is(); +} + +TEST(CEDataflowTest, EstimateTrivialNodes) { + DataflowCETester t; + const auto matchCard = t.getMatchCE("{a: 1}", isRootNodeFn); + + // Verify 'CollationNode' estimate returns the input cardinality. 
+ ASSERT_CE(t, "[{$sort: {a: 1}}]", kCollCard); + ASSERT_CE(t, "[{$sort: {a: -1, b: 1}}]", kCollCard); + ASSERT_CE(t, "[{$match: {a: 1}}, {$sort: {a: 1, b: 1}}]", matchCard); + + // Verify 'EvaluationNode' estimate. + ASSERT_CE(t, "[{$project: {a: {$add: [\"$a\", 1]}}}]", kCollCard); + ASSERT_CE(t, "[{$match: {a: 1}}, {$project: {a: {$add: [\"$a\", 1]}}}]", matchCard); +} + +TEST(CEDataflowTest, EstimateUnionNode) { + auto makeUnionBranch = [](const std::string& collName) { + ProjectionName scanVar{"scan_" + collName}; + auto scanNode = make(scanVar, collName); + auto evalPath = + make(make("a", make()), make(scanVar)); + return make("a", std::move(evalPath), std::move(scanNode)); + }; + + // Verify that the estimate of 'UnionNode' always returns the sum of estimates of its children. + // In the following tests we force a simple plan to be generated by passing in a 'manually' + // constructed ABT. + { + DataflowCETester t; + t.addCollection(kOtherCollName, kOtherCollCard, {}); + t.addCollection(kThirdCollName, kThirdCollCard, {}); + { + auto unionNode = make( + ProjectionNameVector{"a"}, + makeSeq(makeUnionBranch(kCollName), makeUnionBranch(kOtherCollName))); + auto rootNode = make( + properties::ProjectionRequirement{ProjectionNameVector{"a"}}, std::move(unionNode)); + ASSERT_CE(t, rootNode, kCollCard + kOtherCollCard); + } + { + auto unionNode = make( + ProjectionNameVector{"a"}, + makeSeq(makeUnionBranch(kCollName), makeUnionBranch(kOtherCollName))); + auto parentUnionNode = + make(ProjectionNameVector{"a"}, + makeSeq(std::move(unionNode), makeUnionBranch(kThirdCollName))); + auto rootNode = + make(properties::ProjectionRequirement{ProjectionNameVector{"a"}}, + std::move(parentUnionNode)); + ASSERT_CE(t, rootNode, kCollCard + kOtherCollCard + kThirdCollCard); + } + } + + // The following plans include a UnionNode. + { + DataflowCETester t; + t.setCollCard(2000); + t.setIndexes( + {{"indexA", makeIndexDefinition("a", CollationOp::Ascending, /* isMultiKey */ true)}}); + t.setDisableScan(true); + ASSERT_MATCH_CE(t, {"{a: [12]}"}, 1); + } + { + DataflowCETester t; + t.setIndexes( + {{"indexA", makeIndexDefinition("a", CollationOp::Ascending, /* isMultiKey */ false)}, + {"indexB", makeIndexDefinition("b", CollationOp::Ascending, /* isMultiKey */ false)}}); + t.setDisableScan(true); + ASSERT_MATCH_CE(t, {"{a: 1, b: 2}"}, 5.62341); + } +} + +TEST(CEDataflowTest, EstimateLimitSkipNode) { + DataflowCETester t; + const CEType matchCard = t.getMatchCE("{a: 1}", isRootNodeFn); + + // Verify that 'LimitSkipNode' estimate with only a limit set is min(limit, inputCE). + ASSERT_CE(t, "[{$limit: 1}]", 1.0); + ASSERT_CE(t, "[{$limit: 50}]", 50.0); + ASSERT_CE(t, "[{$limit: 1000}]", kCollCard); + ASSERT_CE(t, "[{$limit: 10000}]", kCollCard); + ASSERT_CE(t, "[{$match: {a: 1}}, {$limit: 1}]", 1.0); + ASSERT_CE(t, "[{$match: {a: 1}}, {$limit: 5}]", 5.0); + ASSERT_CE(t, "[{$match: {a: 1}}, {$limit: 50}]", matchCard); + ASSERT_CE(t, "[{$match: {a: 1}}, {$limit: 1000}]", matchCard); + + // Verify that 'LimitSkipNode' estimate with only a skip set is max(inputCE - skip, 0). 
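+    // (More generally, limit and skip combine as
+    //      estimate = std::min(limit, std::max(inputCE - skip, 0.0));
+    //  per the LimitSkipNode transport in heuristic_estimator.cpp; this one formula predicts
+    //  every combination exercised below.)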
+ ASSERT_CE(t, "[{$skip: 0}]", kCollCard); + ASSERT_CE(t, "[{$skip: 1}]", kCollCard - 1.0); + ASSERT_CE(t, "[{$skip: 50}]", kCollCard - 50.0); + ASSERT_CE(t, "[{$skip: 1000}]", 0.0); + ASSERT_CE(t, "[{$skip: 10000}]", 0.0); + ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 1}]", matchCard - 1.0); + ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 5}]", matchCard - 5.0); + ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 50}]", 0.0); + ASSERT_CE(t, "[{$match: {a: 1}}, {$skip: 1000}]", 0.0); + + // Test estimates for combinations of $limit & $skip. + ASSERT_CE(t, "[{$limit: 1}, {$skip: 1}]", 0.0); + ASSERT_CE(t, "[{$skip: 1}, {$limit: 1}]", 1.0); + ASSERT_CE(t, "[{$limit: 1}, {$skip: 50}]", 0.0); + ASSERT_CE(t, "[{$skip: 50}, {$limit: 1}]", 1.0); + ASSERT_CE(t, "[{$limit: 50}, {$skip: 1}]", 49.0); + ASSERT_CE(t, "[{$skip: 1}, {$limit: 50}]", 50.0); + ASSERT_CE(t, "[{$limit: 50}, {$skip: 50}]", 0.0); + ASSERT_CE(t, "[{$skip: 50}, {$limit: 50}]", 50.0); + ASSERT_CE(t, "[{$limit: 1000}, {$skip: 50}]", kCollCard - 50.0); + ASSERT_CE(t, "[{$skip: 50}, {$limit: 1000}]", kCollCard - 50.0); + ASSERT_CE(t, "[{$limit: 50}, {$skip: 1000}]", 0.0); + ASSERT_CE(t, "[{$skip: 1000}, {$limit: 50}]", 0.0); + ASSERT_CE(t, "[{$limit: 1000}, {$skip: 1000}]", 0.0); + ASSERT_CE(t, "[{$skip: 1000}, {$limit: 1000}]", 0.0); + + // Test estimates for combinations of $limit & $skip separated by a $match. + ASSERT_CE(t, "[{$limit: 1}, {$match: {a: 1}}, {$skip: 1}]", 0.0); + ASSERT_CE(t, "[{$limit: 1}, {$match: {a: 1}}, {$skip: 50}]", 0.0); + + // Input card to $match: 50. $match selectivity here is sqrt(50)/50. + ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 1}]", 6.07107); + ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 50}]", 0.0); + ASSERT_CE(t, "[{$limit: 50}, {$match: {a: 1}}, {$skip: 1000}]", 0.0); + + // Input card to $match is kCollCard. However, our estimate is larger than matchCard because we + // have a FilterNode that does not get converted to a SargableNode in this case. The $match + // selectivity here is sqrt(1000)/1000. + ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 1}]", 30.6228); + ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 20}]", 11.6228); + ASSERT_CE(t, "[{$limit: 1000}, {$match: {a: 1}}, {$skip: 1000}]", 0.0); + + // Input card to $match: 999. $match selectivity here is sqrt(999)/999. + ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 1}]", 1.0); + ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 20}]", 20.0); + ASSERT_CE(t, "[{$skip: 1}, {$match: {a: 1}}, {$limit: 1000}]", 31.607); + + // Input card to $match: 950. $match selectivity here is sqrt(950)/950. + ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 1}]", 1.0); + ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 20}]", 20.0); + ASSERT_CE(t, "[{$skip: 50}, {$match: {a: 1}}, {$limit: 1000}]", 30.8221); + + // Input card to $match is 0.0. + ASSERT_CE(t, "[{$skip: 1000}, {$match: {a: 1}}, {$limit: 50}]", 0.0); + ASSERT_CE(t, "[{$skip: 1000}, {$match: {a: 1}}, {$limit: 1000}]", 0.0); +} + +TEST(CEDataflowTest, EstimateUnwindNode) { + DataflowCETester t; + const CEType matchCard = t.getMatchCE("{a: 1}", isRootNodeFn); + + // We assume that arrays on average have ~10 elements, so we estimate this as inputCard*10. 
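+    // (The multiplier is kDefaultAverageArraySize = 10.0 from heuristic_estimator.cpp; the
+    //  UnwindNode transport returns kDefaultAverageArraySize * childResult, so e.g. 1000 input
+    //  documents unwind to an estimated 10000 documents.)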
+ ASSERT_CE(t, "[{$unwind: '$a'}]", 10 * kCollCard); + ASSERT_CE(t, "[{$match: {a: 1}}, {$unwind: '$a'}]", 10 * matchCard); + ASSERT_CE(t, "[{$unwind: {path: '$a', preserveNullAndEmptyArrays: true}}]", 10 * kCollCard); + ASSERT_CE(t, + "[{$match: {a: 1}}, {$unwind: {path: '$a', preserveNullAndEmptyArrays: true}}]", + 10 * matchCard); + + // TODO SERVER-70035: implement histogram estimation of $unwind. +} + +} // namespace +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/heuristic_estimator.cpp b/src/mongo/db/query/ce/heuristic_estimator.cpp new file mode 100644 index 00000000000..88421015f39 --- /dev/null +++ b/src/mongo/db/query/ce/heuristic_estimator.cpp @@ -0,0 +1,600 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/ce/heuristic_estimator.h" + +#include "mongo/db/query/optimizer/cascades/memo.h" +#include "mongo/db/query/optimizer/utils/ce_math.h" +#include "mongo/util/assert_util.h" + +namespace mongo::optimizer::ce { +// Invalid estimate - an arbitrary negative value used for initialization. +constexpr SelectivityType kInvalidSel = -1.0; + +constexpr SelectivityType kDefaultFilterSel = 0.1; +constexpr SelectivityType kDefaultExistsSel = 0.70; + +// The selectivities used in the piece-wise function for open-range intervals. +// Note that we assume a smaller input cardinality will result in a less selective range. +constexpr SelectivityType kSmallCardOpenRangeSel = 0.70; +constexpr SelectivityType kMediumCardOpenRangeSel = 0.45; +constexpr SelectivityType kLargeCardOpenRangeSel = 0.33; + +// The selectivities used in the piece-wise function for closed-range intervals. +// Note that we assume a smaller input cardinality will result in a less selective range. +constexpr SelectivityType kSmallCardClosedRangeSel = 0.50; +constexpr SelectivityType kMediumCardClosedRangeSel = 0.33; +constexpr SelectivityType kLargeCardClosedRangeSel = 0.20; + +// Global and Local selectivity should multiply to the Complete selectivity. 
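+// (Indeed kDefaultLocalGroupSel * kDefaultGlobalGroupSel = 0.02 * 0.5 = 0.01 =
+// kDefaultCompleteGroupSel, so a Local/Global pair of group nodes estimates the same output
+// cardinality as a single Complete group.)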
+constexpr SelectivityType kDefaultCompleteGroupSel = 0.01;
+constexpr SelectivityType kDefaultLocalGroupSel = 0.02;
+constexpr SelectivityType kDefaultGlobalGroupSel = 0.5;
+
+// The following constants are the steps used in the piece-wise functions that select
+// selectivities based on input cardinality.
+constexpr CEType kSmallLimit = 20.0;
+constexpr CEType kMediumLimit = 100.0;
+
+// Assumed average number of elements in an array.
+constexpr CEType kDefaultAverageArraySize = 10.0;
+
+/**
+ * Default selectivity of equalities. To avoid super small selectivities for small
+ * cardinalities, which would result in a cardinality of 0 for many small inputs, the
+ * estimate is scaled as inputCard grows. The bigger inputCard, the smaller the
+ * selectivity.
+ */
+SelectivityType equalitySel(const CEType inputCard) {
+    uassert(6716604, "Zero cardinality must be handled by the caller.", inputCard > 0.0);
+    if (inputCard <= 1.0) {
+        // If the input has at most 1 value, it cannot be reduced any further by a condition.
+        return 1.0;
+    }
+    return std::sqrt(inputCard) / inputCard;
+}
+
+/**
+ * Default selectivity of intervals with bounds on both ends. These intervals are
+ * considered less selective than equalities.
+ * Examples: (a > 'abc' AND a < 'hta'), (0 < b <= 13)
+ */
+SelectivityType closedRangeSel(const CEType inputCard) {
+    SelectivityType sel = kInvalidSel;
+    if (inputCard < kSmallLimit) {
+        sel = kSmallCardClosedRangeSel;
+    } else if (inputCard < kMediumLimit) {
+        sel = kMediumCardClosedRangeSel;
+    } else {
+        sel = kLargeCardClosedRangeSel;
+    }
+    return sel;
+}
+
+/**
+ * Default selectivity of intervals open on one end. These intervals are
+ * considered less selective than those with both ends specified by the user query.
+ * Examples: (a > 'xyz'), (b <= 13)
+ */
+SelectivityType openRangeSel(const CEType inputCard) {
+    SelectivityType sel = kInvalidSel;
+    if (inputCard < kSmallLimit) {
+        sel = kSmallCardOpenRangeSel;
+    } else if (inputCard < kMediumLimit) {
+        sel = kMediumCardOpenRangeSel;
+    } else {
+        sel = kLargeCardOpenRangeSel;
+    }
+    return sel;
+}
+
+mongo::sbe::value::TypeTags constType(const Constant* constBoundPtr) {
+    if (constBoundPtr == nullptr) {
+        return mongo::sbe::value::TypeTags::Nothing;
+    }
+    const auto [tag, val] = constBoundPtr->get();
+    return tag;
+}
+
+mongo::sbe::value::TypeTags boundType(const BoundRequirement& bound) {
+    return constType(bound.getBound().cast<Constant>());
+}
+
+SelectivityType intervalSel(const IntervalRequirement& interval, const CEType inputCard) {
+    SelectivityType sel = kInvalidSel;
+    if (interval.isFullyOpen()) {
+        sel = 1.0;
+    } else if (interval.isEquality()) {
+        sel = equalitySel(inputCard);
+    } else if (interval.getHighBound().isPlusInf() || interval.getLowBound().isMinusInf() ||
+               boundType(interval.getLowBound()) != boundType(interval.getHighBound())) {
+        // The interval has an actual bound only on one of its ends if:
+        // - one of the bounds is infinite, or
+        // - the bounds are of different types - this is the case when, due to type bracketing,
+        //   one of the bounds is the lowest/highest value of the previous/next type.
+        // TODO: Notice that sometimes type bracketing uses a min/max value from the same type,
+        // so sometimes we may not detect an open-ended interval.
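+        // (For reference, the piece-wise defaults defined above step through:
+        //      inputCard < 20:   open 0.70, closed 0.50
+        //      inputCard < 100:  open 0.45, closed 0.33
+        //      otherwise:        open 0.33, closed 0.20
+        //  i.e. smaller inputs are deliberately treated as less selective.)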
+        sel = openRangeSel(inputCard);
+    } else {
+        sel = closedRangeSel(inputCard);
+    }
+    uassert(6716603, "Invalid selectivity.", validSelectivity(sel));
+    return sel;
+}
+
+SelectivityType negationSel(SelectivityType sel) {
+    return 1.0 - sel;
+}
+
+SelectivityType operationSel(const Operations op, const CEType inputCard) {
+    switch (op) {
+        case Operations::Eq:
+            return equalitySel(inputCard);
+        case Operations::Neq:
+            return negationSel(equalitySel(inputCard));
+        case Operations::EqMember:
+            // Reached when the query has $in. We don't handle it yet.
+            return kDefaultFilterSel;
+        case Operations::Gt:
+        case Operations::Gte:
+        case Operations::Lt:
+        case Operations::Lte:
+            return openRangeSel(inputCard);
+        default:
+            MONGO_UNREACHABLE;
+    }
+}
+
+SelectivityType intervalSel(const PathCompare& left,
+                            const PathCompare& right,
+                            const CEType inputCard) {
+    if (left.op() == Operations::EqMember || right.op() == Operations::EqMember) {
+        // Reached when the query has $in. We don't handle it yet.
+        return kDefaultFilterSel;
+    }
+
+    bool lowBoundUnknown = false;
+    bool highBoundUnknown = false;
+    boost::optional<mongo::sbe::value::TypeTags> lowBoundType;
+    boost::optional<mongo::sbe::value::TypeTags> highBoundType;
+
+    for (const auto& compare : {left, right}) {
+        switch (compare.op()) {
+            case Operations::Eq: {
+                // This branch is reached when we have a conjunction of equalities on the same
+                // path.
+                uassert(6777601,
+                        "Expected conjunction of equalities.",
+                        left.op() == Operations::Eq && right.op() == Operations::Eq);
+
+                const auto leftConst = left.getVal().cast<Constant>();
+                const auto rightConst = right.getVal().cast<Constant>();
+                if (leftConst && rightConst && !(*leftConst == *rightConst)) {
+                    // Equality comparison on different constants is a contradiction.
+                    return 0.0;
+                }
+                // We can't tell if the equalities result in a contradiction or not, so we use the
+                // default equality selectivity.
+                return equalitySel(inputCard);
+            }
+            case Operations::Gt:
+            case Operations::Gte:
+                lowBoundUnknown = lowBoundUnknown || compare.getVal().is<Variable>();
+                lowBoundType = constType(compare.getVal().cast<Constant>());
+                break;
+            case Operations::Lt:
+            case Operations::Lte:
+                highBoundUnknown = highBoundUnknown || compare.getVal().is<Variable>();
+                highBoundType = constType(compare.getVal().cast<Constant>());
+                break;
+            default:
+                MONGO_UNREACHABLE;
+        }
+    }
+
+    if (lowBoundType && highBoundType &&
+        (lowBoundType == highBoundType || lowBoundUnknown || highBoundUnknown)) {
+        // Interval is closed only if:
+        // - it has low and high bounds
+        // - the bounds are of the same type
+        //
+        // If the bounds are of different types, it implies that one bound is the
+        // lowest/highest value of the previous/next type and has been added for type bracketing
+        // purposes. We treat such bounds as infinity.
+        //
+        // If there are unknown boundaries (Variables), we assume that they are of the same type
+        // as the other bound.
+        //
+        // TODO: Notice that sometimes type bracketing uses a min/max value from the same type,
+        // so sometimes we may not detect an open-ended interval.
+        return closedRangeSel(inputCard);
+    }
+
+    if (lowBoundType || highBoundType) {
+        return openRangeSel(inputCard);
+    }
+
+    MONGO_UNREACHABLE;
+}
+
+/**
+ * Heuristic selectivity estimation for EvalFilter nodes. Used for estimating cardinalities of
+ * FilterNodes. The estimate is computed by traversing the tree bottom-up, applying default
+ * selectivity functions to atomic predicates (comparisons), and combining child selectivities of
+ * disjunctions and conjunctions via simple addition and multiplication.
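+ *
+ * Under the independence assumption used here, conjunction multiplies selectivities and
+ * disjunction applies inclusion-exclusion:
+ *
+ *     conjunctionSel(l, r) = l * r
+ *     disjunctionSel(l, r) = l + r - l * r
+ *
+ * For example, over 100 documents an equality has selectivity sqrt(100)/100 = 0.1, so
+ * {$or: [{a: 1}, {b: 1}]} is estimated at 0.1 + 0.1 - 0.01 = 0.19, i.e. 19 documents.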
+ */ +class EvalFilterSelectivityTransport { +public: + /** + * Helper class for holding values passed from child to parent nodes when traversing the tree. + */ + struct EvalFilterSelectivityResult { + // Each item represents a field in a dotted path. + // Collected while traversing a path expression. + // Used for deciding whether a conjunction of comparisons is an interval or not. + FieldPathType path; + // When handling a PathComposeM, we need to access its child comparisons which might be + // hidden under path expressions. + const PathCompare* compare; + // The selectivity estimate. + SelectivityType selectivity; + }; + + EvalFilterSelectivityResult transport(const EvalFilter& /*node*/, + CEType /*inputCard*/, + EvalFilterSelectivityResult pathResult, + EvalFilterSelectivityResult /*inputResult*/) { + return pathResult; + } + + EvalFilterSelectivityResult transport(const PathGet& node, + CEType /*inputCard*/, + EvalFilterSelectivityResult childResult) { + childResult.path.push_back(node.name()); + return childResult; + } + + EvalFilterSelectivityResult transport(const PathTraverse& node, + CEType /*inputCard*/, + EvalFilterSelectivityResult childResult) { + return childResult; + } + + EvalFilterSelectivityResult transport(const PathCompare& node, + CEType inputCard, + EvalFilterSelectivityResult /*childResult*/) { + // Note that the result will be ignored if this operation is part of an interval. + const SelectivityType sel = operationSel(node.op(), inputCard); + return {{}, &node, sel}; + } + + EvalFilterSelectivityResult transport(const PathComposeM& node, + CEType inputCard, + EvalFilterSelectivityResult leftChildResult, + EvalFilterSelectivityResult rightChildResult) { + const bool isInterval = leftChildResult.compare && rightChildResult.compare && + leftChildResult.path == rightChildResult.path; + + const SelectivityType sel = isInterval + ? intervalSel(*leftChildResult.compare, *rightChildResult.compare, inputCard) + : conjunctionSel(leftChildResult.selectivity, rightChildResult.selectivity); + + return {{}, nullptr, sel}; + } + + EvalFilterSelectivityResult transport(const PathComposeA& node, + CEType /*inputCard*/, + EvalFilterSelectivityResult leftChildResult, + EvalFilterSelectivityResult rightChildResult) { + const SelectivityType sel = + disjunctionSel(leftChildResult.selectivity, rightChildResult.selectivity); + + return {{}, nullptr, sel}; + } + + EvalFilterSelectivityResult transport(const UnaryOp& node, + CEType /*inputCard*/, + EvalFilterSelectivityResult childResult) { + switch (node.op()) { + case Operations::Not: + childResult.selectivity = negationSel(childResult.selectivity); + return childResult; + case Operations::Neg: + // If we see negation (-) in a UnaryOp, we ignore it for CE purposes. + return childResult; + default: + MONGO_UNREACHABLE; + } + } + + EvalFilterSelectivityResult transport(const PathConstant& /*node*/, + CEType /*inputCard*/, + EvalFilterSelectivityResult childResult) { + return childResult; + } + + EvalFilterSelectivityResult transport(const PathDefault& node, + CEType inputCard, + EvalFilterSelectivityResult childResult) { + if (node.getDefault() == Constant::boolean(false)) { + // We have a {$exists: true} predicate on this path if we have a Constant[false] child + // here. Note that ${exists: false} is handled by the presence of a negation expression + // higher in the ABT. + childResult.selectivity = kDefaultExistsSel; + } + return childResult; + } + + template + EvalFilterSelectivityResult transport(const T& /*node*/, Ts&&...) 
+
+    template <typename T, typename... Ts>
+    EvalFilterSelectivityResult transport(const T& /*node*/, Ts&&...) {
+        return {{}, nullptr, kDefaultFilterSel};
+    }
+
+    static SelectivityType derive(const CEType inputCard, const ABT::reference_type ref) {
+        EvalFilterSelectivityTransport instance;
+        const auto result = algebra::transport<false>(ref, instance, inputCard);
+        return result.selectivity;
+    }
+
+private:
+    SelectivityType negationSel(const SelectivityType in) {
+        return 1.0 - in;
+    }
+
+    SelectivityType conjunctionSel(const SelectivityType left, const SelectivityType right) {
+        return left * right;
+    }
+
+    SelectivityType disjunctionSel(const SelectivityType left, const SelectivityType right) {
+        // We sum the selectivities and subtract the overlapping part so that it's only counted
+        // once.
+        return left + right - left * right;
+    }
+};
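The three private combinators above are the entire algebra of this estimator. A minimal standalone sketch of the arithmetic (illustration only, not part of this patch; the 0.33 figure is the open-range selectivity the heuristics apply at high input cardinality, and both results line up with expectations in heuristic_estimator_test.cpp — 1089 of 10,000 documents for the conjunction, 551.1 of 1,000 for the disjunction):

    #include <cassert>
    #include <cmath>

    int main() {
        const double s1 = 0.33;  // e.g. selectivity of {a0: {$gt: 44}} at high cardinality
        const double s2 = 0.33;  // e.g. selectivity of {b0: {$lt: 9}} at high cardinality
        const double conj = s1 * s2;            // AND: 0.1089 -> 1089 of 10,000 docs
        const double disj = s1 + s2 - s1 * s2;  // OR:  0.5511 -> 551.1 of 1,000 docs
        const double neg = 1.0 - s1;            // NOT: 0.67
        assert(std::abs(conj - 0.1089) < 1e-9);
        assert(std::abs(disj - 0.5511) < 1e-9);
        assert(std::abs(neg - 0.67) < 1e-9);
        return 0;
    }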
+
+class HeuristicTransport {
+public:
+    CEType transport(const ScanNode& node, CEType /*bindResult*/) {
+        // Default cardinality estimate.
+        const CEType metadataCE = _metadata._scanDefs.at(node.getScanDefName()).getCE();
+        return (metadataCE < 0.0) ? kDefaultCard : metadataCE;
+    }
+
+    CEType transport(const ValueScanNode& node, CEType /*bindResult*/) {
+        return node.getArraySize();
+    }
+
+    CEType transport(const MemoLogicalDelegatorNode& node) {
+        return properties::getPropertyConst<properties::CardinalityEstimate>(
+                   _memo.getLogicalProps(node.getGroupId()))
+            .getEstimate();
+    }
+
+    CEType transport(const FilterNode& node, CEType childResult, CEType /*exprResult*/) {
+        if (childResult == 0.0) {
+            // Early out and return 0 since we don't expect to get more results.
+            return 0.0;
+        }
+        if (node.getFilter() == Constant::boolean(true)) {
+            // Trivially true filter.
+            return childResult;
+        }
+        if (node.getFilter() == Constant::boolean(false)) {
+            // Trivially false filter.
+            return 0.0;
+        }
+
+        const SelectivityType sel =
+            EvalFilterSelectivityTransport::derive(childResult, node.getFilter().ref());
+
+        return std::max(sel * childResult, kMinCard);
+    }
+
+    CEType transport(const EvaluationNode& node, CEType childResult, CEType /*exprResult*/) {
+        // Evaluations do not change cardinality.
+        return childResult;
+    }
+
+    CEType transport(const SargableNode& node,
+                     CEType childResult,
+                     CEType /*bindsResult*/,
+                     CEType /*refsResult*/) {
+        // Early out and return 0 since we don't expect to get more results.
+        if (childResult == 0.0) {
+            return 0.0;
+        }
+
+        SelectivityType topLevelSel = 1.0;
+        std::vector<SelectivityType> topLevelSelectivities;
+        for (const auto& [key, req] : node.getReqMap()) {
+            if (req.getIsPerfOnly()) {
+                // Ignore perf-only requirements.
+                continue;
+            }
+
+            SelectivityType disjSel = 1.0;
+            std::vector<SelectivityType> disjSelectivities;
+            // Intervals are in DNF.
+            const auto intervalDNF = req.getIntervals();
+            const auto disjuncts = intervalDNF.cast<IntervalReqExpr::Disjunction>()->nodes();
+            for (const auto& disjunct : disjuncts) {
+                const auto& conjuncts = disjunct.cast<IntervalReqExpr::Conjunction>()->nodes();
+                SelectivityType conjSel = 1.0;
+                std::vector<SelectivityType> conjSelectivities;
+                for (const auto& conjunct : conjuncts) {
+                    const auto& interval = conjunct.cast<IntervalReqExpr::Atom>()->getExpr();
+                    const SelectivityType sel = intervalSel(interval, childResult);
+                    conjSelectivities.push_back(sel);
+                }
+                conjSel = conjExponentialBackoff(std::move(conjSelectivities));
+                disjSelectivities.push_back(conjSel);
+            }
+            disjSel = disjExponentialBackoff(std::move(disjSelectivities));
+            topLevelSelectivities.push_back(disjSel);
+        }
+
+        if (topLevelSelectivities.empty()) {
+            return 1.0;
+        }
+        // The elements of the PartialSchemaRequirements map represent an implicit conjunction.
+        topLevelSel = conjExponentialBackoff(std::move(topLevelSelectivities));
+        CEType card = std::max(topLevelSel * childResult, kMinCard);
+        uassert(6716602, "Invalid cardinality.", validCardinality(card));
+        return card;
+    }
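    // [Annotation, not part of this patch.] conjExponentialBackoff/disjExponentialBackoff
    // (from optimizer/utils/ce_math) temper the independence assumption used above: plain
    // multiplication underestimates badly when predicates are correlated, so only the four
    // most selective estimates participate, each with exponentially decaying weight.
    // Assuming the ce_math semantics, for selectivities sorted so that s1 <= s2 <= s3 <= s4:
    //
    //     conj(s1..s4) = s1 * s2^(1/2) * s3^(1/4) * s4^(1/8)
    //
    // and a disjunction applies the same scheme to the complements (sorted s1 >= s2 >= ...):
    //
    //     disj(s1..s4) = 1 - (1 - s1) * (1 - s2)^(1/2) * (1 - s3)^(1/4) * (1 - s4)^(1/8)
    //
    // The cutoff at four inputs is what CEAfterMemoSubstitutionPhase_DNF1pathComplex in the
    // tests below relies on.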
+
+    CEType transport(const RIDIntersectNode& node,
+                     CEType /*leftChildResult*/,
+                     CEType /*rightChildResult*/) {
+        // CE for the group should already be derived via the underlying Filter or Evaluation
+        // logical nodes.
+        uasserted(6624038, "Should not be necessary to derive CE for RIDIntersectNode");
+    }
+
+    CEType transport(const RIDUnionNode& node,
+                     CEType /*leftChildResult*/,
+                     CEType /*rightChildResult*/) {
+        // CE for the group should already be derived via the underlying Filter or Evaluation
+        // logical nodes.
+        uasserted(7016301, "Should not be necessary to derive CE for RIDUnionNode");
+    }
+
+    CEType transport(const BinaryJoinNode& node,
+                     CEType leftChildResult,
+                     CEType rightChildResult,
+                     CEType /*exprResult*/) {
+        const auto& filter = node.getFilter();
+
+        SelectivityType selectivity = kDefaultFilterSel;
+        if (filter == Constant::boolean(false)) {
+            selectivity = 0.0;
+        } else if (filter == Constant::boolean(true)) {
+            selectivity = 1.0;
+        }
+        return leftChildResult * rightChildResult * selectivity;
+    }
+
+    CEType transport(const UnionNode& node,
+                     std::vector<CEType> childResults,
+                     CEType /*bindResult*/,
+                     CEType /*refsResult*/) {
+        // Combine the CE of each child.
+        CEType result = 0;
+        for (auto&& child : childResults) {
+            result += child;
+        }
+        return result;
+    }
+
+    CEType transport(const GroupByNode& node,
+                     CEType childResult,
+                     CEType /*bindAggResult*/,
+                     CEType /*refsAggResult*/,
+                     CEType /*bindGbResult*/,
+                     CEType /*refsGbResult*/) {
+        // TODO: estimate number of groups.
+        switch (node.getType()) {
+            case GroupNodeType::Complete:
+                return kDefaultCompleteGroupSel * childResult;
+
+            // Global and Local selectivity should multiply to Complete selectivity.
+            case GroupNodeType::Global:
+                return kDefaultGlobalGroupSel * childResult;
+            case GroupNodeType::Local:
+                return kDefaultLocalGroupSel * childResult;
+
+            default:
+                MONGO_UNREACHABLE;
+        }
+    }
+
+    CEType transport(const UnwindNode& node,
+                     CEType childResult,
+                     CEType /*bindResult*/,
+                     CEType /*refsResult*/) {
+        return kDefaultAverageArraySize * childResult;
+    }
+
+    CEType transport(const CollationNode& node, CEType childResult, CEType /*refsResult*/) {
+        // Collations do not change cardinality.
+        return childResult;
+    }
+
+    CEType transport(const LimitSkipNode& node, CEType childResult) {
+        const auto limit = node.getProperty().getLimit();
+        const auto skip = node.getProperty().getSkip();
+        const auto cardAfterSkip = std::max(childResult - skip, 0.0);
+        if (limit < cardAfterSkip) {
+            return limit;
+        }
+        return cardAfterSkip;
+    }
+
+    CEType transport(const ExchangeNode& node, CEType childResult, CEType /*refsResult*/) {
+        // Exchanges do not change cardinality.
+        return childResult;
+    }
+
+    CEType transport(const RootNode& node, CEType childResult, CEType /*refsResult*/) {
+        // Root node does not change cardinality.
+        return childResult;
+    }
+
+    /**
+     * Other ABT types.
+     */
+    template <typename T, typename... Ts>
+    CEType transport(const T& /*node*/, Ts&&...) {
+        static_assert(!canBeLogicalNode<T>(), "Logical node must implement its CE derivation.");
+        return 0.0;
+    }
+
+    static CEType derive(const Metadata& metadata,
+                         const cascades::Memo& memo,
+                         const ABT::reference_type logicalNodeRef) {
+        HeuristicTransport instance(metadata, memo);
+        return algebra::transport<false>(logicalNodeRef, instance);
+    }
+
+private:
+    HeuristicTransport(const Metadata& metadata, const cascades::Memo& memo)
+        : _metadata(metadata), _memo(memo) {}
+
+    // We don't own this.
+    const Metadata& _metadata;
+    const cascades::Memo& _memo;
+};
+
+CEType HeuristicEstimator::deriveCE(const Metadata& metadata,
+                                    const cascades::Memo& memo,
+                                    const properties::LogicalProps& /*logicalProps*/,
+                                    const ABT::reference_type logicalNodeRef) const {
+    return HeuristicTransport::derive(metadata, memo, logicalNodeRef);
+}
+
+} // namespace mongo::optimizer::ce
diff --git a/src/mongo/db/query/ce/heuristic_estimator.h b/src/mongo/db/query/ce/heuristic_estimator.h
new file mode 100644
index 00000000000..0cfef17d6c2
--- /dev/null
+++ b/src/mongo/db/query/ce/heuristic_estimator.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/query/optimizer/cascades/interfaces.h"
+
+namespace mongo::optimizer::ce {
+
+/**
+ * Default cardinality estimation in the absence of statistics.
+ * Relies purely on heuristics.
+ * We currently do not use logical properties for heuristic CE.
+ */
+class HeuristicEstimator : public cascades::CardinalityEstimator {
+public:
+    CEType deriveCE(const Metadata& metadata,
+                    const cascades::Memo& memo,
+                    const properties::LogicalProps& /*logicalProps*/,
+                    ABT::reference_type logicalNodeRef) const override final;
+};
+
+} // namespace mongo::optimizer::ce
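For orientation, a minimal sketch of how this interface is consumed (hypothetical wiring; `metadata`, `memo`, `logicalProps`, and `rootNode` stand for values an optimizer context provides, such as the one CETester builds in the tests below):

    std::unique_ptr<cascades::CardinalityEstimator> estimator =
        std::make_unique<HeuristicEstimator>();
    const CEType card = estimator->deriveCE(metadata, memo, logicalProps, rootNode.ref());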
diff --git a/src/mongo/db/query/ce/heuristic_estimator_test.cpp b/src/mongo/db/query/ce/heuristic_estimator_test.cpp
new file mode 100644
index 00000000000..f92f63edde9
--- /dev/null
+++ b/src/mongo/db/query/ce/heuristic_estimator_test.cpp
@@ -0,0 +1,978 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include <memory>
+#include <string>
+
+#include "mongo/db/query/ce/heuristic_estimator.h"
+#include "mongo/db/query/ce/test_utils.h"
+#include "mongo/db/query/optimizer/cascades/logical_props_derivation.h"
+#include "mongo/db/query/optimizer/cascades/memo.h"
+#include "mongo/db/query/optimizer/defs.h"
+#include "mongo/db/query/optimizer/explain.h"
+#include "mongo/db/query/optimizer/metadata.h"
+#include "mongo/db/query/optimizer/opt_phase_manager.h"
+#include "mongo/db/query/optimizer/props.h"
+#include "mongo/db/query/optimizer/utils/unit_test_utils.h"
+#include "mongo/db/query/optimizer/utils/utils.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::optimizer::ce {
+namespace {
+constexpr double kCollCard = 10000.0;
+const std::string collName = "test";
+
+class HeuristicCETester : public CETester {
+public:
+    HeuristicCETester(std::string collName,
+                      const OptPhaseManager::PhaseSet& optPhases = kDefaultCETestPhaseSet)
+        : CETester(collName, kCollCard, optPhases) {}
+
+protected:
+    std::unique_ptr<cascades::CardinalityEstimator> getEstimator() const override {
+        return std::make_unique<HeuristicEstimator>();
+    }
+};
+
+TEST(CEHeuristicTest, CEWithoutOptimizationGtLtNum) {
+    std::string query = "{a0 : {$gt : 14, $lt : 21}}";
+    HeuristicCETester ht(collName, kNoOptPhaseSet);
+    ASSERT_MATCH_CE(ht, query, 1089.0);
+}
+
+TEST(CEHeuristicTest, CEWithoutOptimizationEqNum) {
+    std::string query = "{a: 123}";
+    HeuristicCETester ht(collName, kNoOptPhaseSet);
+    ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 100.0, 10000.0);
+}
+
+TEST(CEHeuristicTest, CEWithoutOptimizationEqStr) {
+    std::string query = "{a: 'foo'}";
+    HeuristicCETester ht(collName, kNoOptPhaseSet);
+    ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 100.0, 10000.0);
+}
+
+TEST(CEHeuristicTest, CEWithoutOptimizationGtNum) {
+    
std::string query = "{a: {$gt: 44}}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationGtStr) { + std::string query = "{a: {$gt: 'foo'}}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationLtNum) { + std::string query = "{a: {$lt: 44}}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 330.0, 1000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationDNF1pathSimple) { + std::string query = + "{$or: [" + "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}," + "{$and: [{a0: {$gt:40}}, {a0: {$lt: 44}}]}" + "]}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_MATCH_CE_CARD(ht, query, 6.6591, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 36.0354, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 205.941, 1000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj1) { + std::string query = + "{$or: [" + "{a: {$lt: 3}}," + "{$and: [{b: {$gt:5}}, {c: {$lt: 10}}]}" + "]}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 7.623, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 55.5761, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 402.963, 1000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj2) { + std::string query = + "{$and: [" + "{a: {$lt: 3}}," + "{$or: [{b: {$gt:5}}, {b: {$lt: 10}}]}" + "]}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 5.733, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 31.0736, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 181.863, 1000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj3) { + std::string query = + "{$and: [" + "{$and: [{a: {$gt: 5}}, {a: {$lt: 10}}]}," + "{$and: [" + " {b: {$gt: 15}}," + " {c: {$lt: 110}}," + " {$or: [{a1: 1}, {b1: 2}, {c1: 3}]}" + "]}" + "]}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 1.52063, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 4.15975, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 9.11877, 1000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationNestedConjAndDisj4) { + std::string query = + "{$or: [" + "{$or: [{a: {$gt: 5}}, {a: {$lt: 10}}]}," + "{$or: [" + " {b: {$gt: 15}}," + " {c: {$lt: 110}}," + " {$and: [{a1: 1}, {b1: 2}, {c1: 3}]}" + "]}" + "]}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 8.9298, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 89.9501, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 798.495, 1000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationTraverseSelectivityDoesNotAccumulate) { + std::string query = + "{$or: [" + "{a0: 1}," + "{a0: {$lt: -4}}," + "{b0: {$gt: 10}}" + "]}"; + std::string queryWithLongPaths = + "{$or: [" + "{'a0.a1.a2.a3.a4.a5.a6.a7.a8.a9': 1}," + "{'a0.a1.a2.a3.a4.a5.a6.a7.a8.a9': {$lt: -4}}," + "{'b0.b1.b3': {$gt: 
10}}" + "]}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + auto ce1 = ht.getMatchCE(query); + auto ce2 = ht.getMatchCE(queryWithLongPaths); + ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithEqOnSameValue) { + std::string query = + "{$or: [" + "{a: 1}," + "{$and: [{a: 2}, {a: 2}]}" + "]}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 5.0, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 18.8997, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 62.2456, 1000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithEqOnDifferentValues) { + std::string query = + "{$or: [" + "{a: 1}," + "{$and: [{a: 2}, {a: 3}]}" + "]}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 3.0, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 9.94987, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 31.6228, 1000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationConjunctionWithIn) { + std::string query = + "{$or: [" + "{a: 1}," + "{$and: [{a: 2}, {a: {$in: [2, 3, 4]}}]}" + "]}"; + HeuristicCETester ht(collName, kNoOptPhaseSet); + // Estimation for $in is not implemented yet, so we assume it has the default filter selectivity + // of 0.1. + ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 3.6, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 18.8549, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 128.46, 1000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationOneLowBoundWithoutTraverse) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make(make("a", make(Operations::Gt, Constant::int64(42))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 2.1, 3.0); + ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); + ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); + ASSERT_CE_CARD(ht, rootNode, 33.0, 100.0); + ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationOneHighBoundWithoutTraverse) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make(make("a", make(Operations::Lt, Constant::int64(42))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 2.1, 3.0); + ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); + ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); + ASSERT_CE_CARD(ht, rootNode, 33.0, 100.0); + ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationTwoLowBoundsWithoutTraverse) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make(make("a", + make( + make(Operations::Gt, Constant::int64(5)), + make(Operations::Gt, Constant::int64(10)))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 2.1, 3.0); + ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); + ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); + ASSERT_CE_CARD(ht, rootNode, 
33.0, 100.0); + ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationTwoHighBoundsWithoutTraverse) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make(make("a", + make( + make(Operations::Lt, Constant::int64(5)), + make(Operations::Lt, Constant::int64(10)))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 2.1, 3.0); + ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); + ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); + ASSERT_CE_CARD(ht, rootNode, 33.0, 100.0); + ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationClosedRangeWithoutTraverse) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make(make("a", + make( + make(Operations::Gt, Constant::int64(7)), + make(Operations::Lt, Constant::int64(13)))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 1.5, 3.0); + ASSERT_CE_CARD(ht, rootNode, 3.5, 7.0); + ASSERT_CE_CARD(ht, rootNode, 5.0, 10.0); + ASSERT_CE_CARD(ht, rootNode, 20.0, 100.0); + ASSERT_CE_CARD(ht, rootNode, 2000.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationIntervalWithDifferentTypes) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make( + make( + "a", + make(make(Operations::Gt, Constant::int64(5)), + make(Operations::Lt, Constant::str("foo")))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 2.1, 3.0); + ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); + ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); + ASSERT_CE_CARD(ht, rootNode, 33.0, 100.0); + ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationClosedRangeWithPathExpr) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make( + make( + make( + "a0", + make( + make("a1", + make( + make(Operations::Gt, Constant::int64(5)), + PathTraverse::kSingleLevel)), + PathTraverse::kSingleLevel)), + make( + "a0", + make( + make("a1", + make( + make(Operations::Lt, Constant::int64(10)), + PathTraverse::kSingleLevel)), + PathTraverse::kSingleLevel))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 1.5, 3.0); + ASSERT_CE_CARD(ht, rootNode, 3.5, 7.0); + ASSERT_CE_CARD(ht, rootNode, 5.0, 10.0); + ASSERT_CE_CARD(ht, rootNode, 20.0, 100.0); + ASSERT_CE_CARD(ht, rootNode, 2000.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationClosedRangeWith1Variable) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make( + make( + make( + "a0", + make( + make("a1", + make( + make(Operations::Gt, Constant::int64(5)), + PathTraverse::kSingleLevel)), + PathTraverse::kSingleLevel)), + make( + "a0", + make( + make("a1", + 
make( + make(Operations::Lt, make("test")), + PathTraverse::kSingleLevel)), + PathTraverse::kSingleLevel))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 1.5, 3.0); + ASSERT_CE_CARD(ht, rootNode, 3.5, 7.0); + ASSERT_CE_CARD(ht, rootNode, 5.0, 10.0); + ASSERT_CE_CARD(ht, rootNode, 20.0, 100.0); + ASSERT_CE_CARD(ht, rootNode, 2000.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationOpenRangeWith1Variable) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make( + make( + make( + "a0", + make( + make("a1", + make( + make(Operations::Lt, Constant::int64(5)), + PathTraverse::kSingleLevel)), + PathTraverse::kSingleLevel)), + make( + "a0", + make( + make("a1", + make( + make(Operations::Lt, make("test")), + PathTraverse::kSingleLevel)), + PathTraverse::kSingleLevel))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 2.1, 3.0); + ASSERT_CE_CARD(ht, rootNode, 4.9, 7.0); + ASSERT_CE_CARD(ht, rootNode, 7.0, 10.0); + ASSERT_CE_CARD(ht, rootNode, 33.0, 100.0); + ASSERT_CE_CARD(ht, rootNode, 3300.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationConjunctionOfBoundsWithDifferentPaths) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make( + make( + make( + "a0", + make( + make("a1", + make( + make(Operations::Gt, Constant::int64(5)), + PathTraverse::kSingleLevel)), + PathTraverse::kSingleLevel)), + make( + "b0", + make( + make("b1", + make( + make(Operations::Lt, Constant::int64(10)), + PathTraverse::kSingleLevel)), + PathTraverse::kSingleLevel))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 1.47, 3.0); + ASSERT_CE_CARD(ht, rootNode, 3.43, 7.0); + ASSERT_CE_CARD(ht, rootNode, 4.9, 10.0); + ASSERT_CE_CARD(ht, rootNode, 10.89, 100.0); + ASSERT_CE_CARD(ht, rootNode, 1089.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationDisjunctionOnSamePathWithoutTraverse) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make( + make( + make( + "a0", + make("a1", make(Operations::Gt, Constant::int64(5)))), + make( + "a0", + make("a1", make(Operations::Eq, Constant::int64(100))))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 2.61962, 3.0); + ASSERT_CE_CARD(ht, rootNode, 5.69373, 7.0); + ASSERT_CE_CARD(ht, rootNode, 7.94868, 10.0); + ASSERT_CE_CARD(ht, rootNode, 39.7, 100.0); + ASSERT_CE_CARD(ht, rootNode, 3367.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationDisjunctionOnDifferentPathsWithoutTraverse) { + ABT scanNode = make("test", "test"); + + ABT filterNode = make( + make( + make( + make( + "a0", + make("a1", make(Operations::Gt, Constant::int64(5)))), + make( + "b0", + make("b1", 
make(Operations::Eq, Constant::int64(100))))), + make("test")), + std::move(scanNode)); + + ABT rootNode = make(properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + std::move(filterNode)); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ASSERT_CE_CARD(ht, rootNode, 0.0, 0.0); + ASSERT_CE_CARD(ht, rootNode, 2.61962, 3.0); + ASSERT_CE_CARD(ht, rootNode, 5.69373, 7.0); + ASSERT_CE_CARD(ht, rootNode, 7.94868, 10.0); + ASSERT_CE_CARD(ht, rootNode, 39.7, 100.0); + ASSERT_CE_CARD(ht, rootNode, 3367.0, 10000.0); +} + +TEST(CEHeuristicTest, CEWithoutOptimizationEquivalentConjunctions) { + ABT rootNode1 = make( + properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + make( + make( + make( + make( + "a0", + make(make(Operations::Gt, Constant::int64(5)), + PathTraverse::kSingleLevel)), + make( + "b0", + make(make(Operations::Gt, Constant::int64(10)), + PathTraverse::kSingleLevel))), + make("test")), + make("test", "test"))); + + ABT rootNode2 = make( + properties::ProjectionRequirement{ProjectionNameVector{"test"}}, + make( + make(make("a0", + make(make(Operations::Gt, + Constant::int64(5)), + PathTraverse::kSingleLevel)), + make("test")), + make( + make( + make( + "b0", + make(make(Operations::Gt, Constant::int64(10)), + PathTraverse::kSingleLevel)), + make("test")), + make("test", "test")))); + + HeuristicCETester ht(collName, kNoOptPhaseSet); + ht.setCollCard(kCollCard); + auto ce1 = ht.getCE(rootNode1); + auto ce2 = ht.getCE(rootNode2); + ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError); +} + +TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Eq) { + std::string query = "{a : 123}"; + HeuristicCETester ht(collName); + ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 0.1, 0.1); + ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0); + ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0); + ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0); + ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0); + ASSERT_MATCH_CE_CARD(ht, query, 100.0, 10000.0); +} + +TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Gt) { + std::string query = "{a: {$gt: 44}}"; + HeuristicCETester ht(collName); + ASSERT_MATCH_CE_CARD(ht, query, 0.01, 0.0); + ASSERT_MATCH_CE_CARD(ht, query, 0.7, 1.0); + ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 330, 1000.0); +} + +TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Gt_Lt) { + std::string query = "{a: {$gt: 44, $lt: 99}}"; + HeuristicCETester ht(collName); + ASSERT_MATCH_CE_CARD(ht, query, 0.585662, 1.0); + ASSERT_MATCH_CE_CARD(ht, query, 5.27096, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 29.885, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 189.571, 1000.0); +} + +TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_AND2Eq) { + std::string query = "{a : 13, b : 42}"; + HeuristicCETester ht(collName); + ASSERT_MATCH_CE_CARD(ht, query, 1.31607, 3.0); + ASSERT_MATCH_CE_CARD(ht, query, 1.62658, 7.0); + ASSERT_MATCH_CE_CARD(ht, query, 1.77828, 10.0); + ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 100.0); + ASSERT_MATCH_CE_CARD(ht, query, 10.0, 10000.0); +} + +TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_AND3Eq) { + std::string query = "{a : 13, b : 42, c : 69}"; + HeuristicCETester ht(collName); + ASSERT_MATCH_CE_CARD(ht, query, 1.1472, 3.0); + ASSERT_MATCH_CE_CARD(ht, query, 1.27537, 7.0); + ASSERT_MATCH_CE_CARD(ht, query, 1.33352, 10.0); + ASSERT_MATCH_CE_CARD(ht, query, 1.77828, 100.0); + ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10000.0); +} + +TEST(CEHeuristicTest, 
CEAfterMemoSubstitutionPhase_OR1path) { + std::string query = "{$or: [{a0: {$gt: 44}}, {a0: {$lt: 9}}]}"; + HeuristicCETester ht(collName); + ASSERT_MATCH_CE_CARD(ht, query, 7.52115, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 58.6188, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 451.581, 1000.0); +} + +TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_OR2paths) { + std::string query = "{$or: [{a0: {$gt:44}}, {b0: {$lt: 9}}]}"; + HeuristicCETester ht(collName, kOnlySubPhaseSet); + // Disjunctions on different paths are not SARGable. + ASSERT_MATCH_CE_CARD(ht, query, 8.19, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 69.0525, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 551.1, 1000.0); +} + +TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathSimple) { + std::string query = + "{$or: [" + "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}," + "{$and: [{a0: {$gt:40}}, {a0: {$lt: 44}}]}" + "]}"; + HeuristicCETester ht(collName); + ASSERT_MATCH_CE_CARD(ht, query, 6.42792, 9.0); + ASSERT_MATCH_CE_CARD(ht, query, 37.0586, 99.0); + ASSERT_MATCH_CE_CARD(ht, query, 225.232, 1000.0); +} + + +TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathComplex) { + HeuristicCETester ht(collName, kOnlySubPhaseSet); + // Each disjunct has different number of conjuncts, + // so that its selectivity is different. We need 5 disjuncts to test exponential backoff which + // cuts off at the first 4. The conjuncts are in selectivity order. + std::string query1 = + "{$or: [" + "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}," + "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}, {a0: {$gt: 42}}]}," + "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}]}," + "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: " + "81}}]}," + "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: " + "81}}, {a0: {$lt: 77}}]}" + "]}"; + auto ce1 = ht.getMatchCE(query1); + // The conjuncts are in inverse selectivity order. + std::string query2 = + "{$or: [" + "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: " + "81}}, {a0: {$lt: 77}}]}," + "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: " + "81}}]}," + "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}]}," + "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}, {a0: {$gt: 42}}]}," + "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}" + "]}"; + auto ce2 = ht.getMatchCE(query2); + ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError); +} + +TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF2paths) { + std::string query = + "{$or: [" + "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}," + "{$and: [{b0: {$gt:40}}, {b0: {$lt: 44}}]}" + "]}"; + HeuristicCETester ht(collName, kOnlySubPhaseSet); + // Disjunctions on different paths are not SARGable. 
+    ASSERT_MATCH_CE_CARD(ht, query, 6.6591, 9.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 36.0354, 99.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 205.941, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_CNF1path) {
+    std::string query =
+        "{$and : ["
+        "{$or : [ {a0 : {$gt : 11}}, {a0 : {$lt : 44}} ]},"
+        "{$or : [ {a0 : {$gt : 77}}, {a0 : {$eq : 51}} ]}"
+        "]}";
+    HeuristicCETester ht(collName);
+    ASSERT_MATCH_CE_CARD(ht, query, 6.21212, 9.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 36.4418, 99.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 228.935, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_CNF2paths) {
+    std::string query =
+        "{$and : ["
+        "{$or : [ {a0 : {$gt : 11}}, {a0 : {$lt : 44}} ]},"
+        "{$or : [ {b0 : {$gt : 77}}, {b0 : {$eq : 51}} ]}"
+        "]}";
+    HeuristicCETester ht(collName);
+    ASSERT_MATCH_CE_CARD(ht, query, 6.21212, 9.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 36.4418, 99.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 228.935, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionExplorationPhases) {
+    HeuristicCETester ht(collName);
+    ASSERT_MATCH_CE(ht, "{a : 13, b : 42}", 10.0);
+}
+
+TEST(CEHeuristicTest, CENotEquality) {
+    double collCard = kCollCard;
+    HeuristicCETester opt(collName);
+
+    // We avoid optimizing in order to verify the heuristic estimate of the FilterNode subtree.
+    // Note that we do not generate SargableNodes for $not predicates, but we do generate
+    // SargableNodes without the $not; for the purposes of this test, we want to demonstrate
+    // that $not returns the inverse of the FilterNode estimate.
+    HeuristicCETester noOpt(collName, kNoOptPhaseSet);
+
+    // Equality selectivity is sqrt(kCollCard)/kCollCard = 0.01. When we see a UnaryOp [Not]
+    // above this subtree, we invert the selectivity: 1.0 - 0.01 = 0.99.
+    double ce = 100.0;
+    double inverseCE = collCard - ce;
+    ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$eq: 1}}}", inverseCE);
+
+    // Update cardinality to 25.
+    collCard = 25;
+    opt.setCollCard(collCard);
+    noOpt.setCollCard(collCard);
+
+    // Selectivity is sqrt(25)/25.
+    ce = 5.0;
+    inverseCE = collCard - ce;
+    ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$eq: 1}}}", inverseCE);
+
+    // Update cardinality to 9.
+    collCard = 9;
+    opt.setCollCard(collCard);
+    noOpt.setCollCard(collCard);
+
+    // Selectivity is sqrt(9)/9.
+    ce = 3.0;
+    inverseCE = collCard - ce;
+    ASSERT_MATCH_CE(noOpt, "{a: {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{a: {$not: {$eq: 1}}}", inverseCE);
+    ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$eq: 1}}", ce);
+    ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$eq: 1}}}", inverseCE);
+}
+
+TEST(CEHeuristicTest, CENotOpenRange) {
+    // Repeat the above test for open ranges; the $not cardinality estimate should add up with
+    // the non-$not estimate to the collection cardinality.
+    double collCard = kCollCard;
+    HeuristicCETester opt(collName);
+    HeuristicCETester noOpt(collName, kNoOptPhaseSet);
+
+    // Expect open-range selectivity for input card > 100 (0.33).
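    // [Annotation, not part of this patch.] The heuristic open-range selectivity is a
    // piecewise function of the input cardinality. The branches exercised by this test, per
    // the expected values below:
    //
    //     inputCard < 20         -> 0.70  (e.g. 10 * 0.70 = 7)
    //     inputCard in (20, 100) -> 0.45  (e.g. 25 * 0.45 = 11.25)
    //     inputCard > 100        -> 0.33  (e.g. 10000 * 0.33 = 3300)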
+ double ce = 3300; + double inverseCE = collCard - ce; + + ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$lt: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$lte: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$lte: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gt: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gte: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 1}}", ce); + ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 1}}}", inverseCE); + + // Update cardinality to 25. + collCard = 25; + opt.setCollCard(collCard); + noOpt.setCollCard(collCard); + + // Expect open-range selectivity for input card in range (20, 100) (0.45). + ce = 11.25; + inverseCE = collCard - ce; + + ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$lt: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$lte: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$lte: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gt: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gte: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 1}}", ce); + ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 1}}}", inverseCE); + + // Update cardinality to 10. + collCard = 10.0; + opt.setCollCard(collCard); + noOpt.setCollCard(collCard); + + // Expect open-range selectivity for input card < 20 (0.70). + ce = 7.0; + inverseCE = collCard - ce; + + ASSERT_MATCH_CE(noOpt, "{a: {$lt: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$lt: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$lte: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$lte: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gt: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gte: 1}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 1}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 1}}", ce); + ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 1}}}", inverseCE); +} + +TEST(CEHeuristicTest, CENotClosedRange) { + // Repeat the above test for closed ranges; the $not cardinality estimate should add up with the + // non-$not estimate to the collection cardinality. + double collCard = kCollCard; + double ce = 1089.0; + double inverseCE = collCard - ce; + HeuristicCETester opt(collName); + HeuristicCETester noOpt(collName, kNoOptPhaseSet); + + ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lt: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lt: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lt: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lt: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lte: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lte: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lte: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lte: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 10, $lt: 20}}", ce); + ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 10, $lt: 20}}}", inverseCE); + + /* + * Update cardinality to 25. Here we observe an interesting edge case where the estimated + * cardinality is not the inverse of the actual cardinality. 
+ * + * Consider the predicate {a: {$gt: 10, $lt: 20}}. This generates two FilterNodes stacked on top + * of each other. However, the predicate {a: {$not: {$gt: 10, $lt: 20}}} generates just one + * FilterNode. + * + * We always use input cardinality to determine which interval selectivity we're going to use. + * However, we have a different input cardinality for the one FilterNode case (collCard) than + * for the two FilterNodes case: the first node gets collCard, and the second node gets a + * smaller value after the selectivity of the first filter is applied. + * + * Because we use a piecewise function to pick the selectivity, and because we go from inputCard + * < 100 to inputCard < 20, we choose different selectivities for the intervals in the second + * FilterNode (0.50) than in the first (0.33). + */ + collCard = 25; + ce = 7.875; + inverseCE = 19.9375; + opt.setCollCard(collCard); + noOpt.setCollCard(collCard); + + ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lt: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lt: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lt: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lt: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lte: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lte: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lte: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lte: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 10, $lt: 20}}", ce); + ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 10, $lt: 20}}}", inverseCE); + + // Update cardinality to 10. + collCard = 10.0; + ce = 4.9; + inverseCE = collCard - ce; + opt.setCollCard(collCard); + noOpt.setCollCard(collCard); + + ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lt: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lt: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lt: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lt: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gte: 10, $lte: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gte: 10, $lte: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{a: {$gt: 10, $lte: 20}}", ce); + ASSERT_MATCH_CE(opt, "{a: {$not: {$gt: 10, $lte: 20}}}", inverseCE); + ASSERT_MATCH_CE(noOpt, "{'validate.long.path.estimate': {$gte: 10, $lt: 20}}", ce); + ASSERT_MATCH_CE(opt, "{'validate.long.path.estimate': {$not: {$gte: 10, $lt: 20}}}", inverseCE); +} + +TEST(CEHeuristicTest, CEExists) { + HeuristicCETester noOpt(collName); + + // Test basic case + $not. + ASSERT_MATCH_CE(noOpt, "{a: {$exists: true}}", 7000); + ASSERT_MATCH_CE(noOpt, "{a: {$exists: false}}", 3000); + ASSERT_MATCH_CE(noOpt, "{a: {$not: {$exists: false}}}", 7000); + ASSERT_MATCH_CE(noOpt, "{a: {$not: {$exists: true}}}", 3000); + + // Test combinations of predicates. + ASSERT_MATCH_CE(noOpt, "{a: {$exists: true, $eq: 123}}", 70); + ASSERT_MATCH_CE(noOpt, "{a: {$exists: false, $eq: null}}", 30); + ASSERT_MATCH_CE(noOpt, "{a: {$exists: false}, b: {$eq: 123}}", 30); + ASSERT_MATCH_CE(noOpt, "{a: {$exists: true, $gt: 123}}", 2310); +} + +} // namespace +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/hinted_estimator.cpp b/src/mongo/db/query/ce/hinted_estimator.cpp new file mode 100644 index 00000000000..b27381268b8 --- /dev/null +++ b/src/mongo/db/query/ce/hinted_estimator.cpp @@ -0,0 +1,100 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. 
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce/hinted_estimator.h"
+
+#include "mongo/db/query/ce/heuristic_estimator.h"
+
+namespace mongo::optimizer::ce {
+class HintedTransport {
+public:
+    CEType transport(const ABT& n,
+                     const SargableNode& node,
+                     CEType childResult,
+                     CEType /*bindsResult*/,
+                     CEType /*refsResult*/) {
+        CEType result = childResult;
+        for (const auto& [key, req] : node.getReqMap()) {
+            if (!isIntervalReqFullyOpenDNF(req.getIntervals())) {
+                auto it = _hints.find(key);
+                if (it != _hints.cend()) {
+                    // Assume independence.
+                    result *= it->second;
+                }
+            }
+        }
+
+        return result;
+    }
+
+    template <typename T, typename... Ts>
+    CEType transport(const ABT& n, const T& /*node*/, Ts&&...) {
+        if (canBeLogicalNode<T>()) {
+            return _heuristicCE.deriveCE(_metadata, _memo, _logicalProps, n.ref());
+        }
+        return 0.0;
+    }
+
+    static CEType derive(const Metadata& metadata,
+                         const cascades::Memo& memo,
+                         const PartialSchemaSelHints& hints,
+                         const properties::LogicalProps& logicalProps,
+                         const ABT::reference_type logicalNodeRef) {
+        HintedTransport instance(metadata, memo, logicalProps, hints);
+        return algebra::transport<true>(logicalNodeRef, instance);
+    }
+
+private:
+    HintedTransport(const Metadata& metadata,
+                    const cascades::Memo& memo,
+                    const properties::LogicalProps& logicalProps,
+                    const PartialSchemaSelHints& hints)
+        : _heuristicCE(),
+          _metadata(metadata),
+          _memo(memo),
+          _logicalProps(logicalProps),
+          _hints(hints) {}
+
+    HeuristicEstimator _heuristicCE;
+
+    // We don't own this.
+    const Metadata& _metadata;
+    const cascades::Memo& _memo;
+    const properties::LogicalProps& _logicalProps;
+    const PartialSchemaSelHints& _hints;
+};
+
+CEType HintedEstimator::deriveCE(const Metadata& metadata,
+                                 const cascades::Memo& memo,
+                                 const properties::LogicalProps& logicalProps,
+                                 const ABT::reference_type logicalNodeRef) const {
+    return HintedTransport::derive(metadata, memo, _hints, logicalProps, logicalNodeRef);
+}
+
+} // namespace mongo::optimizer::ce
diff --git a/src/mongo/db/query/ce/hinted_estimator.h b/src/mongo/db/query/ce/hinted_estimator.h
new file mode 100644
index 00000000000..766a1a1f03c
--- /dev/null
+++ b/src/mongo/db/query/ce/hinted_estimator.h
@@ -0,0 +1,57 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/query/optimizer/cascades/interfaces.h"
+
+namespace mongo::optimizer::ce {
+
+using PartialSchemaSelHints =
+    std::map<PartialSchemaKey, SelectivityType, PartialSchemaKeyLessComparator>;
+
+/**
+ * Estimation based on hints. The hints are organized in a PartialSchemaSelHints structure.
+ * SargableNodes are estimated based on the matching PartialSchemaKeys.
+ */
+class HintedEstimator : public cascades::CardinalityEstimator {
+public:
+    HintedEstimator(PartialSchemaSelHints hints) : _hints(std::move(hints)) {}
+
+    CEType deriveCE(const Metadata& metadata,
+                    const cascades::Memo& memo,
+                    const properties::LogicalProps& logicalProps,
+                    ABT::reference_type logicalNodeRef) const override final;
+
+private:
+    // Selectivity hints per PartialSchemaKey.
+    PartialSchemaSelHints _hints;
+};
+
+} // namespace mongo::optimizer::ce
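A minimal usage sketch (illustration only, not part of this patch; the PartialSchemaKey construction is an assumption about how a predicate on field "a" of scan projection "root" is encoded):

    // Pin every non-fully-open SargableNode requirement on path "a" to selectivity 0.1;
    // all other nodes fall back to HeuristicEstimator, as implemented above.
    PartialSchemaSelHints hints;
    hints.emplace(PartialSchemaKey{"root", make<PathGet>("a", make<PathIdentity>())},
                  SelectivityType{0.1});
    HintedEstimator estimator{std::move(hints)};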
diff --git a/src/mongo/db/query/ce/histogram_array_data_test.cpp b/src/mongo/db/query/ce/histogram_array_data_test.cpp
new file mode 100644
index 00000000000..7f8bb92fc51
--- /dev/null
+++ b/src/mongo/db/query/ce/histogram_array_data_test.cpp
@@ -0,0 +1,298 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include <cmath>
+#include <sstream>
+#include <vector>
+
+#include "mongo/db/exec/sbe/values/value.h"
+#include "mongo/db/query/ce/histogram_predicate_estimation.h"
+#include "mongo/db/query/ce/test_utils.h"
+#include "mongo/db/query/query_test_service_context.h"
+#include "mongo/db/query/stats/array_histogram.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::optimizer::ce {
+namespace {
+namespace value = sbe::value;
+
+using stats::ArrayHistogram;
+using stats::ScalarHistogram;
+using stats::TypeCounts;
+
+/**
+ * Structure representing a range query and its estimated and actual cardinalities.
+ * Used to record hand-crafted queries over a pre-generated dataset.
+ */
+struct QuerySpec {
+    // Lower bound of the query range.
+    int32_t low;
+    // Upper bound of the query range.
+    int32_t high;
+    // Estimated cardinality of the $match query.
+    double estMatch;
+    // Actual cardinality of the $match query.
+    double actMatch;
+    // Estimated cardinality of the $elemMatch query.
+    double estElemMatch;
+    // Actual cardinality of the $elemMatch query.
+    double actElemMatch;
+};
+
+static std::pair<double, double> computeErrors(size_t actualCard, double estimatedCard) {
+    double error = estimatedCard - actualCard;
+    double relError = (actualCard == 0) ? (estimatedCard == 0 ? 0.0 : -1.0) : error / actualCard;
+    return std::make_pair(error, relError);
+}
+
+static std::string serializeQuery(QuerySpec& q, bool isElemMatch) {
+    std::ostringstream os;
+    os << "{$match: {a: {";
+    if (isElemMatch) {
+        os << "$elemMatch: {";
+    }
+    os << "$gt: " << q.low;
+    os << ", $lt: " << q.high;
+    if (isElemMatch) {
+        os << "}";
+    }
+    os << "}}}\n";
+    return os.str();
+}
+
+static std::string computeRMSE(std::vector<QuerySpec>& querySet, bool isElemMatch) {
+    double rms = 0.0, relRms = 0.0, meanAbsSelErr = 0.0;
+    size_t trialSize = querySet.size();
+    const size_t dataSize = 1000;
+
+    std::ostringstream os;
+    os << "\nQueries:\n";
+    for (auto& q : querySet) {
+        double estimatedCard = isElemMatch ? q.estElemMatch : q.estMatch;
+        double actualCard = isElemMatch ?
q.actElemMatch : q.actMatch; + + auto [error, relError] = computeErrors(actualCard, estimatedCard); + rms += error * error; + relRms += relError * relError; + meanAbsSelErr += std::abs(error); + os << serializeQuery(q, isElemMatch); + os << "Estimated: " << estimatedCard << " Actual " << actualCard << " (Error: " << error + << " RelError: " << relError << ")\n\n"; + } + rms = std::sqrt(rms / trialSize); + relRms = std::sqrt(relRms / trialSize); + meanAbsSelErr /= (trialSize * dataSize); + + os << "=====" << (isElemMatch ? " ElemMatch errors: " : "Match errors:") << "=====\n"; + os << "RMSE : " << rms << " RelRMSE : " << relRms + << " MeanAbsSelectivityError: " << meanAbsSelErr << std::endl; + return os.str(); +} + +TEST(EstimatorArrayDataTest, Histogram1000ArraysSmall10Buckets) { + std::vector scalarData{{}}; + const ScalarHistogram scalarHist = createHistogram(scalarData); + + std::vector minData{{0, 5.0, 0.0, 0.0}, + {553, 2.0, 935.0, 303.0}, + {591, 4.0, 2.0, 1.0}, + {656, 2.0, 21.0, 12.0}, + {678, 3.0, 6.0, 3.0}, + {693, 2.0, 1.0, 1.0}, + {730, 1.0, 6.0, 3.0}, + {788, 1.0, 2.0, 2.0}, + {847, 2.0, 4.0, 1.0}, + {867, 1.0, 0.0, 0.0}}; + + const ScalarHistogram aMinHist = createHistogram(minData); + + std::vector maxData{{117, 1.0, 0.0, 0.0}, + {210, 1.0, 1.0, 1.0}, + {591, 1.0, 8.0, 4.0}, + {656, 1.0, 0.0, 0.0}, + {353, 2.0, 18.0, 9.0}, + {610, 5.0, 125.0, 65.0}, + {733, 8.0, 134.0, 53.0}, + {768, 6.0, 50.0, 16.0}, + {957, 8.0, 448.0, 137.0}, + {1000, 7.0, 176.0, 40.0}}; + + const ScalarHistogram aMaxHist = createHistogram(maxData); + + std::vector uniqueData{{0, 5.0, 0.0, 0.0}, + {16, 11.0, 74.0, 13.0}, + {192, 13.0, 698.0, 148.0}, + {271, 9.0, 312.0, 70.0}, + {670, 7.0, 1545.0, 355.0}, + {712, 9.0, 159.0, 32.0}, + {776, 11.0, 247.0, 54.0}, + {869, 9.0, 361.0, 85.0}, + {957, 8.0, 323.0, 76.0}, + {1000, 7.0, 188.0, 40.0}}; + + const ScalarHistogram aUniqueHist = createHistogram(uniqueData); + + TypeCounts typeCounts; + TypeCounts arrayTypeCounts; + // Dataset generated as 1000 arrays of size between 3 to 5. + typeCounts.insert({value::TypeTags::Array, 1000}); + arrayTypeCounts.insert({value::TypeTags::NumberInt32, 3996}); + + const ArrayHistogram arrHist(scalarHist, + typeCounts, + aUniqueHist, + aMinHist, + aMaxHist, + arrayTypeCounts, + 0 /* emptyArrayCount */); + + std::vector querySet{{10, 20, 35.7, 93.0, 37.8, 39.0}, + {10, 60, 103.3, 240.0, 158.0, 196.0}, + {320, 330, 554.5, 746.0, 26.0, 30.0}, + {320, 400, 672.9, 832.0, 231.5, 298.0}, + {980, 990, 88.8, 101.0, 36.5, 41.0}, + {970, 1050, 129.7, 141.0, 129.7, 141.0}}; + + for (const auto q : querySet) { + // $match query, includeScalar = true. + double estCard = estimateCardRange(arrHist, + false /* lowInclusive */, + value::TypeTags::NumberInt32, + sbe::value::bitcastFrom(q.low), + false /* highInclusive */, + value::TypeTags::NumberInt32, + sbe::value::bitcastFrom(q.high), + true /* includeScalar */); + ASSERT_APPROX_EQUAL(estCard, q.estMatch, 0.1); + + // $elemMatch query, includeScalar = false. 
+        estCard = estimateCardRange(arrHist,
+                                    false /* lowInclusive */,
+                                    value::TypeTags::NumberInt32,
+                                    sbe::value::bitcastFrom<int32_t>(q.low),
+                                    false /* highInclusive */,
+                                    value::TypeTags::NumberInt32,
+                                    sbe::value::bitcastFrom<int32_t>(q.high),
+                                    false /* includeScalar */);
+        ASSERT_APPROX_EQUAL(estCard, q.estElemMatch, 0.1);
+    }
+    std::cout << computeRMSE(querySet, false /* isElemMatch */) << std::endl;
+    std::cout << computeRMSE(querySet, true /* isElemMatch */) << std::endl;
+}
+
+TEST(EstimatorArrayDataTest, Histogram1000ArraysLarge10Buckets) {
+    std::vector<BucketData> scalarData{{}};
+    const ScalarHistogram scalarHist = createHistogram(scalarData);
+
+    std::vector<BucketData> minData{{0, 2.0, 0.0, 0.0},
+                                    {1324, 4.0, 925.0, 408.0},
+                                    {1389, 5.0, 7.0, 5.0},
+                                    {1521, 2.0, 16.0, 10.0},
+                                    {1621, 2.0, 13.0, 7.0},
+                                    {1852, 5.0, 10.0, 9.0},
+                                    {1864, 2.0, 0.0, 0.0},
+                                    {1971, 1.0, 3.0, 3.0},
+                                    {2062, 2.0, 0.0, 0.0},
+                                    {2873, 1.0, 0.0, 0.0}};
+
+    const ScalarHistogram aMinHist = createHistogram(minData);
+
+    std::vector<BucketData> maxData{{2261, 1.0, 0.0, 0.0},
+                                    {2673, 1.0, 0.0, 0.0},
+                                    {2930, 1.0, 1.0, 1.0},
+                                    {3048, 2.0, 2.0, 2.0},
+                                    {3128, 3.0, 1.0, 1.0},
+                                    {3281, 2.0, 0.0, 0.0},
+                                    {3378, 2.0, 7.0, 5.0},
+                                    {3453, 4.0, 2.0, 2.0},
+                                    {3763, 6.0, 44.0, 23.0},
+                                    {5000, 1.0, 920.0, 416.0}};
+
+    const ScalarHistogram aMaxHist = createHistogram(maxData);
+
+    std::vector<BucketData> uniqueData{{0, 2.0, 0.0, 0.0},
+                                       {1106, 9.0, 1970.0, 704.0},
+                                       {1542, 11.0, 736.0, 280.0},
+                                       {3267, 6.0, 3141.0, 1097.0},
+                                       {3531, 6.0, 461.0, 175.0},
+                                       {3570, 7.0, 48.0, 20.0},
+                                       {4573, 8.0, 1851.0, 656.0},
+                                       {4619, 6.0, 65.0, 30.0},
+                                       {4782, 5.0, 265.0, 99.0},
+                                       {5000, 1.0, 342.0, 135.0}};
+
+    const ScalarHistogram aUniqueHist = createHistogram(uniqueData);
+
+    TypeCounts typeCounts;
+    TypeCounts arrayTypeCounts;
+    // Dataset generated as 1000 arrays of sizes between 8 and 10.
+    typeCounts.insert({value::TypeTags::Array, 1000});
+    arrayTypeCounts.insert({value::TypeTags::NumberInt32, 8940});
+
+    const ArrayHistogram arrHist(scalarHist,
+                                 typeCounts,
+                                 aUniqueHist,
+                                 aMinHist,
+                                 aMaxHist,
+                                 arrayTypeCounts,
+                                 0 /* emptyArrayCount */);
+
+    std::vector<QuerySpec> querySet{{10, 20, 13.7, 39.0, 9.7, 26.0},
+                                    {10, 60, 41.6, 108.0, 55.7, 101.0},
+                                    {1000, 1010, 705.4, 861.0, 9.7, 7.0},
+                                    {1000, 1050, 733.3, 884.0, 55.7, 87.0},
+                                    {3250, 3300, 988.0, 988.0, 59.3, 86.0},
+                                    {4970, 4980, 23.3, 53.0, 8.5, 16.0}};
+
+    for (const auto q : querySet) {
+        // $match query, includeScalar = true.
+        double estCard = estimateCardRange(arrHist,
+                                           false /* lowInclusive */,
+                                           value::TypeTags::NumberInt32,
+                                           sbe::value::bitcastFrom<int32_t>(q.low),
+                                           false /* highInclusive */,
+                                           value::TypeTags::NumberInt32,
+                                           sbe::value::bitcastFrom<int32_t>(q.high),
+                                           true /* includeScalar */);
+        ASSERT_APPROX_EQUAL(estCard, q.estMatch, 0.1);
+
+        // $elemMatch query, includeScalar = false.
+        estCard = estimateCardRange(arrHist,
+                                    false /* lowInclusive */,
+                                    value::TypeTags::NumberInt32,
+                                    sbe::value::bitcastFrom<int32_t>(q.low),
+                                    false /* highInclusive */,
+                                    value::TypeTags::NumberInt32,
+                                    sbe::value::bitcastFrom<int32_t>(q.high),
+                                    false /* includeScalar */);
+        ASSERT_APPROX_EQUAL(estCard, q.estElemMatch, 0.1);
+    }
+    std::cout << computeRMSE(querySet, false /* isElemMatch */) << std::endl;
+    std::cout << computeRMSE(querySet, true /* isElemMatch */) << std::endl;
+}
+}  // namespace
+}  // namespace mongo::optimizer::ce
diff --git a/src/mongo/db/query/ce/histogram_edge_cases_test.cpp b/src/mongo/db/query/ce/histogram_edge_cases_test.cpp
new file mode 100644
index 00000000000..051d3134dcc
--- /dev/null
+++ b/src/mongo/db/query/ce/histogram_edge_cases_test.cpp
@@ -0,0 +1,1007 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/pipeline/abt/utils.h"
+#include "mongo/db/query/ce/histogram_predicate_estimation.h"
+#include "mongo/db/query/ce/test_utils.h"
+#include "mongo/db/query/optimizer/utils/ce_math.h"
+#include "mongo/db/query/sbe_stage_builder_helpers.h"
+#include "mongo/db/query/stats/array_histogram.h"
+#include "mongo/db/query/stats/maxdiff_test_utils.h"
+#include "mongo/db/query/stats/value_utils.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::optimizer::ce {
+namespace {
+namespace value = sbe::value;
+
+using stats::ArrayHistogram;
+using stats::makeInt64Value;
+using stats::SBEValue;
+using stats::ScalarHistogram;
+using stats::TypeCounts;
+
+constexpr double kErrorBound = 0.01;
+
+TEST(EstimatorTest, OneBucketIntHistogram) {
+    // Data set of 10 values, each with frequency 3, in the range (-inf, 100].
+    // Example: {-100, -20, 0, 20, 50, 60, 70, 80, 90, 100}.
+    std::vector<BucketData> data{{100, 3.0, 27.0, 9.0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    ASSERT_EQ(30.0, getTotals(hist).card);
+
+    // Estimates with the bucket bound.
+    ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
+    ASSERT_EQ(27.0, estimateIntValCard(hist, 100, EstimationType::kLess));
+    ASSERT_EQ(30.0, estimateIntValCard(hist, 100, EstimationType::kLessOrEqual));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
+    ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kGreaterOrEqual));
+
+    // Estimates with a value inside the bucket.
+    ASSERT_EQ(3.0, estimateIntValCard(hist, 10, EstimationType::kEqual));
+    // No interpolation is possible for estimates of inequalities in a single bucket. The
+    // estimates are based on the default cardinality of half a bucket +/- the estimate of
+    // equality inside the bucket.
+    ASSERT_EQ(10.5, estimateIntValCard(hist, 10, EstimationType::kLess));
+    ASSERT_EQ(13.5, estimateIntValCard(hist, 10, EstimationType::kLessOrEqual));
+    ASSERT_EQ(16.5, estimateIntValCard(hist, 10, EstimationType::kGreater));
+    ASSERT_EQ(19.5, estimateIntValCard(hist, 10, EstimationType::kGreaterOrEqual));
+
+    // Estimates for a value larger than the last bucket bound.
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual));
+    ASSERT_EQ(30.0, estimateIntValCard(hist, 1000, EstimationType::kLess));
+    ASSERT_EQ(30.0, estimateIntValCard(hist, 1000, EstimationType::kLessOrEqual));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreaterOrEqual));
+}
+
+TEST(EstimatorTest, OneExclusiveBucketIntHistogram) {
+    // Data set of a single value.
+    // By "exclusive bucket" we mean a bucket with only a boundary value, that is, both the range
+    // frequency and the NDV are zero.
+    std::vector<BucketData> data{{100, 2.0, 0.0, 0.0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    ASSERT_EQ(2.0, getTotals(hist).card);
+
+    // Estimates with the bucket boundary.
+    ASSERT_EQ(2.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kLess));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
+
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 0, EstimationType::kEqual));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 0, EstimationType::kLess));
+    ASSERT_EQ(2.0, estimateIntValCard(hist, 0, EstimationType::kGreater));
+
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual));
+    ASSERT_EQ(2.0, estimateIntValCard(hist, 1000, EstimationType::kLess));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater));
+}
+
+TEST(EstimatorTest, OneBucketTwoIntValuesHistogram) {
+    // Data set of two values, example {5, 100, 100}.
+    std::vector<BucketData> data{{100, 2.0, 1.0, 1.0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    ASSERT_EQ(3.0, getTotals(hist).card);
+
+    // Estimates with the bucket boundary.
+    ASSERT_EQ(2.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
+    ASSERT_EQ(1.0, estimateIntValCard(hist, 100, EstimationType::kLess));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
+
+    ASSERT_EQ(1.0, estimateIntValCard(hist, 10, EstimationType::kEqual));
+    // Default estimate of half of the bucket's range frequency = 0.5.
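+    // E.g. with rangeFreq = 1.0 here, kLess(10) defaults to 1.0 / 2 = 0.5, and
+    // kGreater(10) is the total minus that half-bucket estimate: 3.0 - 0.5 = 2.5.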
+    ASSERT_EQ(0.5, estimateIntValCard(hist, 10, EstimationType::kLess));
+    ASSERT_EQ(2.5, estimateIntValCard(hist, 10, EstimationType::kGreater));
+
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual));
+    ASSERT_EQ(3.0, estimateIntValCard(hist, 1000, EstimationType::kLess));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater));
+}
+
+TEST(EstimatorTest, OneBucketTwoIntValuesHistogram2) {
+    // Similar to the above test with a higher frequency for the second value.
+    // Example {5, 5, 5, 100, 100}.
+    std::vector<BucketData> data{{100, 2.0, 3.0, 1.0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    ASSERT_EQ(5.0, getTotals(hist).card);
+
+    // Estimates with the bucket boundary.
+    ASSERT_EQ(2.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
+    ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kLess));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
+
+    ASSERT_EQ(3.0, estimateIntValCard(hist, 10, EstimationType::kEqual));
+    // Default estimate of half of the bucket's range frequency = 1.5.
+    ASSERT_EQ(1.5, estimateIntValCard(hist, 10, EstimationType::kLess));
+    ASSERT_EQ(3.5, estimateIntValCard(hist, 10, EstimationType::kGreater));
+
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual));
+    ASSERT_EQ(5.0, estimateIntValCard(hist, 1000, EstimationType::kLess));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater));
+}
+
+TEST(EstimatorTest, TwoBucketsIntHistogram) {
+    // Data set of 10 distinct values (30 in total) in the range [1, 100].
+    std::vector<BucketData> data{{1, 1.0, 0.0, 0.0}, {100, 3.0, 26.0, 8.0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    ASSERT_EQ(30.0, getTotals(hist).card);
+
+    // Estimates for a value smaller than the first bucket.
+    ASSERT_EQ(0.0, estimateIntValCard(hist, -42, EstimationType::kEqual));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, -42, EstimationType::kLess));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, -42, EstimationType::kLessOrEqual));
+    ASSERT_EQ(30.0, estimateIntValCard(hist, -42, EstimationType::kGreater));
+    ASSERT_EQ(30.0, estimateIntValCard(hist, -42, EstimationType::kGreaterOrEqual));
+
+    // Estimates with bucket bounds.
+    ASSERT_EQ(1.0, estimateIntValCard(hist, 1, EstimationType::kEqual));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 1, EstimationType::kLess));
+    ASSERT_EQ(1.0, estimateIntValCard(hist, 1, EstimationType::kLessOrEqual));
+    ASSERT_EQ(29.0, estimateIntValCard(hist, 1, EstimationType::kGreater));
+    ASSERT_EQ(30.0, estimateIntValCard(hist, 1, EstimationType::kGreaterOrEqual));
+
+    ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
+    ASSERT_EQ(27.0, estimateIntValCard(hist, 100, EstimationType::kLess));
+    ASSERT_EQ(30.0, estimateIntValCard(hist, 100, EstimationType::kLessOrEqual));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
+    ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kGreaterOrEqual));
+
+    // Estimates with a value inside the bucket. The estimates use interpolation.
+    // The interpolated fraction of the bucket for the value 10 is smaller than the estimate
+    // for equality, so the estimates for Less and LessOrEqual coincide.
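+    // Sketch of the arithmetic (assuming linear interpolation over the bucket (1, 100]):
+    // eq ~= rangeFreq / ndv = 26 / 8 = 3.25, and kLess(10) ~= 1 + 26 * (10 - 1) / (100 - 1)
+    // ~= 3.36. The interpolated fraction (~2.36) is below the equality estimate, so no
+    // equality mass is subtracted and kLess == kLessOrEqual.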
+    ASSERT_APPROX_EQUAL(3.25, estimateIntValCard(hist, 10, EstimationType::kEqual), kErrorBound);
+    ASSERT_APPROX_EQUAL(3.36, estimateIntValCard(hist, 10, EstimationType::kLess), kErrorBound);
+    ASSERT_APPROX_EQUAL(
+        3.36, estimateIntValCard(hist, 10, EstimationType::kLessOrEqual), kErrorBound);
+
+    ASSERT_APPROX_EQUAL(26.64, estimateIntValCard(hist, 10, EstimationType::kGreater), kErrorBound);
+    ASSERT_APPROX_EQUAL(
+        26.64, estimateIntValCard(hist, 10, EstimationType::kGreaterOrEqual), kErrorBound);
+
+    // Different estimates for Less and LessOrEqual for the value of 50.
+    ASSERT_APPROX_EQUAL(3.25, estimateIntValCard(hist, 50, EstimationType::kEqual), kErrorBound);
+    ASSERT_APPROX_EQUAL(10.61, estimateIntValCard(hist, 50, EstimationType::kLess), kErrorBound);
+    ASSERT_APPROX_EQUAL(
+        13.87, estimateIntValCard(hist, 50, EstimationType::kLessOrEqual), kErrorBound);
+    ASSERT_APPROX_EQUAL(16.13, estimateIntValCard(hist, 50, EstimationType::kGreater), kErrorBound);
+    ASSERT_APPROX_EQUAL(
+        19.38, estimateIntValCard(hist, 50, EstimationType::kGreaterOrEqual), kErrorBound);
+}
+
+TEST(EstimatorTest, ThreeExclusiveBucketsIntHistogram) {
+    std::vector<BucketData> data{{1, 1.0, 0.0, 0.0}, {10, 8.0, 0.0, 0.0}, {100, 1.0, 0.0, 0.0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    ASSERT_EQ(10.0, getTotals(hist).card);
+
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 5, EstimationType::kEqual));
+    ASSERT_EQ(1.0, estimateIntValCard(hist, 5, EstimationType::kLess));
+    ASSERT_EQ(1.0, estimateIntValCard(hist, 5, EstimationType::kLessOrEqual));
+    ASSERT_EQ(9.0, estimateIntValCard(hist, 5, EstimationType::kGreater));
+    ASSERT_EQ(9.0, estimateIntValCard(hist, 5, EstimationType::kGreaterOrEqual));
+}
+
+TEST(EstimatorTest, OneBucketStrHistogram) {
+    std::vector<BucketData> data{{"xyz", 3.0, 27.0, 9.0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    ASSERT_EQ(30.0, getTotals(hist).card);
+
+    // Estimates with the bucket bound.
+    auto [tag, value] = value::makeNewString("xyz"_sd);
+    value::ValueGuard vg(tag, value);
+    double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_EQ(3.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+    ASSERT_EQ(27.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+    ASSERT_EQ(30.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card;
+    ASSERT_EQ(3.0, expectedCard);
+
+    // Estimates for a value inside the bucket. Since there is no low bound value in the
+    // histogram, all values smaller than the upper bound are estimated the same way, using
+    // half of the bucket cardinality.
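+    // E.g. for "a" below: eq = rangeFreq / ndv = 27 / 9 = 3.0, kLessOrEqual = 27 / 2 = 13.5,
+    // and kLess = 13.5 - 3.0 = 10.5, i.e. half the range frequency +/- the equality estimate.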
+ std::tie(tag, value) = value::makeNewString("a"_sd); + expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; + ASSERT_EQ(3.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; + ASSERT_EQ(10.5, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; + ASSERT_EQ(13.5, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; + ASSERT_EQ(16.5, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; + ASSERT_EQ(19.5, expectedCard); + + std::tie(tag, value) = value::makeNewString(""_sd); + // In the special case of a single string bucket, we estimate empty string equality as for any + // other string value. In practice if there are at least 2 buckets for the string data and an + // empty string in the data set, it will be chosen as a bound for the first bucket and produce + // precise estimates. + expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; + ASSERT_EQ(3.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; + ASSERT_EQ(0.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; + ASSERT_EQ(30.0, expectedCard); + + // Estimates for a value larger than the upper bound. + std::tie(tag, value) = value::makeNewString("z"_sd); + expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; + ASSERT_EQ(0.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; + ASSERT_EQ(30.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; + ASSERT_EQ(0.0, expectedCard); +} + +TEST(EstimatorTest, TwoBucketsStrHistogram) { + // Data set of 100 strings in the range ["abc", "xyz"], with average frequency of 2. + std::vector data{{"abc", 2.0, 0.0, 0.0}, {"xyz", 3.0, 95.0, 48.0}}; + const ScalarHistogram hist = createHistogram(data); + + ASSERT_EQ(100.0, getTotals(hist).card); + + // Estimates for a value smaller than the first bucket bound. + auto [tag, value] = value::makeNewString("a"_sd); + value::ValueGuard vg(tag, value); + + double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; + ASSERT_EQ(0.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; + ASSERT_EQ(0.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; + ASSERT_EQ(0.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; + ASSERT_EQ(100.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; + ASSERT_EQ(100.0, expectedCard); + + // Estimates with bucket bounds. 
+ std::tie(tag, value) = value::makeNewString("abc"_sd); + expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; + ASSERT_EQ(2.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; + ASSERT_EQ(0.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; + ASSERT_EQ(2.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; + ASSERT_EQ(98.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; + ASSERT_EQ(100.0, expectedCard); + + std::tie(tag, value) = value::makeNewString("xyz"_sd); + expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; + ASSERT_EQ(3.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; + ASSERT_EQ(97.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; + ASSERT_EQ(100.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; + ASSERT_EQ(0.0, expectedCard); + expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; + ASSERT_EQ(3.0, expectedCard); + + // Estimates for a value inside the bucket. + std::tie(tag, value) = value::makeNewString("sun"_sd); + expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; + ASSERT_APPROX_EQUAL(1.98, expectedCard, kErrorBound); + expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; + ASSERT_APPROX_EQUAL(74.39, expectedCard, kErrorBound); + expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; + ASSERT_APPROX_EQUAL(76.37, expectedCard, kErrorBound); + expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; + ASSERT_APPROX_EQUAL(23.64, expectedCard, kErrorBound); + expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; + ASSERT_APPROX_EQUAL(25.62, expectedCard, kErrorBound); + + // Estimate for a value very close to the bucket bound. + std::tie(tag, value) = value::makeNewString("xyw"_sd); + expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card; + ASSERT_APPROX_EQUAL(1.98, expectedCard, kErrorBound); + expectedCard = estimate(hist, tag, value, EstimationType::kLess).card; + ASSERT_APPROX_EQUAL(95.02, expectedCard, kErrorBound); + expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card; + ASSERT_APPROX_EQUAL(96.99, expectedCard, kErrorBound); + expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card; + ASSERT_APPROX_EQUAL(3.0, expectedCard, kErrorBound); + expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card; + ASSERT_APPROX_EQUAL(4.98, expectedCard, kErrorBound); +} + +TEST(EstimatorTest, TwoBucketsDateHistogram) { + // June 6, 2017 -- June 7, 2017. 
+    const int64_t startInstant = 1496777923000LL;
+    const int64_t endInstant = 1496864323000LL;
+    const auto startDate = Date_t::fromMillisSinceEpoch(startInstant);
+    const auto endDate = Date_t::fromMillisSinceEpoch(endInstant);
+
+    std::vector<BucketData> data{{Value(startDate), 3.0, 0.0, 0.0},
+                                 {Value(endDate), 1.0, 96.0, 48.0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    ASSERT_EQ(100.0, getTotals(hist).card);
+
+    const auto valueBefore = value::bitcastFrom<int64_t>(startInstant - 1);
+    double expectedCard =
+        estimate(hist, value::TypeTags::Date, valueBefore, EstimationType::kEqual).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard = estimate(hist, value::TypeTags::Date, valueBefore, EstimationType::kLess).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Date, valueBefore, EstimationType::kGreater).card;
+    ASSERT_EQ(100.0, expectedCard);
+
+    const auto valueStart = value::bitcastFrom<int64_t>(startInstant);
+    expectedCard = estimate(hist, value::TypeTags::Date, valueStart, EstimationType::kEqual).card;
+    ASSERT_EQ(3.0, expectedCard);
+    expectedCard = estimate(hist, value::TypeTags::Date, valueStart, EstimationType::kLess).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Date, valueStart, EstimationType::kGreater).card;
+    ASSERT_EQ(97.0, expectedCard);
+
+    const auto valueEnd = value::bitcastFrom<int64_t>(endInstant);
+    expectedCard = estimate(hist, value::TypeTags::Date, valueEnd, EstimationType::kEqual).card;
+    ASSERT_EQ(1.0, expectedCard);
+    expectedCard = estimate(hist, value::TypeTags::Date, valueEnd, EstimationType::kLess).card;
+    ASSERT_EQ(99.0, expectedCard);
+    expectedCard = estimate(hist, value::TypeTags::Date, valueEnd, EstimationType::kGreater).card;
+    ASSERT_EQ(0.0, expectedCard);
+
+    const auto valueIn = value::bitcastFrom<int64_t>(startInstant + 43000000);
+    expectedCard = estimate(hist, value::TypeTags::Date, valueIn, EstimationType::kEqual).card;
+    ASSERT_EQ(2.0, expectedCard);
+    expectedCard = estimate(hist, value::TypeTags::Date, valueIn, EstimationType::kLess).card;
+    ASSERT_APPROX_EQUAL(48.77, expectedCard, kErrorBound);
+    expectedCard = estimate(hist, value::TypeTags::Date, valueIn, EstimationType::kGreater).card;
+    ASSERT_APPROX_EQUAL(49.22, expectedCard, kErrorBound);
+
+    const auto valueAfter = value::bitcastFrom<int64_t>(endInstant + 100);
+    expectedCard = estimate(hist, value::TypeTags::Date, valueAfter, EstimationType::kEqual).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard = estimate(hist, value::TypeTags::Date, valueAfter, EstimationType::kLess).card;
+    ASSERT_EQ(100.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Date, valueAfter, EstimationType::kGreater).card;
+    ASSERT_EQ(0.0, expectedCard);
+}
+
+TEST(EstimatorTest, TwoBucketsTimestampHistogram) {
+    // June 6, 2017 -- June 7, 2017 in seconds.
+    const int64_t startInstant = 1496777923LL;
+    const int64_t endInstant = 1496864323LL;
+    const Timestamp startTs{Seconds(startInstant), 0};
+    const Timestamp endTs{Seconds(endInstant), 0};
+
+    std::vector<BucketData> data{{Value(startTs), 3.0, 0.0, 0.0}, {Value(endTs), 1.0, 96.0, 48.0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    ASSERT_EQ(100.0, getTotals(hist).card);
+
+    const auto valueBefore = value::bitcastFrom<uint64_t>(startTs.asULL() - 1);
+    double expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kEqual).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kLess).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kGreater).card;
+    ASSERT_EQ(100.0, expectedCard);
+
+    const auto valueStart = value::bitcastFrom<uint64_t>(
+        startTs.asULL());  // NB: startTs.asInt64() produces a different value.
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kEqual).card;
+    ASSERT_EQ(3.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kLess).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kGreater).card;
+    ASSERT_EQ(97.0, expectedCard);
+
+    const auto valueEnd = value::bitcastFrom<uint64_t>(endTs.asULL());
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kEqual).card;
+    ASSERT_EQ(1.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kLess).card;
+    ASSERT_EQ(99.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kGreater).card;
+    ASSERT_EQ(0.0, expectedCard);
+
+    const auto valueIn = value::bitcastFrom<uint64_t>((startTs.asULL() + endTs.asULL()) / 2);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kEqual).card;
+    ASSERT_EQ(2.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kLess).card;
+    ASSERT_APPROX_EQUAL(49.0, expectedCard, kErrorBound);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kGreater).card;
+    ASSERT_APPROX_EQUAL(49.0, expectedCard, kErrorBound);
+
+    const auto valueAfter = value::bitcastFrom<uint64_t>(endTs.asULL() + 100);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kEqual).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kLess).card;
+    ASSERT_EQ(100.0, expectedCard);
+    expectedCard =
+        estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kGreater).card;
+    ASSERT_EQ(0.0, expectedCard);
+}
+
+TEST(EstimatorTest, TwoBucketsObjectIdHistogram) {
+    const auto startOid = OID("63340d8d27afef2de7357e8d");
+    const auto endOid = OID("63340dbed6cd8af737d4139a");
+    ASSERT_TRUE(startOid < endOid);
+
+    std::vector<BucketData> data{{Value(startOid), 2.0, 0.0, 0.0},
+                                 {Value(endOid), 1.0, 97.0, 77.0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    ASSERT_EQ(100.0, getTotals(hist).card);
+
+    auto [tag, value] = value::makeNewObjectId();
+    value::ValueGuard vg(tag, value);
+    const auto oidBefore = OID("63340d8d27afef2de7357e8c");
+    oidBefore.view().readInto(value::getObjectIdView(value));
+
+    double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+    ASSERT_EQ(100.0, expectedCard);
+
+    // Bucket bounds.
+    startOid.view().readInto(value::getObjectIdView(value));
+    expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_EQ(2.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+    ASSERT_EQ(98.0, expectedCard);
+
+    endOid.view().readInto(value::getObjectIdView(value));
+    expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_EQ(1.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+    ASSERT_EQ(99.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+    ASSERT_EQ(0.0, expectedCard);
+
+    // ObjectId value inside the bucket.
+    const auto oidInside = OID("63340db2cd4d46ff39178e9d");
+    oidInside.view().readInto(value::getObjectIdView(value));
+    expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_APPROX_EQUAL(1.25, expectedCard, kErrorBound);
+
+    expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+    ASSERT_APPROX_EQUAL(83.95, expectedCard, kErrorBound);
+    expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+    ASSERT_APPROX_EQUAL(14.78, expectedCard, kErrorBound);
+
+    const auto oidAfter = OID("63340dbed6cd8af737d4139b");
+    oidAfter.view().readInto(value::getObjectIdView(value));
+    expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_EQ(0.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+    ASSERT_EQ(100.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+    ASSERT_EQ(0.0, expectedCard);
+}
+
+TEST(EstimatorTest, TwoExclusiveBucketsMixedHistogram) {
+    // Data set of mixed data types: 3 integers and 5 strings.
+    std::vector<BucketData> data{{1, 3.0, 0.0, 0.0}, {"abc", 5.0, 0.0, 0.0}};
+    const ScalarHistogram hist = createHistogram(data);
+    const ArrayHistogram arrHist(
+        hist, TypeCounts{{value::TypeTags::NumberInt64, 3}, {value::TypeTags::StringSmall, 5}});
+
+    const auto [tagLowDbl, valLowDbl] =
+        std::make_pair(value::TypeTags::NumberDouble,
+                       value::bitcastFrom<double>(std::numeric_limits<double>::quiet_NaN()));
+
+    // (NaN, 1).
+    double expectedCard = estimateCardRange(arrHist,
+                                            false /* lowInclusive */,
+                                            tagLowDbl,
+                                            valLowDbl,
+                                            false /* highInclusive */,
+                                            value::TypeTags::NumberInt32,
+                                            value::bitcastFrom<int32_t>(1),
+                                            true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(0.0, expectedCard, kErrorBound);
+
+    // (NaN, 5).
+    expectedCard = estimateCardRange(arrHist,
+                                     false /* lowInclusive */,
+                                     tagLowDbl,
+                                     valLowDbl,
+                                     false /* highInclusive */,
+                                     value::TypeTags::NumberInt32,
+                                     value::bitcastFrom<int32_t>(5),
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(3.0, expectedCard, kErrorBound);
+
+    const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd);
+    value::ValueGuard vgLowStr(tagLowStr, valLowStr);
+    auto [tag, value] = value::makeNewString("a"_sd);
+    value::ValueGuard vg(tag, value);
+
+    // [0, "").
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     value::TypeTags::NumberInt32,
+                                     value::bitcastFrom<int32_t>(0),
+                                     false /* highInclusive */,
+                                     tagLowStr,
+                                     valLowStr,
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(3.0, expectedCard, kErrorBound);
+
+    // ["", "a"].
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     tagLowStr,
+                                     valLowStr,
+                                     true /* highInclusive */,
+                                     tag,
+                                     value,
+                                     true /* includeScalar */);
+
+    ASSERT_APPROX_EQUAL(0.0, expectedCard, kErrorBound);
+
+    std::tie(tag, value) = value::makeNewString("xyz"_sd);
+    // ["", "xyz"].
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     tagLowStr,
+                                     valLowStr,
+                                     true /* highInclusive */,
+                                     tag,
+                                     value,
+                                     true /* includeScalar */);
+
+    ASSERT_APPROX_EQUAL(5.0, expectedCard, kErrorBound);
+}
+
+TEST(EstimatorTest, TwoBucketsMixedHistogram) {
+    // Data set of mixed data types: 20 integers and 80 strings.
+    // Histogram with one bucket per data type.
+    std::vector<BucketData> data{{100, 3.0, 17.0, 9.0}, {"pqr", 5.0, 75.0, 25.0}};
+    const ScalarHistogram hist = createHistogram(data);
+    const ArrayHistogram arrHist(
+        hist, TypeCounts{{value::TypeTags::NumberInt64, 20}, {value::TypeTags::StringSmall, 80}});
+
+    ASSERT_EQ(100.0, getTotals(hist).card);
+
+    // Estimates with the bucket bounds.
+    ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
+    ASSERT_EQ(17.0, estimateIntValCard(hist, 100, EstimationType::kLess));
+    ASSERT_EQ(80.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
+
+    auto [tag, value] = value::makeNewString("pqr"_sd);
+    value::ValueGuard vg(tag, value);
+    double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_EQ(5.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+    ASSERT_EQ(95.0, expectedCard);
+    expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+    ASSERT_EQ(0.0, expectedCard);
+
+    // Estimates for a value smaller than the first bucket bound.
+    ASSERT_APPROX_EQUAL(1.88, estimateIntValCard(hist, 50, EstimationType::kEqual), kErrorBound);
+    ASSERT_APPROX_EQUAL(6.61, estimateIntValCard(hist, 50, EstimationType::kLess), kErrorBound);
+    ASSERT_APPROX_EQUAL(
+        8.49, estimateIntValCard(hist, 50, EstimationType::kLessOrEqual), kErrorBound);
+    ASSERT_APPROX_EQUAL(91.5, estimateIntValCard(hist, 50, EstimationType::kGreater), kErrorBound);
+    ASSERT_APPROX_EQUAL(
+        93.39, estimateIntValCard(hist, 50, EstimationType::kGreaterOrEqual), kErrorBound);
+
+    // Estimates for a value between bucket bounds.
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 105, EstimationType::kEqual));
+
+    std::tie(tag, value) = value::makeNewString("a"_sd);
+    expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_APPROX_EQUAL(3.0, expectedCard, kErrorBound);
+    expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+    ASSERT_APPROX_EQUAL(54.5, expectedCard, kErrorBound);
+    expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+    ASSERT_APPROX_EQUAL(57.5, expectedCard, kErrorBound);
+    expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+    ASSERT_APPROX_EQUAL(42.5, expectedCard, kErrorBound);
+    expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card;
+    ASSERT_APPROX_EQUAL(45.5, expectedCard, kErrorBound);
+
+    // Range estimates, including min/max values per data type.
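+    // Note on the bounds used below: NaN sorts before all other numbers in this ordering
+    // and "" is the smallest string, so a range such as [NaN, "") spans exactly the
+    // numeric portion of the data set (all 20 integers).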
+    const auto [tagLowDbl, valLowDbl] =
+        std::make_pair(value::TypeTags::NumberDouble,
+                       value::bitcastFrom<double>(std::numeric_limits<double>::quiet_NaN()));
+    const auto [tagHighInt, valHighInt] =
+        std::make_pair(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(1000000));
+
+    // [NaN, 25].
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     tagLowDbl,
+                                     valLowDbl,
+                                     true /* highInclusive */,
+                                     value::TypeTags::NumberInt32,
+                                     value::bitcastFrom<int32_t>(25),
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(8.49, expectedCard, kErrorBound);
+
+    // [25, 1000000].
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     value::TypeTags::NumberInt32,
+                                     value::bitcastFrom<int32_t>(25),
+                                     true /* highInclusive */,
+                                     tagHighInt,
+                                     valHighInt,
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(13.38, expectedCard, kErrorBound);
+
+    // [NaN, 1000000].
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     tagLowDbl,
+                                     valLowDbl,
+                                     true /* highInclusive */,
+                                     tagHighInt,
+                                     valHighInt,
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(20.0, expectedCard, kErrorBound);
+
+    const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd);
+    value::ValueGuard vgLowStr(tagLowStr, valLowStr);
+
+    // [NaN, "").
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     tagLowDbl,
+                                     valLowDbl,
+                                     false /* highInclusive */,
+                                     tagLowStr,
+                                     valLowStr,
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(20.0, expectedCard, kErrorBound);
+
+    // [25, "").
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     value::TypeTags::NumberInt32,
+                                     value::bitcastFrom<int32_t>(25),
+                                     false /* highInclusive */,
+                                     tagLowStr,
+                                     valLowStr,
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(13.39, expectedCard, kErrorBound);
+
+    // ["", "a"].
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     tagLowStr,
+                                     valLowStr,
+                                     true /* highInclusive */,
+                                     tag,
+                                     value,
+                                     true /* includeScalar */);
+
+    ASSERT_APPROX_EQUAL(37.49, expectedCard, kErrorBound);
+
+    // ["", {}).
+    auto [tagObj, valObj] = value::makeNewObject();
+    value::ValueGuard vgObj(tagObj, valObj);
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     tagLowStr,
+                                     valLowStr,
+                                     false /* highInclusive */,
+                                     tagObj,
+                                     valObj,
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(80.0, expectedCard, kErrorBound);
+
+    // ["a", {}).
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     tag,
+                                     value,
+                                     false /* highInclusive */,
+                                     tagObj,
+                                     valObj,
+                                     true /* includeScalar */);
+
+    ASSERT_APPROX_EQUAL(45.5, expectedCard, kErrorBound);
+}
+
+// TODO: enable the following test after SERVER-71376 Fix histogram generation on MacOs
+#if 0
+/**
+ * Tests for cardinality estimates for queries over minimum values of date, timestamp, and objectId
+ * types. When the histogram has at least 2 buckets per data type, the minimum value, if present in
+ * the data, is picked as a bound for the first bucket for the corresponding data type. In this
+ * case the cardinality estimates are precise. To test the approximate estimation, we force the
+ * histogram generation to use one bucket per type (except the first numeric type).
+ */
+TEST(EstimatorTest, MinValueMixedHistogramFromData) {
+    const int64_t startInstant = 1506777923000LL;
+    const int64_t endInstant = 1516864323000LL;
+    const Timestamp startTs{Seconds(1516864323LL), 0};
+    const Timestamp endTs{Seconds(1526864323LL), 0};
+    const auto startOid = OID("63340d8d27afef2de7357e8d");
+    // const auto endOid = OID("63340dbed6cd8af737d4139a");
+
+    std::vector<SBEValue> data;
+    data.emplace_back(value::TypeTags::Date, value::bitcastFrom<int64_t>(startInstant));
+    data.emplace_back(value::TypeTags::Date, value::bitcastFrom<int64_t>(endInstant));
+
+    data.emplace_back(value::TypeTags::Timestamp, value::bitcastFrom<uint64_t>(startTs.asULL()));
+    data.emplace_back(value::TypeTags::Timestamp, value::bitcastFrom<uint64_t>(endTs.asULL()));
+
+    auto [tag, val] = makeInt64Value(100);
+    data.emplace_back(tag, val);
+    std::tie(tag, val) = makeInt64Value(1000);
+    data.emplace_back(tag, val);
+
+    auto [strTag, strVal] = value::makeNewString("abc"_sd);
+    value::ValueGuard strVG(strTag, strVal);
+    auto [copyTag, copyVal] = value::copyValue(strTag, strVal);
+    data.emplace_back(copyTag, copyVal);
+    std::tie(strTag, strVal) = value::makeNewString("xyz"_sd);
+    std::tie(copyTag, copyVal) = value::copyValue(strTag, strVal);
+    data.emplace_back(copyTag, copyVal);
+
+    auto [objTag, objVal] = value::makeNewObjectId();
+    value::ValueGuard objVG(objTag, objVal);
+    startOid.view().readInto(value::getObjectIdView(objVal));
+    std::tie(tag, val) = copyValue(objTag, objVal);
+    data.emplace_back(tag, val);
+    /* TODO: add another objectId value when mapping to double is fixed by SERVER-71205.
+    endOid.view().readInto(value::getObjectIdView(objVal));
+    std::tie(tag, val) = copyValue(objTag, objVal);
+    data.emplace_back(tag, val);
+    */
+
+    sortValueVector(data);
+
+    // Force each type except numbers to use a single bucket. This way there is no bucket for the
+    // min value if present in the data and it needs to be estimated.
+    const ScalarHistogram& hist = makeHistogram(data, 6);
+    // Mixed data is sorted in the histogram according to the BSON order, as defined in
+    // bsontypes.h by the canonicalizeBSONTypeUnsafeLookup function.
+    if constexpr (kCETestLogOnly) {
+        std::cout << printValueArray(data) << "\n";
+        std::cout << "Mixed types " << hist.dump();
+    }
+
+    // Minimum ObjectId.
+    auto&& [minOid, inclOid] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::ObjectId);
+    auto [minOidTag, minOidVal] = minOid->cast<Constant>()->get();
+    double expectedCard = estimate(hist, minOidTag, minOidVal, EstimationType::kEqual).card;
+    ASSERT_EQ(1.0, expectedCard);
+
+    // Minimum date.
+    const auto&& [minDate, inclDate] =
+        getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Date);
+    const auto [minDateTag, minDateVal] = minDate->cast<Constant>()->get();
+    expectedCard = estimate(hist, minDateTag, minDateVal, EstimationType::kEqual).card;
+    ASSERT_EQ(1.0, expectedCard);
+
+    // Minimum timestamp.
+    auto&& [minTs, inclTs] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Timestamp);
+    auto [minTsTag, minTsVal] = minTs->cast<Constant>()->get();
+    expectedCard = estimate(hist, minTsTag, minTsVal, EstimationType::kEqual).card;
+    ASSERT_EQ(1.0, expectedCard);
+
+    // Add minimum values to the data set and create another histogram.
+    const auto [tagLowStr, valLowStr] = value::makeNewString(""_sd);
+    value::ValueGuard vgLowStr(tagLowStr, valLowStr);
+    std::tie(copyTag, copyVal) = value::copyValue(tagLowStr, valLowStr);
+    data.emplace_back(copyTag, copyVal);
+    data.emplace_back(minDateTag, minDateVal);
+    data.emplace_back(minTsTag, minTsVal);
+
+    sortValueVector(data);
+    const ScalarHistogram& hist2 = makeHistogram(data, 6);
+    if constexpr (kCETestLogOnly) {
+        std::cout << printValueArray(data) << "\n";
+        std::cout << "Mixed types " << hist2.dump();
+    }
+
+    // Precise estimate for equality to the empty string, as it is a bucket boundary.
+    expectedCard = estimate(hist2, tagLowStr, valLowStr, EstimationType::kEqual).card;
+    ASSERT_EQ(1.0, expectedCard);
+    // Equality to the minimum date/ts value is estimated by range_frequency/NDV.
+    expectedCard = estimate(hist2, minDateTag, minDateVal, EstimationType::kEqual).card;
+    ASSERT_EQ(1.0, expectedCard);
+    expectedCard = estimate(hist2, minTsTag, minTsVal, EstimationType::kEqual).card;
+    ASSERT_EQ(1.0, expectedCard);
+
+    // Inequality predicates using min values.
+    const ArrayHistogram arrHist(hist2,
+                                 TypeCounts{
+                                     {value::TypeTags::NumberInt64, 2},
+                                     {value::TypeTags::StringSmall, 3},
+                                     {value::TypeTags::ObjectId, 1},
+                                     {value::TypeTags::Date, 3},
+                                     {value::TypeTags::Timestamp, 3},
+                                 });
+    // [minDate, startInstant], estimated by half of the date bucket.
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     minDateTag,
+                                     minDateVal,
+                                     true /* highInclusive */,
+                                     value::TypeTags::Date,
+                                     value::bitcastFrom<int64_t>(startInstant),
+                                     true /* includeScalar */);
+    ASSERT_EQ(1.0, expectedCard);
+
+    // [minDate, endInstant], estimated by the entire date bucket.
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     minDateTag,
+                                     minDateVal,
+                                     true /* highInclusive */,
+                                     value::TypeTags::Date,
+                                     value::bitcastFrom<int64_t>(endInstant),
+                                     true /* includeScalar */);
+    ASSERT_EQ(3.0, expectedCard);
+
+    // [minDate, minTs), estimated by the entire date bucket.
+    // (is this interval possible or is it better to have maxDate upper bound?).
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     minDateTag,
+                                     minDateVal,
+                                     false /* highInclusive */,
+                                     minTsTag,
+                                     minTsVal,
+                                     true /* includeScalar */);
+    ASSERT_EQ(3.0, expectedCard);
+
+    // [minTs, startTs], estimated by half of the timestamp bucket.
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     minTsTag,
+                                     minTsVal,
+                                     true /* highInclusive */,
+                                     value::TypeTags::Timestamp,
+                                     value::bitcastFrom<uint64_t>(startTs.asULL()),
+                                     true /* includeScalar */);
+    ASSERT_EQ(1.0, expectedCard);
+
+    // [minTs, endTs], estimated by the entire timestamp bucket.
+    expectedCard = estimateCardRange(arrHist,
+                                     true /* lowInclusive */,
+                                     minTsTag,
+                                     minTsVal,
+                                     true /* highInclusive */,
+                                     value::TypeTags::Timestamp,
+                                     value::bitcastFrom<uint64_t>(endTs.asULL()),
+                                     true /* includeScalar */);
+    ASSERT_EQ(3.0, expectedCard);
+
+    // [minTs, maxTs], estimated by the entire timestamp bucket.
+ auto&& [maxTs, inclMaxTs] = getMinMaxBoundForType(false /*isMin*/, value::TypeTags::Timestamp); + const auto [maxTsTag, maxTsVal] = maxTs->cast()->get(); + expectedCard = estimateCardRange(arrHist, + true /* lowInclusive */, + minTsTag, + minTsVal, + true /* highInclusive */, + maxTsTag, + maxTsVal, + true /* includeScalar */); + ASSERT_EQ(3.0, expectedCard); +} +#endif + +TEST(EstimatorTest, MinValueMixedHistogramFromBuckets) { + const auto endOid = OID("63340dbed6cd8af737d4139a"); + const auto endDate = Date_t::fromMillisSinceEpoch(1526864323000LL); + const Timestamp endTs{Seconds(1526864323LL), 0}; + + std::vector data{ + {0, 1.0, 0.0, 0.0}, + {100, 4.0, 95.0, 30.0}, + {"xyz", 5.0, 95.0, 25.0}, + {Value(endOid), 5.0, 95.0, 50.0}, + {Value(endDate), 4.0, 96.0, 24.0}, + {Value(endTs), 5.0, 95.0, 50.0}, + }; + const ScalarHistogram hist = createHistogram(data); + if constexpr (kCETestLogOnly) { + std::cout << "Mixed types " << hist.dump(); + } + ASSERT_EQ(500.0, getTotals(hist).card); + + // Minimum ObjectId. + auto&& [minOid, inclOid] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::ObjectId); + auto [minOidTag, minOidVal] = minOid->cast()->get(); + double expectedCard = estimate(hist, minOidTag, minOidVal, EstimationType::kEqual).card; + ASSERT_APPROX_EQUAL(1.9, expectedCard, kErrorBound); + + // Minimum date. + const auto&& [minDate, inclDate] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Date); + const auto [minDateTag, minDateVal] = minDate->cast()->get(); + expectedCard = estimate(hist, minDateTag, minDateVal, EstimationType::kEqual).card; + ASSERT_EQ(4.0, expectedCard); + + // Minimum timestamp. + auto&& [minTs, inclTs] = getMinMaxBoundForType(true /*isMin*/, value::TypeTags::Timestamp); + auto [minTsTag, minTsVal] = minTs->cast()->get(); + expectedCard = estimate(hist, minTsTag, minTsVal, EstimationType::kEqual).card; + ASSERT_APPROX_EQUAL(1.9, expectedCard, kErrorBound); + + // Inequality predicates using min values. + const ArrayHistogram arrHist(hist, + TypeCounts{ + {value::TypeTags::NumberInt64, 100}, + {value::TypeTags::StringSmall, 100}, + {value::TypeTags::ObjectId, 100}, + {value::TypeTags::Date, 100}, + {value::TypeTags::Timestamp, 100}, + }); + // [minDate, innerDate], estimated by the half of the date bucket. + const int64_t innerDate = 1516864323000LL; + expectedCard = estimateCardRange(arrHist, + true /* lowInclusive */, + minDateTag, + minDateVal, + true /* highInclusive */, + value::TypeTags::Date, + value::bitcastFrom(innerDate), + true /* includeScalar */); + ASSERT_APPROX_EQUAL(48.0, expectedCard, kErrorBound); + + // [minTs, innerTs], estimated by the half of the timestamp bucket. + const Timestamp innerTs{Seconds(1516864323LL), 0}; + expectedCard = estimateCardRange(arrHist, + true /* lowInclusive */, + minTsTag, + minTsVal, + true /* highInclusive */, + value::TypeTags::Timestamp, + value::bitcastFrom(innerTs.asULL()), + true /* includeScalar */); + ASSERT_APPROX_EQUAL(47.5, expectedCard, kErrorBound); +} +} // namespace +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/histogram_estimation.cpp b/src/mongo/db/query/ce/histogram_estimation.cpp deleted file mode 100644 index cd1e52219c2..00000000000 --- a/src/mongo/db/query/ce/histogram_estimation.cpp +++ /dev/null @@ -1,488 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. 
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/query/ce/histogram_estimation.h"
-#include "mongo/db/exec/sbe/abt/abt_lower.h"
-#include "mongo/db/pipeline/abt/utils.h"
-#include "mongo/db/query/ce/value_utils.h"
-#include "mongo/db/query/optimizer/rewrites/const_eval.h"
-#include "mongo/db/query/optimizer/syntax/expr.h"
-#include "mongo/db/query/optimizer/utils/ce_math.h"
-#include "mongo/db/query/optimizer/utils/interval_utils.h"
-
-namespace mongo::ce {
-using namespace sbe;
-using namespace optimizer;
-
-std::pair<value::TypeTags, value::Value> getConstTypeVal(const ABT& abt) {
-    const auto* constant = abt.cast<Constant>();
-    tassert(7051102, "Interval ABTs passed in for estimation must have Constant bounds.", constant);
-    return constant->get();
-};
-
-boost::optional<std::pair<value::TypeTags, value::Value>> getBound(
-    const BoundRequirement& boundReq) {
-    const ABT& bound = boundReq.getBound();
-    if (bound.is<Constant>()) {
-        return getConstTypeVal(bound);
-    }
-    return boost::none;
-};
-
-IntervalRequirement getMinMaxIntervalForType(value::TypeTags type) {
-    // Note: This function works based on the assumption that there are no intervals that include
-    // values from more than one type. That is why the MinMax interval of a type will include all
-    // possible intervals over that type.
-
-    auto&& [min, minInclusive] = getMinMaxBoundForType(true /*isMin*/, type);
-    tassert(7051103, str::stream() << "Type " << type << " has no minimum", min);
-
-    auto&& [max, maxInclusive] = getMinMaxBoundForType(false /*isMin*/, type);
-    tassert(7051104, str::stream() << "Type " << type << " has no maximum", max);
-
-    return IntervalRequirement{BoundRequirement(minInclusive, *min),
-                               BoundRequirement(maxInclusive, *max)};
-}
-
-bool isIntervalSubsetOfType(const IntervalRequirement& interval, value::TypeTags type) {
-    // Create a conjunction of the interval and the min-max interval for the type as input for the
-    // intersection function.
-    auto intervals = IntervalReqExpr::make<IntervalReqExpr::Disjunction>(IntervalReqExpr::NodeVector{
-        IntervalReqExpr::make<IntervalReqExpr::Conjunction>(IntervalReqExpr::NodeVector{
-            IntervalReqExpr::make<IntervalReqExpr::Atom>(interval),
-            IntervalReqExpr::make<IntervalReqExpr::Atom>(getMinMaxIntervalForType(type))})});
-
-    return intersectDNFIntervals(intervals, ConstEval::constFold).has_value();
-}
-
-EstimationResult getTotals(const ScalarHistogram& h) {
-    if (h.empty()) {
-        return {0.0, 0.0};
-    }
-
-    const Bucket& last = h.getBuckets().back();
-    return {last._cumulativeFreq, last._cumulativeNDV};
-}
-
-/**
- * Helper function that uses linear interpolation to estimate the cardinality and NDV for a value
- * that falls inside of a histogram bucket.
- */
-EstimationResult interpolateEstimateInBucket(const ScalarHistogram& h,
-                                             value::TypeTags tag,
-                                             value::Value val,
-                                             EstimationType type,
-                                             size_t bucketIndex) {
-
-    const Bucket& bucket = h.getBuckets().at(bucketIndex);
-    const auto [boundTag, boundVal] = h.getBounds().getAt(bucketIndex);
-
-    double resultCard = bucket._cumulativeFreq - bucket._equalFreq - bucket._rangeFreq;
-    double resultNDV = bucket._cumulativeNDV - bucket._ndv - 1.0;
-
-    // Check if the estimate is at the point of type brackets switch. If the current bucket is the
-    // first bucket of a new type bracket and the value is of another type, estimate cardinality
-    // from the current bucket as 0.
-    //
-    // For example, let bound 1 = 1000, bound 2 = "abc". The value 100000000 falls in bucket 2, the
-    // first bucket for strings, but should not get a cardinality/NDV fraction from it.
-    if (!sameTypeBracket(tag, boundTag)) {
-        if (type == EstimationType::kEqual) {
-            return {0.0, 0.0};
-        } else {
-            return {resultCard, resultNDV};
-        }
-    }
-
-    // Estimate for equality frequency inside of the bucket.
-    const double innerEqFreq = (bucket._ndv == 0.0) ? 0.0 : bucket._rangeFreq / bucket._ndv;
-
-    if (type == EstimationType::kEqual) {
-        return {innerEqFreq, 1.0};
-    }
-
-    // If the value is minimal for its type, and the operation is $lt or $lte, return the
-    // cardinality up to the previous bucket.
-    auto&& [minConstant, inclusive] = getMinMaxBoundForType(true /*isMin*/, tag);
-    auto [minTag, minVal] = getConstTypeVal(*minConstant);
-    if (compareValues(minTag, minVal, tag, val) == 0) {
-        return {resultCard, resultNDV};
-    }
-
-    // For $lt and $lte operations, use linear interpolation to take a fraction of the bucket
-    // cardinality and NDV if there is a preceding bucket with a bound of the same type. Use half
-    // of the bucket estimates otherwise.
-    double ratio = 0.5;
-    if (bucketIndex > 0) {
-        const auto [lowBoundTag, lowBoundVal] = h.getBounds().getAt(bucketIndex - 1);
-        if (sameTypeBracket(lowBoundTag, boundTag)) {
-            double doubleLowBound = valueToDouble(lowBoundTag, lowBoundVal);
-            double doubleUpperBound = valueToDouble(boundTag, boundVal);
-            double doubleVal = valueToDouble(tag, val);
-            ratio = (doubleVal - doubleLowBound) / (doubleUpperBound - doubleLowBound);
-        }
-    }
-
-    const double bucketFreqRatio = bucket._rangeFreq * ratio;
-    resultCard += bucketFreqRatio;
-    resultNDV += bucket._ndv * ratio;
-
-    if (type == EstimationType::kLess) {
-        // Subtract from the estimate the cardinality and NDV corresponding to the equality
-        // operation, if they are larger than the ratio taken from this bucket.
-        const double innerEqFreqCorrection = (bucketFreqRatio < innerEqFreq) ? 0.0 : innerEqFreq;
-        const double innerEqNdv = (bucket._ndv * ratio <= 1.0) ? 0.0 : 1.0;
-        resultCard -= innerEqFreqCorrection;
-        resultNDV -= innerEqNdv;
-    }
-    return {resultCard, resultNDV};
-}
-
-EstimationResult estimate(const ScalarHistogram& h,
-                          value::TypeTags tag,
-                          value::Value val,
-                          EstimationType type) {
-    switch (type) {
-        case EstimationType::kGreater:
-            return getTotals(h) - estimate(h, tag, val, EstimationType::kLessOrEqual);
-
-        case EstimationType::kGreaterOrEqual:
-            return getTotals(h) - estimate(h, tag, val, EstimationType::kLess);
-
-        default:
-            // Continue.
-            break;
-    }
-
-    size_t bucketIndex = 0;
-    {
-        size_t len = h.getBuckets().size();
-        while (len > 0) {
-            const size_t half = len >> 1;
-            const auto [boundTag, boundVal] = h.getBounds().getAt(bucketIndex + half);
-
-            if (compareValues(boundTag, boundVal, tag, val) < 0) {
-                bucketIndex += half + 1;
-                len -= half + 1;
-            } else {
-                len = half;
-            }
-        }
-    }
-    if (bucketIndex == h.getBuckets().size()) {
-        // Value beyond the largest endpoint.
-        switch (type) {
-            case EstimationType::kEqual:
-                return {0.0, 0.0};
-
-            case EstimationType::kLess:
-            case EstimationType::kLessOrEqual:
-                return getTotals(h);
-
-            default:
-                MONGO_UNREACHABLE;
-        }
-    }
-
-    const Bucket& bucket = h.getBuckets().at(bucketIndex);
-    const auto [boundTag, boundVal] = h.getBounds().getAt(bucketIndex);
-    const bool isEndpoint = compareValues(boundTag, boundVal, tag, val) == 0;
-
-    if (isEndpoint) {
-        switch (type) {
-            case EstimationType::kEqual: {
-                return {bucket._equalFreq, 1.0};
-            }
-
-            case EstimationType::kLess: {
-                double resultCard = bucket._cumulativeFreq - bucket._equalFreq;
-                double resultNDV = bucket._cumulativeNDV - 1.0;
-                return {resultCard, resultNDV};
-            }
-
-            case EstimationType::kLessOrEqual: {
-                double resultCard = bucket._cumulativeFreq;
-                double resultNDV = bucket._cumulativeNDV;
-                return {resultCard, resultNDV};
-            }
-
-            default:
-                MONGO_UNREACHABLE;
-        }
-    } else {
-        return interpolateEstimateInBucket(h, tag, val, type, bucketIndex);
-    }
-}
-
-/**
- * Returns how many values of the given type are known by the array histogram.
- */
-double getTypeCard(const ArrayHistogram& ah, value::TypeTags tag, bool includeScalar) {
-    double count = 0.0;
-
-    // TODO SERVER-70936: booleans are estimated by different type counters (unless in arrays).
-    if (tag == sbe::value::TypeTags::Boolean) {
-        uasserted(7051101, "Cannot estimate boolean types yet with histogram CE.");
-    }
-
-    // Note that if we are asked by the optimizer to estimate an interval whose bounds are arrays,
-    // this means we are trying to estimate equality on nested arrays. In this case, we do not want
-    // to include the "scalar" type counter for the array type, because this will cause us to
-    // estimate the nested array case as counting all arrays, regardless of whether or not they are
-    // nested.
-    if (includeScalar && tag != value::TypeTags::Array) {
-        auto typeIt = ah.getTypeCounts().find(tag);
-        if (typeIt != ah.getTypeCounts().end()) {
-            count += typeIt->second;
-        }
-    }
-    if (ah.isArray()) {
-        auto typeIt = ah.getArrayTypeCounts().find(tag);
-        if (typeIt != ah.getArrayTypeCounts().end()) {
-            count += typeIt->second;
-        }
-    }
-    return count;
-}
-
-/**
- * Estimates equality to the given tag/value using histograms.
- */
-double estimateCardEq(const ArrayHistogram& ah,
-                      value::TypeTags tag,
-                      value::Value val,
-                      bool includeScalar) {
-    double card = 0.0;
-    if (includeScalar) {
-        card = estimate(ah.getScalar(), tag, val, EstimationType::kEqual).card;
-    }
-    if (ah.isArray()) {
-        card += estimate(ah.getArrayUnique(), tag, val, EstimationType::kEqual).card;
-    }
-    return card;
-}
-
-static EstimationResult estimateRange(const ScalarHistogram& histogram,
-                                      bool lowInclusive,
-                                      value::TypeTags tagLow,
-                                      value::Value valLow,
-                                      bool highInclusive,
-                                      value::TypeTags tagHigh,
-                                      value::Value valHigh) {
-    const EstimationType highType =
-        highInclusive ? EstimationType::kLessOrEqual : EstimationType::kLess;
-    const EstimationResult highEstimate = estimate(histogram, tagHigh, valHigh, highType);
-
-    const EstimationType lowType =
-        lowInclusive ? EstimationType::kLess : EstimationType::kLessOrEqual;
-    const EstimationResult lowEstimate = estimate(histogram, tagLow, valLow, lowType);
-
-    return highEstimate - lowEstimate;
-}
-
-/**
- * Computes an estimate for a range query on array data with the formula:
- * Card(ArrayMin(a < valHigh)) - Card(ArrayMax(a < valLow))
- */
-static EstimationResult estimateRangeQueryOnArray(const ScalarHistogram& histogramAmin,
-                                                  const ScalarHistogram& histogramAmax,
-                                                  bool lowInclusive,
-                                                  value::TypeTags tagLow,
-                                                  value::Value valLow,
-                                                  bool highInclusive,
-                                                  value::TypeTags tagHigh,
-                                                  value::Value valHigh) {
-    const EstimationType highType =
-        highInclusive ? EstimationType::kLessOrEqual : EstimationType::kLess;
-    const EstimationResult highEstimate = estimate(histogramAmin, tagHigh, valHigh, highType);
-
-    const EstimationType lowType =
-        lowInclusive ? EstimationType::kLess : EstimationType::kLessOrEqual;
-    const EstimationResult lowEstimate = estimate(histogramAmax, tagLow, valLow, lowType);
-
-    return highEstimate - lowEstimate;
-}
-
-double estimateCardRange(const ArrayHistogram& ah,
-                         /* Define lower bound. */
-                         bool lowInclusive,
-                         value::TypeTags tagLow,
-                         value::Value valLow,
-                         /* Define upper bound. */
-                         bool highInclusive,
-                         value::TypeTags tagHigh,
-                         value::Value valHigh,
-                         bool includeScalar,
-                         EstimationAlgo estimationAlgo) {
-    uassert(6695701,
-            "Low bound must not be higher than high",
-            compareValues(tagLow, valLow, tagHigh, valHigh) <= 0);
-
-    // Helper lambda to shorten code for legibility.
-    auto estRange = [&](const ScalarHistogram& h) {
-        return estimateRange(h, lowInclusive, tagLow, valLow, highInclusive, tagHigh, valHigh);
-    };
-
-    double result = 0.0;
-    if (ah.isArray()) {
-
-        if (includeScalar) {
-            // Range query on array data.
-            const EstimationResult rangeCardOnArray = estimateRangeQueryOnArray(ah.getArrayMin(),
-                                                                                ah.getArrayMax(),
-                                                                                lowInclusive,
-                                                                                tagLow,
-                                                                                valLow,
-                                                                                highInclusive,
-                                                                                tagHigh,
-                                                                                valHigh);
-            result += rangeCardOnArray.card;
-        } else {
-            // $elemMatch query on array data.
-            const auto arrayMinEst = estRange(ah.getArrayMin());
-            const auto arrayMaxEst = estRange(ah.getArrayMax());
-            const auto arrayUniqueEst = estRange(ah.getArrayUnique());
-
-            // TODO: try using ah.getArrayCount() - ah.getEmptyArrayCount()
-            // when the number of empty arrays is provided by the statistics.
-            const double totalArrayCount = ah.getArrayCount();
-
-            uassert(
-                6715101, "Array histograms should contain at least one array", totalArrayCount > 0);
-            switch (estimationAlgo) {
-                case EstimationAlgo::HistogramV1: {
-                    const double arrayUniqueDensity = (arrayUniqueEst.ndv == 0.0)
0.0 - : (arrayUniqueEst.card / std::sqrt(arrayUniqueEst.ndv)); - result = - std::max(std::max(arrayMinEst.card, arrayMaxEst.card), arrayUniqueDensity); - break; - } - case EstimationAlgo::HistogramV2: { - const double avgArraySize = - getTotals(ah.getArrayUnique()).card / totalArrayCount; - const double adjustedUniqueCard = (avgArraySize == 0.0) - ? 0.0 - : std::min(arrayUniqueEst.card / pow(avgArraySize, 0.2), totalArrayCount); - result = - std::max(std::max(arrayMinEst.card, arrayMaxEst.card), adjustedUniqueCard); - break; - } - case EstimationAlgo::HistogramV3: { - const double adjustedUniqueCard = - 0.85 * std::min(arrayUniqueEst.card, totalArrayCount); - result = - std::max(std::max(arrayMinEst.card, arrayMaxEst.card), adjustedUniqueCard); - break; - } - default: - MONGO_UNREACHABLE; - } - } - } - - if (includeScalar) { - const auto scalarEst = estRange(ah.getScalar()); - result += scalarEst.card; - } - - return result; -} - -double estimateIntervalCardinality(const ce::ArrayHistogram& ah, - const IntervalRequirement& interval, - CEType childResult, - bool includeScalar) { - if (interval.isFullyOpen()) { - return childResult; - } else if (interval.isEquality()) { - auto maybeConstBound = getBound(interval.getLowBound()); - if (!maybeConstBound) { - return kInvalidEstimate; - } - - auto [tag, val] = *maybeConstBound; - if (canEstimateTypeViaHistogram(tag)) { - return estimateCardEq(ah, tag, val, includeScalar); - } - - // Otherwise, we return the cardinality for the type of the intervals. - return getTypeCard(ah, tag, includeScalar); - } - - // Otherwise, we have a range. - auto lowBound = interval.getLowBound(); - auto maybeConstLowBound = getBound(lowBound); - if (!maybeConstLowBound) { - return kInvalidEstimate; - } - - auto highBound = interval.getHighBound(); - auto maybeConstHighBound = getBound(highBound); - if (!maybeConstHighBound) { - return kInvalidEstimate; - } - - auto [lowTag, lowVal] = *maybeConstLowBound; - auto [highTag, highVal] = *maybeConstHighBound; - - // Check if we estimated this interval using histograms. One of the tags may not be of a type we - // know how to estimate using histograms; however, it should still be possible to estimate the - // interval if the other one is of the appropriate type. - if (canEstimateTypeViaHistogram(lowTag) || canEstimateTypeViaHistogram(highTag)) { - return estimateCardRange(ah, - lowBound.isInclusive(), - lowTag, - lowVal, - highBound.isInclusive(), - highTag, - highVal, - includeScalar); - } - - // Otherwise, this interval was not in our histogram. We may be able to estimate this interval - // via type counts- if so, we just return the total count for the type. - - // If the bound tags are equal, we can estimate this in the same way that we do equalities on - // non-histogrammable types. Otherwise, we need to figure out which type(s) are included by this - // range. - if (lowTag == highTag || isIntervalSubsetOfType(interval, lowTag)) { - return getTypeCard(ah, lowTag, includeScalar); - } else if (isIntervalSubsetOfType(interval, highTag)) { - return getTypeCard(ah, highTag, includeScalar); - } - - // If we reach here, we've given up estimating, because our interval intersected both high & low - // type intervals (and possibly more types). - // TODO: could we aggregate type counts across all intersected types here? 
- return 0.0; -} - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/histogram_estimation.h b/src/mongo/db/query/ce/histogram_estimation.h deleted file mode 100644 index f0291b42cd8..00000000000 --- a/src/mongo/db/query/ce/histogram_estimation.h +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/query/ce/array_histogram.h" -#include "mongo/db/query/optimizer/defs.h" -#include "mongo/db/query/optimizer/index_bounds.h" - -namespace mongo::ce { - -constexpr double kInvalidEstimate = -1.0; - -enum class EstimationType { kEqual, kLess, kLessOrEqual, kGreater, kGreaterOrEqual }; -enum class EstimationAlgo { HistogramV1, HistogramV2, HistogramV3 }; - -const stdx::unordered_map estimationTypeName = { - {EstimationType::kEqual, "eq"}, - {EstimationType::kLess, "lt"}, - {EstimationType::kLessOrEqual, "lte"}, - {EstimationType::kGreater, "gt"}, - {EstimationType::kGreaterOrEqual, "gte"}}; - -struct EstimationResult { - double card; - double ndv; - - EstimationResult operator-(const EstimationResult& other) const { - return {card - other.card, ndv - other.ndv}; - } -}; - -/** - * Returns cumulative total statistics for a histogram. - */ -EstimationResult getTotals(const ScalarHistogram& h); - -/** - * Compute an estimate for a given value and estimation type. Use linear interpolation for values - * that fall inside of histogram buckets. - */ -EstimationResult estimate(const ScalarHistogram& h, - sbe::value::TypeTags tag, - sbe::value::Value val, - EstimationType type); - -/** - * Given an array histogram, an interval, and the input cardinality, estimates the cardinality of - * the interval. - */ -double estimateIntervalCardinality(const ArrayHistogram& estimator, - const optimizer::IntervalRequirement& interval, - optimizer::CEType inputCardinality, - bool includeScalar); - -/** - * Estimates the cardinality of an equality predicate given an ArrayHistogram and an SBE value and - * type tag pair. 
- */ -double estimateCardEq(const ArrayHistogram& ah, - sbe::value::TypeTags tag, - sbe::value::Value val, - bool includeScalar); - -/** - * Estimates the cardinality of a range predicate given an ArrayHistogram and a range predicate. - * Set 'includeScalar' to true to indicate whether or not the provided range should include no-array - * values. The other fields define the range of the estimation. - */ -double estimateCardRange(const ArrayHistogram& ah, - bool lowInclusive, - sbe::value::TypeTags tagLow, - sbe::value::Value valLow, - bool highInclusive, - sbe::value::TypeTags tagHigh, - sbe::value::Value valHigh, - bool includeScalar, - EstimationAlgo estAlgo = EstimationAlgo::HistogramV2); - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/histogram_estimator.cpp b/src/mongo/db/query/ce/histogram_estimator.cpp new file mode 100644 index 00000000000..6978ad4a307 --- /dev/null +++ b/src/mongo/db/query/ce/histogram_estimator.cpp @@ -0,0 +1,272 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/ce/histogram_estimator.h" + +#include "mongo/db/pipeline/abt/utils.h" +#include "mongo/db/query/ce/histogram_predicate_estimation.h" +#include "mongo/db/query/optimizer/utils/abt_hash.h" +#include "mongo/db/query/optimizer/utils/ce_math.h" +#include "mongo/db/query/optimizer/utils/memo_utils.h" + +namespace mongo::optimizer::ce { +namespace { +/** + * This transport combines chains of PathGets and PathTraverses into an MQL-like string path. + */ +class PathDescribeTransport { +public: + std::string transport(const PathTraverse& /*node*/, std::string childResult) { + return childResult; + } + + std::string transport(const PathGet& node, std::string childResult) { + return str::stream() << node.name() << (childResult.length() > 0 ? "." : "") << childResult; + } + + std::string transport(const EvalFilter& node, std::string pathResult, std::string inputResult) { + return pathResult; + } + + std::string transport(const PathIdentity& node) { + return ""; + } + + template + std::string transport(const T& node, Ts&&... 
/* args */) { + uasserted(6903900, "Unexpected node in path serialization."); + } +}; + +std::string serializePath(const ABT& path) { + PathDescribeTransport pdt; + auto str = algebra::transport(path, pdt); + return str; +} + +} // namespace + +class HistogramTransport { +public: + HistogramTransport(std::shared_ptr stats, + std::unique_ptr fallbackCE) + : _stats(stats), + _fallbackCE(std::move(fallbackCE)), + _arrayOnlyInterval(*defaultConvertPathToInterval(make())) {} + + CEType transport(const ABT& n, + const ScanNode& node, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + CEType /*bindResult*/) { + return _stats->getCardinality(); + } + + /** + * This struct is used to track an intermediate representation of the intervals in the + * requirements map. In particular, grouping intervals along each path in the map allows us to + * determine which paths should be estimated as $elemMatches without relying on a particular + * order of entries in the requirements map. + */ + struct SargableConjunct { + bool includeScalar; + const stats::ArrayHistogram& histogram; + std::vector> intervals; + }; + + CEType transport(const ABT& n, + const SargableNode& node, + const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + CEType childResult, + CEType /*bindsResult*/, + CEType /*refsResult*/) { + // Early out and return 0 since we don't expect to get more results. + if (childResult == 0.0) { + return 0.0; + } + + // First pass through the requirements map to extract information about each path. + std::map conjunctRequirements; + for (const auto& [key, req] : node.getReqMap()) { + if (req.getIsPerfOnly()) { + // Ignore perf-only requirements. + continue; + } + + const auto serializedPath = serializePath(key._path.ref()); + const auto& interval = req.getIntervals(); + const bool isPathArrInterval = + (_arrayOnlyInterval == interval) && !pathEndsInTraverse(key._path.ref()); + + // Check if we have already seen this path. + if (auto conjunctIt = conjunctRequirements.find({serializedPath}); + conjunctIt != conjunctRequirements.end()) { + auto& conjunctReq = conjunctIt->second; + if (isPathArrInterval) { + // We should estimate this path's intervals using $elemMatch semantics. + // Don't push back the interval for estimation; instead, we use it to change how + // we estimate other intervals along this path. + conjunctReq.includeScalar = false; + } else { + // We will need to estimate this interval. + conjunctReq.intervals.push_back(interval); + } + continue; + } + + // Fallback if there is no histogram. + auto histogram = _stats->getHistogram(serializedPath); + if (!histogram) { + // For now, because of the structure of SargableNode and the implementation of + // the fallback (currently HeuristicCE), we can't combine heuristic & histogram + // estimates. In this case, default to Heuristic if we don't have a histogram for + // any of the predicates. + return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); + } + + // Add this path to the map. If this is not a 'PathArr' interval, add it to the vector + // of intervals we will be estimating.
+ SargableConjunct sc{!isPathArrInterval, *histogram, {}}; + if (sc.includeScalar) { + sc.intervals.push_back(interval); + } + conjunctRequirements.emplace(serializedPath, std::move(sc)); + } + + std::vector topLevelSelectivities; + for (const auto& [_, conjunctReq] : conjunctRequirements) { + const CEType totalCard = _stats->getCardinality(); + + if (conjunctReq.intervals.empty() && !conjunctReq.includeScalar) { + // In this case there is a single 'PathArr' interval for this field. + // The selectivity of this interval is: (count of all arrays) / totalCard + double pathArrSel = conjunctReq.histogram.getArrayCount() / totalCard; + topLevelSelectivities.push_back(pathArrSel); + } + + // Intervals are in DNF. + for (const IntervalReqExpr::Node& intervalDNF : conjunctReq.intervals) { + std::vector disjSelectivities; + + const auto disjuncts = intervalDNF.cast()->nodes(); + for (const auto& disjunct : disjuncts) { + const auto& conjuncts = disjunct.cast()->nodes(); + + std::vector conjSelectivities; + for (const auto& conjunct : conjuncts) { + const auto& interval = conjunct.cast()->getExpr(); + auto cardinality = + ce::estimateIntervalCardinality(conjunctReq.histogram, + interval, + childResult, + conjunctReq.includeScalar); + + // We may still not have been able to estimate the interval using + // histograms, for instance if the interval bounds were non-Constant. In + // this case, we should fallback to heuristics. + if (cardinality < 0) { + return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); + } + + // We have to convert the cardinality to a selectivity. The histogram + // returns the cardinality for the entire collection; however, fewer records + // may be expected at the SargableNode. + conjSelectivities.push_back(cardinality / totalCard); + } + + auto backoff = ce::conjExponentialBackoff(std::move(conjSelectivities)); + disjSelectivities.push_back(backoff); + } + + auto backoff = ce::disjExponentialBackoff(std::move(disjSelectivities)); + topLevelSelectivities.push_back(backoff); + } + } + + // The elements of the PartialSchemaRequirements map represent an implicit conjunction. + if (!topLevelSelectivities.empty()) { + auto backoff = ce::conjExponentialBackoff(std::move(topLevelSelectivities)); + childResult *= backoff; + } + return childResult; + } + + CEType transport(const ABT& n, + const RootNode& node, + const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + CEType childResult, + CEType /*refsResult*/) { + // Root node does not change cardinality. + return childResult; + } + + /** + * Use fallback for other ABT types. + */ + template + CEType transport(const ABT& n, + const T& /*node*/, + const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + Ts&&...) { + if (canBeLogicalNode()) { + return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); + } + return 0.0; + } + +private: + std::shared_ptr _stats; + std::unique_ptr _fallbackCE; + + // This is a special interval indicating that we expect to use $elemMatch semantics when + // estimating the current path. 
+ const IntervalReqExpr::Node _arrayOnlyInterval; +}; + +HistogramEstimator::HistogramEstimator(std::shared_ptr stats, + std::unique_ptr fallbackCE) + : _transport(std::make_unique(stats, std::move(fallbackCE))) {} + +HistogramEstimator::~HistogramEstimator() {} + +CEType HistogramEstimator::deriveCE(const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + const ABT::reference_type logicalNodeRef) const { + return algebra::transport( + logicalNodeRef, *this->_transport, metadata, memo, logicalProps); +} + +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/histogram_estimator.h b/src/mongo/db/query/ce/histogram_estimator.h new file mode 100644 index 00000000000..ebcf008bdd3 --- /dev/null +++ b/src/mongo/db/query/ce/histogram_estimator.h @@ -0,0 +1,54 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/query/optimizer/cascades/interfaces.h" +#include "mongo/db/query/stats/collection_statistics.h" + +namespace mongo::optimizer::ce { + +class HistogramTransport; + +class HistogramEstimator : public cascades::CardinalityEstimator { +public: + HistogramEstimator(std::shared_ptr stats, + std::unique_ptr fallbackCE); + ~HistogramEstimator(); + + CEType deriveCE(const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + ABT::reference_type logicalNodeRef) const final; + +private: + std::unique_ptr _transport; +}; + +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/histogram_estimator_test.cpp b/src/mongo/db/query/ce/histogram_estimator_test.cpp new file mode 100644 index 00000000000..bdc7d95dea4 --- /dev/null +++ b/src/mongo/db/query/ce/histogram_estimator_test.cpp @@ -0,0 +1,1161 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
 + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/ce/histogram_estimator.h" +#include "mongo/db/query/ce/histogram_predicate_estimation.h" +#include "mongo/db/query/ce/test_utils.h" +#include "mongo/db/query/optimizer/utils/unit_test_utils.h" +#include "mongo/db/query/sbe_stage_builder_helpers.h" +#include "mongo/db/query/stats/collection_statistics_mock.h" +#include "mongo/unittest/unittest.h" + +namespace mongo::optimizer::ce { +namespace { +namespace value = sbe::value; + +using stats::ArrayHistogram; +using stats::Bucket; +using stats::CollectionStatistics; +using stats::CollectionStatisticsMock; +using stats::ScalarHistogram; +using stats::TypeCounts; + +std::string collName("test"); + +class CEHistogramTester : public CETester { +public: + CEHistogramTester(std::string collName, double numRecords) + : CETester(collName, numRecords), _stats{new CollectionStatisticsMock(numRecords)} {} + + void addHistogram(const std::string& path, std::shared_ptr histogram) { + _stats->addHistogram(path, histogram); + } + +protected: + std::unique_ptr getEstimator() const override { + // Making a copy of CollectionStatistics to override. + return std::make_unique(_stats, makeHeuristicCE()); + } + +private: + std::shared_ptr _stats; +}; + +struct TestBucket { + Value val; + double equalFreq; + double rangeFreq = 0.0; + double ndv = 1.0; /* ndv including bucket boundary */ +}; +using TestBuckets = std::vector; + +ScalarHistogram getHistogramFromData(TestBuckets testBuckets) { + sbe::value::Array bounds; + std::vector buckets; + + double cumulativeFreq = 0.0; + double cumulativeNDV = 0.0; + for (const auto& b : testBuckets) { + // Add bucket boundary value to bounds. + auto [tag, val] = stage_builder::makeValue(b.val); + bounds.push_back(tag, val); + + cumulativeFreq += b.equalFreq + b.rangeFreq; + cumulativeNDV += b.ndv; + + // Create a histogram bucket. + buckets.emplace_back(b.equalFreq, + b.rangeFreq, + cumulativeFreq, + b.ndv - 1, /* ndv excluding bucket boundary */ + cumulativeNDV); + } + + return ScalarHistogram(std::move(bounds), std::move(buckets)); +} + +TypeCounts getTypeCountsFromData(TestBuckets testBuckets) { + TypeCounts typeCounts; + for (const auto& b : testBuckets) { + // Convert the bucket boundary value to an SBE tag/value pair. + auto sbeVal = stage_builder::makeValue(b.val); + auto [tag, val] = sbeVal; + + // Increment count of values for each type tag.
+ if (auto it = typeCounts.find(tag); it != typeCounts.end()) { + it->second += b.equalFreq + b.rangeFreq; + } else { + typeCounts[tag] = b.equalFreq + b.rangeFreq; + } + } + return typeCounts; +} + +std::unique_ptr getArrayHistogramFromData(TestBuckets testBuckets, + TypeCounts additionalScalarData = {}) { + TypeCounts dataTypeCounts = getTypeCountsFromData(testBuckets); + dataTypeCounts.merge(additionalScalarData); + return std::make_unique(getHistogramFromData(testBuckets), + std::move(dataTypeCounts)); +} + +std::unique_ptr getArrayHistogramFromData(TestBuckets scalarBuckets, + TestBuckets arrayUniqueBuckets, + TestBuckets arrayMinBuckets, + TestBuckets arrayMaxBuckets, + TypeCounts arrayTypeCounts, + double totalArrayCount, + double emptyArrayCount = 0, + TypeCounts additionalScalarData = {}) { + + // Set up scalar type counts. + TypeCounts dataTypeCounts = getTypeCountsFromData(scalarBuckets); + dataTypeCounts[value::TypeTags::Array] = totalArrayCount; + dataTypeCounts.merge(additionalScalarData); + + // Set up histograms. + auto arrayMinHist = getHistogramFromData(arrayMinBuckets); + auto arrayMaxHist = getHistogramFromData(arrayMaxBuckets); + return std::make_unique(getHistogramFromData(scalarBuckets), + std::move(dataTypeCounts), + getHistogramFromData(arrayUniqueBuckets), + std::move(arrayMinHist), + std::move(arrayMaxHist), + std::move(arrayTypeCounts), + emptyArrayCount); +} + +TEST(CEHistogramTest, AssertSmallMaxDiffHistogramEstimatesAtomicPredicates) { + constexpr auto kCollCard = 8; + CEHistogramTester t(collName, kCollCard); + + // Construct a histogram with two buckets: one for 3 ints equal to 1, another for 5 strings + // equal to "ing". + const std::string& str = "ing"; + t.addHistogram("a", + getArrayHistogramFromData({ + {Value(1), 3 /* frequency */}, + {Value(str), 5 /* frequency */}, + })); + + // Test $eq. + ASSERT_MATCH_CE(t, "{a: {$eq: 1}}", 3.0); + ASSERT_MATCH_CE(t, "{a: {$eq: 2}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$eq: \"ing\"}}", 5.0); + ASSERT_MATCH_CE(t, "{a: {$eq: \"foo\"}}", 0.0); + + // Test case when field doesn't match fieldpath of histogram. This falls back to heuristics. + ASSERT_MATCH_CE(t, "{b: {$eq: 1}}", 2.82843); + + // Test $gt. + ASSERT_MATCH_CE(t, "{a: {$gt: 3}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$gt: 1}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$gt: 0}}", 3.0); + ASSERT_MATCH_CE(t, "{a: {$gt: \"bar\"}}", 5.0); + ASSERT_MATCH_CE(t, "{a: {$gt: \"ing\"}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$gt: \"zap\"}}", 0.0); + + // Test $lt. + ASSERT_MATCH_CE(t, "{a: {$lt: 3}}", 3.0); + ASSERT_MATCH_CE(t, "{a: {$lt: 1}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$lt: 0}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$lt: \"bar\"}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$lt: \"ing\"}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$lt: \"zap\"}}", 5.0); + + // Test $gte. + ASSERT_MATCH_CE(t, "{a: {$gte: 3}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$gte: 1}}", 3.0); + ASSERT_MATCH_CE(t, "{a: {$gte: 0}}", 3.0); + ASSERT_MATCH_CE(t, "{a: {$gte: \"bar\"}}", 5.0); + ASSERT_MATCH_CE(t, "{a: {$gte: \"ing\"}}", 5.0); + ASSERT_MATCH_CE(t, "{a: {$gte: \"zap\"}}", 0.0); + + // Test $lte. 
 + ASSERT_MATCH_CE(t, "{a: {$lte: 3}}", 3.0); + ASSERT_MATCH_CE(t, "{a: {$lte: 1}}", 3.0); + ASSERT_MATCH_CE(t, "{a: {$lte: 0}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$lte: \"bar\"}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$lte: \"ing\"}}", 5.0); + ASSERT_MATCH_CE(t, "{a: {$lte: \"zap\"}}", 5.0); +} + +TEST(CEHistogramTest, AssertSmallHistogramEstimatesComplexPredicates) { + constexpr auto kCollCard = 9; + CEHistogramTester t(collName, kCollCard); + + // Construct a histogram with three int buckets for field 'a'. + t.addHistogram("a", + getArrayHistogramFromData({ + {Value(1), 3 /* frequency */}, + {Value(2), 5 /* frequency */}, + {Value(3), 1 /* frequency */}, + })); + + // Construct a histogram with two int buckets for field 'b'. + t.addHistogram("b", + getArrayHistogramFromData({ + {Value(22), 3 /* frequency */}, + {Value(33), 6 /* frequency */}, + })); + + + // Test simple conjunctions on one field. Note the first example: the range we expect to see + // here is (1, 3); however, the structure in the SargableNode gives us a conjunction of two + // intervals instead: (1, "") ^ (nan, 3). This is then estimated using exponential backoff to + // give us a less accurate result. The correct cardinality here would be 5. + ASSERT_MATCH_CE(t, "{a: {$gt: 1}, a: {$lt: 3}}", 5.66); + ASSERT_MATCH_CE(t, "{a: {$gt: 1}, a: {$lte: 3}}", 6.0); + ASSERT_MATCH_CE(t, "{a: {$gte: 1}, a: {$lt: 3}}", 8.0); + ASSERT_MATCH_CE(t, "{a: {$gte: 1}, a: {$lte: 3}}", 9.0); + + // Test ranges which exclude each other. + ASSERT_MATCH_CE(t, "{a: {$lt: 1}, a: {$gt: 3}}", 0.0); + + // Test overlapping ranges. This is a similar case to {a: {$gt: 1}, a: {$lt: 3}} above: we + // expect to see the range [2, 2]; instead, we see the range [nan, 2] ^ [2, ""). + ASSERT_MATCH_CE(t, "{a: {$lte: 2}, a: {$gte: 2}}", 5.66); + + // Test conjunctions over multiple fields for which we have histograms. Here we expect a + // cardinality estimated by exponential backoff. + ASSERT_MATCH_CE(t, "{a: {$eq: 2}, b: {$eq: 22}}", 2.24); + ASSERT_MATCH_CE(t, "{a: {$eq: 11}, b: {$eq: 22}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$gt: 11}, a: {$lte: 100}, b: {$eq: 22}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$lt: 3}, a: {$gte: 1}, b: {$lt: 100}, b: {$gt: 30}}", 5.66); + + // Test conjunctions over multiple fields for which we may not have histograms. This falls back + // to heuristic estimation. + ASSERT_MATCH_CE(t, "{a: {$eq: 2}, c: {$eq: 1}}", 1.73205); + ASSERT_MATCH_CE(t, "{c: {$eq: 2}, d: {$eq: 22}}", 1.73205); +} + +TEST(CEHistogramTest, SanityTestEmptyHistogram) { + constexpr auto kCollCard = 0; + CEHistogramTester t(collName, kCollCard); + t.addHistogram("empty", std::make_unique()); + + ASSERT_MATCH_CE(t, "{empty: {$eq: 1.0}}", 0.0); + ASSERT_MATCH_CE(t, "{empty: {$lt: 1.0}, empty: {$gt: 0.0}}", 0.0); + ASSERT_MATCH_CE(t, "{empty: {$eq: 1.0}, other: {$eq: \"anything\"}}", 0.0); + ASSERT_MATCH_CE(t, "{other: {$eq: \"anything\"}, empty: {$eq: 1.0}}", 0.0); +} + +TEST(CEHistogramTest, TestOneBucketOneIntHistogram) { + constexpr auto kCollCard = 50; + CEHistogramTester t(collName, kCollCard); + + // Create a histogram with a single bucket that contains exactly one int (42) with a frequency + // of 50 (equal to the collection cardinality). + t.addHistogram("soloInt", + getArrayHistogramFromData({ + {Value(42), kCollCard /* frequency */}, + })); + + // Check against a variety of intervals that include 42 as a bound.
+ ASSERT_MATCH_CE(t, "{soloInt: {$eq: 42}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$lt: 42}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$lte: 42}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lt: 42}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lte: 42}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lt: 42}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lte: 42}}", kCollCard); + + // Check against a variety of intervals that include 42 only as one bound. + ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lt: 43}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$gt: 42}, soloInt: {$lte: 43}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lt: 43}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$gte: 42}, soloInt: {$lte: 43}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lt: 42}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lte: 42}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lt: 42}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lte: 42}}", kCollCard); + + // Check against a variety of intervals close to 42 using a lower bound of 41 and a higher bound + // of 43. + ASSERT_MATCH_CE(t, "{soloInt: {$eq: 41}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$eq: 43}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$lt: 43}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$lte: 43}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lt: 43}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lt: 43}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$gt: 41}, soloInt: {$lte: 43}}", kCollCard); + ASSERT_MATCH_CE(t, "{soloInt: {$gte: 41}, soloInt: {$lte: 43}}", kCollCard); + + // Check against different types. + ASSERT_MATCH_CE(t, "{soloInt: {$eq: \"42\"}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$lt: \"42\"}}", 0.0); + ASSERT_MATCH_CE(t, "{soloInt: {$lt: 42.1}}", kCollCard); +} + +TEST(CEHistogramTest, TestOneBoundIntRangeHistogram) { + constexpr auto kCollCard = 51; + CEHistogramTester t(collName, kCollCard); + t.addHistogram("intRange", + getArrayHistogramFromData({ + {Value(10), 5 /* frequency */}, + {Value(20), 1 /* frequency */, 45 /* range frequency */, 10 /* ndv */}, + })); + + // Test ranges that overlap only with the lower bound. + // Note: 5 values equal 10. + ASSERT_MATCH_CE(t, "{intRange: {$eq: 10}}", 5.0); + ASSERT_MATCH_CE(t, "{intRange: {$lte: 10}}", 5.0); + ASSERT_MATCH_CE(t, "{intRange: {$lte: 10}, intRange: {$gte: 10}}", 5.0); + + // Test ranges that overlap only with the upper bound. + ASSERT_MATCH_CE(t, "{intRange: {$eq: 11}}", 5.0); + ASSERT_MATCH_CE(t, "{intRange: {$eq: 15}}", 5.0); + ASSERT_MATCH_CE(t, "{intRange: {$eq: 15.5}}", 5.0); + ASSERT_MATCH_CE(t, "{intRange: {$eq: 20}}", 1.0); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 20}}", 1.0); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 10}}", 46.0); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 15}}", 28.5); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 15}}", 23.5); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 11}, intRange: {$lte: 20}}", 41.5); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 11}, intRange: {$lte: 20}}", 41.5); + + // Test ranges that partially overlap with the entire histogram. 
+ ASSERT_MATCH_CE(t, "{intRange: {$lt: 11}}", 9.5); + ASSERT_MATCH_CE(t, "{intRange: {$lt: 15}}", 22.5); + ASSERT_MATCH_CE(t, "{intRange: {$lte: 15}}", 27.5); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 8}, intRange: {$lte: 15}}", 27.5); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 8}, intRange: {$lte: 15}}", 27.5); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 8}, intRange: {$lt: 15}}", 22.5); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 8}, intRange: {$lte: 15}}", 27.5); + + // Test ranges that include all values in the histogram. + ASSERT_MATCH_CE(t, "{intRange: {$gte: 10}, intRange: {$lte: 20}}", kCollCard); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 1}, intRange: {$lte: 30}}", kCollCard); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 1}, intRange: {$lt: 30}}", kCollCard); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 1}, intRange: {$lte: 30}}", kCollCard); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 1}, intRange: {$lt: 30}}", kCollCard); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 0}}", kCollCard); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 0}}", kCollCard); + ASSERT_MATCH_CE(t, "{intRange: {$lt: 100}}", kCollCard); + ASSERT_MATCH_CE(t, "{intRange: {$lte: 100}}", kCollCard); + + // Test ranges that are fully included in the histogram. + ASSERT_MATCH_CE(t, "{intRange: {$eq: 10.5}}", 5.0); + ASSERT_MATCH_CE(t, "{intRange: {$eq: 12.5}}", 5.0); + ASSERT_MATCH_CE(t, "{intRange: {$eq: 19.36}}", 5.0); + + // Test ranges that don't overlap with the histogram. + ASSERT_MATCH_CE(t, "{intRange: {$lt: 10}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$lt: 5}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$lte: 5}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$eq: 20.1}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$eq: 21}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 21}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 20}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 100}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 30}, intRange: {$lte: 50}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 30}, intRange: {$lt: 50}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 30}, intRange: {$lt: 50}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 30}, intRange: {$lte: 50}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 0}, intRange: {$lte: 5}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 0}, intRange: {$lt: 5}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 0}, intRange: {$lt: 5}}", 0.0); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 0}, intRange: {$lte: 5}}", 0.0); + + // Because we don't specify any indexes here, these intervals do not go through simplification. + // This means that instead of having one key in the requirements map of the generated sargable + // node corresponding to the path "intRange", we have two keys and two ranges, both + // corresponding to the same path. As a consequence, we combine the estimates for the intervals + // using exponential backoff, which results in an overestimate. 
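To make the backoff arithmetic above concrete, here is a minimal sketch of how a conjunction of interval selectivities can be combined. This is only an illustration consistent with the expected values in these tests ('conjBackoffSketch' is a hypothetical name; the production implementation is optimizer::ce::conjExponentialBackoff in ce_math.cpp, which may additionally cap the number of terms):

#include <algorithm>
#include <cmath>
#include <vector>

// Sketch only: combine conjunct selectivities (each in [0, 1]) as
// s1 * s2^(1/2) * s3^(1/4) * ..., with s1 <= s2 <= ... sorted ascending.
double conjBackoffSketch(std::vector<double> sels) {
    std::sort(sels.begin(), sels.end());  // Most selective term keeps full weight.
    double result = 1.0;
    double exponent = 1.0;
    for (double s : sels) {
        result *= std::pow(s, exponent);
        exponent /= 2.0;  // Each further conjunct contributes exponentially less.
    }
    return result;
}

For the first assertion below, the two generated intervals individually estimate roughly 41.5 and 50 of the 51 documents, and conjBackoffSketch({41.5 / 51, 50.0 / 51}) * 51 comes out to roughly 41.09, matching the expected overestimate.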
+ ASSERT_MATCH_CE(t, "{intRange: {$gte: 11}, intRange: {$lt: 20}}", 41.09); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 11}, intRange: {$lt: 20}}", 41.09); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 12}, intRange: {$lt: 15}}", 19.16); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 12}, intRange: {$lt: 15}}", 20.42); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 12}, intRange: {$lte: 15}}", 23.42); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 12}, intRange: {$lte: 15}}", 24.96); + ASSERT_MATCH_CE(t, "{intRange: {$lt: 19}, intRange: {$gt: 11}}", 36.53); + + // When we specify that there is a non-multikey index on 'intRange', we expect to see interval + // simplification occurring, which should provide a better estimate for the following ranges. + t.setIndexes( + {{"intRangeIndex", + makeIndexDefinition("intRange", CollationOp::Ascending, /* isMultiKey */ false)}}); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 11}, intRange: {$lt: 20}}", 40.5); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 11}, intRange: {$lt: 20}}", 40.5); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 12}, intRange: {$lt: 15}}", 8.5); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 12}, intRange: {$lt: 15}}", 13.5); + ASSERT_MATCH_CE(t, "{intRange: {$gt: 12}, intRange: {$lte: 15}}", 13.5); + ASSERT_MATCH_CE(t, "{intRange: {$gte: 12}, intRange: {$lte: 15}}", 18.5); + ASSERT_MATCH_CE(t, "{intRange: {$lt: 19}, intRange: {$gt: 11}}", 31.0); +} + +TEST(CEHistogramTest, TestHistogramOnNestedPaths) { + constexpr auto kCollCard = 50; + CEHistogramTester t(collName, kCollCard); + + // Create a histogram with a single bucket that contains exactly one int (42) with a frequency + // of 50 (equal to the collection cardinality). + t.addHistogram("path", + getArrayHistogramFromData({ + {Value(42), kCollCard /* frequency */}, + })); + t.addHistogram("a.histogram.path", + getArrayHistogramFromData({ + {Value(42), kCollCard /* frequency */}, + })); + + ASSERT_MATCH_CE(t, "{\"not.a.histogram.path\": {$eq: 42}}", 7.071 /* heuristic */); + ASSERT_MATCH_CE(t, "{\"a.histogram.path\": {$eq: 42}}", kCollCard); + ASSERT_MATCH_CE( + t, "{\"a.histogram.path.with.no.histogram\": {$eq: 42}}", 7.071 /* heuristic */); + + // When a predicate can't be precisely translated to a SargableNode (such as $elemMatch on a + // dotted path), we may still be able to translate an over-approximation. We generate a + // SargableNode with all predicates marked perfOnly, and keep the original Filter. The Filter + // ensures the results are correct, while the SargableNode hopefully will be answerable by an + // index. + // + // On the logical level, perfOnly predicates don't do anything, so we don't consider them in + // cardinality estimates. But when we split a SargableNode into an indexed part and a fetch + // part, we remove the perfOnly flag from the indexed part, and we should consider them to + // estimate how many index keys are returned. + // + // In this test, we want to exercise the histogram estimate for the SargableNode generated by + // $elemMatch on a dotted path. So we create an index on this field to ensure the SargableNode + // is split, and the predicates marked non-perfOnly. + // + // We also mark the index multikey, to prevent non-CE rewrites from removing the predicate + // entirely. (This scenario could happen if you remove all the arrays, and refresh the + // statistics.) 
+ IndexDefinition ix{ + IndexCollationSpec{ + IndexCollationEntry{ + makeIndexPath({"a", "histogram", "path"}), + CollationOp::Ascending, + }, + }, + true /* isMultiKey */, + }; + t.setIndexes({{"a_histogram_path_1", std::move(ix)}}); + ASSERT_MATCH_CE_NODE(t, "{\"a.histogram.path\": {$elemMatch: {$eq: 42}}}", 0.0, isSargable2); +} + +TEST(CEHistogramTest, TestArrayHistogramOnAtomicPredicates) { + constexpr auto kCollCard = 6; + CEHistogramTester t(collName, kCollCard); + t.addHistogram( + "a", + // Generate a histogram for this data: + // {a: 1}, {a: 2}, {a: [1, 2, 3, 2, 2]}, {a: [10]}, {a: [2, 3, 3, 4, 5, 5, 6]}, {a: []} + // - scalars: [1, 2] + // - unique values: [1, 2, 3], [10], [2, 3, 4, 5, 6] + // -> [1, 2, 2, 3, 3, 4, 5, 6, 10] + // - min values: [1], [10], [2] -> [1, 1, 2, 2, 10] + // - max values: [3], [10], [6] -> [1, 2, 3, 6, 10] + getArrayHistogramFromData( + {// Scalar buckets. + {Value(1), 1 /* frequency */}, + {Value(2), 1 /* frequency */}}, + { + // Array unique buckets. + {Value(1), 1 /* frequency */}, + {Value(2), 2 /* frequency */}, + {Value(3), 2 /* frequency */}, + {Value(4), 1 /* frequency */}, + {Value(5), 1 /* frequency */}, + {Value(6), 1 /* frequency */}, + {Value(10), 1 /* frequency */}, + }, + { + // Array min buckets. + {Value(1), 1 /* frequency */}, + {Value(2), 1 /* frequency */}, + {Value(10), 1 /* frequency */}, + }, + { + // Array max buckets. + {Value(3), 1 /* frequency */}, + {Value(6), 1 /* frequency */}, + {Value(10), 1 /* frequency */}, + }, + {{sbe::value::TypeTags::NumberInt32, 13}}, // Array type counts. + 3, // 3 arrays total. + 1 // 1 empty array. + )); + + // Test simple predicates against 'a'. Note: in the $elemMatch case, we exclude scalar + // estimates. Without $elemMatch, we add the array histogram and scalar histogram estimates + // together. + + // Test equality predicates. + ASSERT_EQ_ELEMMATCH_CE(t, 0.0 /* CE */, 0.0 /* $elemMatch CE */, "a", "{$eq: 0}"); + ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$eq: 1}"); + ASSERT_EQ_ELEMMATCH_CE(t, 3.0 /* CE */, 2.0 /* $elemMatch CE */, "a", "{$eq: 2}"); + ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 2.0 /* $elemMatch CE */, "a", "{$eq: 3}"); + ASSERT_EQ_ELEMMATCH_CE(t, 1.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$eq: 4}"); + ASSERT_EQ_ELEMMATCH_CE(t, 1.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$eq: 5}"); + ASSERT_EQ_ELEMMATCH_CE(t, 1.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$eq: 6}"); + ASSERT_EQ_ELEMMATCH_CE(t, 1.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$eq: 10}"); + ASSERT_EQ_ELEMMATCH_CE(t, 0.0 /* CE */, 0.0 /* $elemMatch CE */, "a", "{$eq: 11}"); + + // Test histogram boundary values. 
+ ASSERT_EQ_ELEMMATCH_CE(t, 0.0 /* CE */, 0.0 /* $elemMatch CE */, "a", "{$lt: 1}"); + ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$lte: 1}"); + ASSERT_EQ_ELEMMATCH_CE(t, 0.0 /* CE */, 0.0 /* $elemMatch CE */, "a", "{$gt: 10}"); + ASSERT_EQ_ELEMMATCH_CE(t, 1.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$gte: 10}"); + + ASSERT_EQ_ELEMMATCH_CE(t, 5.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lte: 10}"); + ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lt: 10}"); + ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gt: 1}"); + ASSERT_EQ_ELEMMATCH_CE(t, 5.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gte: 1}"); + + ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lte: 5}"); + ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lt: 5}"); + ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 2.0 /* $elemMatch CE */, "a", "{$gt: 5}"); + ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 2.40822 /* $elemMatch CE */, "a", "{$gte: 5}"); + + ASSERT_EQ_ELEMMATCH_CE(t, 2.45 /* CE */, 2.40822 /* $elemMatch CE */, "a", "{$gt: 2, $lt: 5}"); + ASSERT_EQ_ELEMMATCH_CE(t, 3.27 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gte: 2, $lt: 5}"); + ASSERT_EQ_ELEMMATCH_CE(t, 2.45 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gt: 2, $lte: 5}"); + ASSERT_EQ_ELEMMATCH_CE(t, 3.27 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gte: 2, $lte: 5}"); +} + +TEST(CEHistogramTest, TestArrayHistogramOnCompositePredicates) { + constexpr auto kCollCard = 175; + CEHistogramTester t(collName, kCollCard); + + // A scalar histogram with values in the range [1,10], most of which are in the middle bucket. + t.addHistogram("scalar", + getArrayHistogramFromData({ + {Value(1), 10 /* frequency */}, + {Value(2), 10 /* frequency */}, + {Value(3), 20 /* frequency */, 120 /* range frequency */, 5 /* ndv */}, + {Value(8), 5 /* frequency */, 10 /* range frequency */, 3 /* ndv */}, + })); + + // An array histogram built on the following arrays with 35 occurrences of each: + // [{[1, 2, 3]: 35}, {[5, 5, 5, 5, 5]: 35}, {[6]: 35}, {[]: 35}, {[8, 9, 10]: 35}] + t.addHistogram( + "array", + getArrayHistogramFromData( + {/* No scalar buckets. */}, + { + // Array unique buckets. + {Value(2), 35 /* frequency */, 35 /* range frequency */, 2 /* ndv */}, + {Value(5), 35 /* frequency */, 35 /* range frequency */, 2 /* ndv */}, + {Value(6), 35 /* frequency */}, + {Value(10), 35 /* frequency */, 105 /* range frequency */, 3 /* ndv */}, + }, + { + // Array min buckets. + {Value(1), 35 /* frequency */}, + {Value(5), 35 /* frequency */}, + {Value(6), 35 /* frequency */}, + {Value(8), 35 /* frequency */}, + }, + { + // Array max buckets. + {Value(3), 35 /* frequency */}, + {Value(5), 35 /* frequency */}, + {Value(6), 35 /* frequency */}, + {Value(10), 35 /* frequency */}, + }, + {{sbe::value::TypeTags::NumberInt32, 420}}, // Array type count = 3*35+5*35+1*35+3*35. + kCollCard, // kCollCard arrays total. + 35 // 35 empty arrays + )); + + t.addHistogram( + "mixed", + // The mixed histogram has 87 scalars that follow approximately the same distribution as + // in the pure scalar case, and 88 arrays with the following distribution: + // [{[1, 2, 3]: 17}, {[5, 5, 5, 5, 5]: 17}, {[6]: 17}, {[]: 20}, {[8, 9, 10]: 17}] + getArrayHistogramFromData( + { + // Scalar buckets. These are half the number of values from the "scalar" histogram. 
 + {Value(1), 5 /* frequency */}, + {Value(2), 5 /* frequency */}, + {Value(3), 10 /* frequency */, 60 /* range frequency */, 5 /* ndv */}, + {Value(8), 2 /* frequency */, 5 /* range frequency */, 3 /* ndv */}, + }, + { + // Array unique buckets. + {Value(2), 17 /* frequency */, 17 /* range frequency */, 2 /* ndv */}, + {Value(5), 17 /* frequency */, 17 /* range frequency */, 2 /* ndv */}, + {Value(6), 17 /* frequency */}, + {Value(10), 17 /* frequency */, 34 /* range frequency */, 3 /* ndv */}, + }, + { + // Array min buckets. + {Value(1), 17 /* frequency */}, + {Value(5), 17 /* frequency */}, + {Value(6), 17 /* frequency */}, + {Value(8), 17 /* frequency */}, + }, + { + // Array max buckets. + {Value(3), 17 /* frequency */}, + {Value(5), 17 /* frequency */}, + {Value(6), 17 /* frequency */}, + {Value(10), 17 /* frequency */}, + }, + {{sbe::value::TypeTags::NumberInt32, 289}}, // Array type count = 3*17+5*17+6*17+3*17. + 88, // 88 arrays total. + 20 // 20 empty arrays. + )); + + // Test cardinality of individual predicates. + ASSERT_EQ_ELEMMATCH_CE(t, 5.0 /* CE */, 0.0 /* $elemMatch CE */, "scalar", "{$eq: 5}"); + ASSERT_EQ_ELEMMATCH_CE(t, 35.0 /* CE */, 35.0 /* $elemMatch CE */, "array", "{$eq: 5}"); + ASSERT_EQ_ELEMMATCH_CE(t, 19.5 /* CE */, 17.0 /* $elemMatch CE */, "mixed", "{$eq: 5}"); + + // Test cardinality of predicate combinations; the following tests make sure we correctly track + // which paths have $elemMatches and which don't. Some notes: + // - Whenever we use 'scalar' + $elemMatch, we expect an estimate of 0 because $elemMatch never + // returns documents on non-array paths. + // - Whenever we use 'mixed' + $elemMatch, we expect the estimate to decrease because we omit + // scalar values in 'mixed' from our estimate. + // - We do not expect the estimate on 'array' to be affected by the presence of $elemMatch, + // since we only have array values for this field. + + // Composite predicate on 'scalar' and 'array' fields. + ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, array: {$eq: 5}}", 2.236); + ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, array: {$elemMatch: {$eq: 5}}}", 2.236); + ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 0.0); + + // Composite predicate on 'mixed' and 'array' fields. + ASSERT_MATCH_CE(t, "{mixed: {$eq: 5}, array: {$eq: 5}}", 8.721); + ASSERT_MATCH_CE(t, "{mixed: {$eq: 5}, array: {$elemMatch: {$eq: 5}}}", 8.721); + ASSERT_MATCH_CE(t, "{mixed: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 7.603); + + // Composite predicate on 'scalar' and 'mixed' fields. + ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, mixed: {$eq: 5}}", 1.669); + ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, mixed: {$elemMatch: {$eq: 5}}}", 1.559); + ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$eq: 5}}", 0.0); + + // Composite predicate on all three fields without '$elemMatch' on 'array'. + ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, mixed: {$eq: 5}, array: {$eq: 5}}", 1.116); + ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, mixed: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 1.042); + ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$eq: 5}, array: {$eq: 5}}", 0.0); + + // Composite predicate on all three fields with '$elemMatch' on 'array' (same expected results + // as above). + ASSERT_MATCH_CE(t, "{scalar: {$eq: 5}, mixed: {$eq: 5}, array: {$elemMatch: {$eq: 5}}}", 1.116); + + // Test case where the same path has both $match and $elemMatch (same as $elemMatch case).
+ ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, scalar: {$eq: 5}}", 0.0); + ASSERT_MATCH_CE(t, "{mixed: {$elemMatch: {$eq: 5}}, mixed: {$eq: 5}}", 17.0); + ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 35.0); + + // Test case with multiple predicates and ranges. + ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$lt: 5}}", 70.2156); + ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$gt: 5}}", 28.4848); + + // Test multiple $elemMatches. + ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, array: {$elemMatch: {$eq: 5}}}", 0.0); + ASSERT_MATCH_CE(t, "{mixed: {$elemMatch: {$eq: 5}}, array: {$elemMatch: {$eq: 5}}}", 7.603); + ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$elemMatch: {$eq: 5}}}", 0.0); + ASSERT_MATCH_CE( + t, "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 0.0); + ASSERT_MATCH_CE( + t, + "{scalar: {$eq: 5}, mixed: {$elemMatch: {$eq: 5}}, array: {$elemMatch: {$eq: 5}}}", + 1.042); + ASSERT_MATCH_CE( + t, "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$eq: 5}, array: {$elemMatch: {$eq: 5}}}", 0.0); + ASSERT_MATCH_CE(t, + "{scalar: {$elemMatch: {$eq: 5}}, mixed: {$elemMatch: {$eq: 5}}, array: " + "{$elemMatch: {$eq: 5}}}", + 0.0); + ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$lt: 5}}}", 34.1434); + ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$gt: 5}}}", 45.5246); + + // Verify that we still return an estimate of 0.0 for any $elemMatch predicate on a scalar + // field when we have a non-multikey index. + t.setIndexes({{"aScalarIndex", + makeIndexDefinition("scalar", CollationOp::Ascending, /* isMultiKey */ false)}}); + ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}}", 0.0); + ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$gt: 1, $lt: 10}}}", 0.0); + + // Test how we estimate singular PathArr sargable predicate. + ASSERT_MATCH_CE_NODE(t, "{array: {$elemMatch: {}}}", 175.0, isSargable); + ASSERT_MATCH_CE_NODE(t, "{mixed: {$elemMatch: {}}}", 88.0, isSargable); + + // Take into account both empty and non-empty arrays. + auto makePathArrABT = [&](const FieldNameType& fieldName) { + const ProjectionName scanProjection{"scan_0"}; + auto scanNode = make(scanProjection, collName); + auto filterNode = + make(make(make(std::move(fieldName), make()), + make(scanProjection)), + std::move(scanNode)); + return make( + properties::ProjectionRequirement{ProjectionNameVector{scanProjection}}, + std::move(filterNode)); + }; + + // There are no arrays in the 'scalar' field. + ABT scalarABT = makePathArrABT("scalar"); + ASSERT_CE(t, scalarABT, 0.0); + + // About half the values of this field are arrays. + ABT mixedABT = makePathArrABT("mixed"); + ASSERT_CE(t, mixedABT, 88.0); + + // This field is always an array. + ABT arrayABT = makePathArrABT("array"); + ASSERT_CE(t, arrayABT, kCollCard); +} + +TEST(CEHistogramTest, TestMixedElemMatchAndNonElemMatch) { + constexpr auto kCollCard = 1; + CEHistogramTester t(collName, kCollCard); + + // A very simple histogram encoding a collection with one document {a: [3, 10]}. + t.addHistogram("a", + getArrayHistogramFromData({/* No scalar buckets. */}, + { + // Array unique buckets. + {Value(3), 1 /* frequency */}, + {Value(10), 1 /* frequency */}, + }, + { + // Array min buckets. + {Value(3), 1 /* frequency */}, + }, + { + // Array max buckets. + {Value(10), 1 /* frequency */}, + }, + {{sbe::value::TypeTags::NumberInt32, 2}}, + // Array type counts. + 1, + 0)); + + // Tests without indexes. 
+ ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$gt: 3, $lt: 10}}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10}}", 1.0); + ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$eq: 3}, $gt: 3, $lt: 10}}", 1.0); + ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10, $elemMatch: {$eq: 3}}}", 1.0); + ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10, $elemMatch: {$gt: 3, $lt: 10}}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$gt: 3, $lt: 10}, $gt: 3, $lt: 10}}", 0.0); + + // Tests with multikey index (note that the index on "a" must be multikey due to arrays). + t.setIndexes( + {{"anIndex", makeIndexDefinition("a", CollationOp::Ascending, /* isMultiKey */ true)}}); + ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$gt: 3, $lt: 10}}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10}}", 1.0); + ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$eq: 3}, $gt: 3, $lt: 10}}", 1.0); + ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10, $elemMatch: {$eq: 3}}}", 1.0); + ASSERT_MATCH_CE(t, "{a: {$gt: 3, $lt: 10, $elemMatch: {$gt: 3, $lt: 10}}}", 0.0); + ASSERT_MATCH_CE(t, "{a: {$elemMatch: {$gt: 3, $lt: 10}, $gt: 3, $lt: 10}}", 0.0); +} + +TEST(CEHistogramTest, TestTypeCounters) { + constexpr double kCollCard = 1000.0; + CEHistogramTester t(collName, kCollCard); + + // This test is designed such that for each document, we have the following fields: + // 1. scalar: Scalar histogram with no buckets, only type-counted data. + // 2. array: Array histogram with no buckets, only type-counted data inside of arrays. + // 3. mixed: Mixed histogram with no buckets, only type-counted data, both scalars and arrays. + constexpr double kNumObj = 200.0; + constexpr double kNumNull = 300.0; + t.addHistogram("scalar", + getArrayHistogramFromData({/* No histogram data. */}, + {{sbe::value::TypeTags::Object, kNumObj}, + {sbe::value::TypeTags::Null, kNumNull}})); + t.addHistogram("array", + getArrayHistogramFromData({/* No scalar buckets. */}, + {/* No array unique buckets. */}, + {/* No array min buckets. */}, + {/* No array max buckets. */}, + {{sbe::value::TypeTags::Object, kNumObj}, + {sbe::value::TypeTags::Null, kNumNull}}, + kCollCard)); + + // Count of each type in array type counters for field "mixed". + constexpr double kNumObjMA = 50.0; + constexpr double kNumNullMA = 100.0; + // For the purposes of this test, we have one array of each value of a non-histogrammable type. + constexpr double kNumArr = kNumObjMA + kNumNullMA; + const TypeCounts mixedArrayTC{{sbe::value::TypeTags::Object, kNumObjMA}, + {sbe::value::TypeTags::Null, kNumNullMA}}; + + // Count of each type in scalar type counters for field "mixed". + constexpr double kNumObjMS = 150.0; + constexpr double kNumNullMS = 200.0; + const TypeCounts mixedScalarTC{{sbe::value::TypeTags::Object, kNumObjMS}, + {sbe::value::TypeTags::Null, kNumNullMS}}; + + // Quick sanity check of test setup for the "mixed" histogram. The idea is that we want a + // portion of objects inside arrays, and the rest as scalars, but we want the total count of + // objects to be + ASSERT_EQ(kNumObjMA + kNumObjMS, kNumObj); + ASSERT_EQ(kNumNullMA + kNumNullMS, kNumNull); + + t.addHistogram("mixed", + getArrayHistogramFromData({/* No scalar buckets. */}, + {/* No array unique buckets. */}, + {/* No array min buckets. */}, + {/* No array max buckets. */}, + mixedArrayTC, + kNumArr, + 0 /* Empty array count. */, + mixedScalarTC)); + + // Set up indexes. 
+ t.setIndexes({{"scalarIndex", + makeIndexDefinition("scalar", CollationOp::Ascending, /* isMultiKey */ false)}}); + t.setIndexes({{"arrayIndex", + makeIndexDefinition("array", CollationOp::Ascending, /* isMultiKey */ true)}}); + t.setIndexes({{"mixedIndex", + makeIndexDefinition("mixed", CollationOp::Ascending, /* isMultiKey */ true)}}); + + // Tests for scalar type counts only. + // For object-only intervals in a scalar histogram, we always return object count, no matter + // what the bounds are. Since we have a scalar histogram for "scalar", we expect all $elemMatch + // queries to have a cardinality of 0. + + // Test object equality. + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$eq: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$eq: {a: 1}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$eq: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$gt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$gte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$lte: {b: 2, c: 3}}"); + + // Test intervals including the empty object. Note that range queries on objects do not generate + // point equalities, so these fall back onto logic in interval estimation that identifies that + // the generated intervals are subsets of the object type interval. Note: we don't even generate + // a SargableNode for the first case. The generated bounds are: + // [{}, {}) because {} is the "minimum" value for the object type. + ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "scalar", "{$lt: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$gt: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$gte: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "scalar", "{$lte: {}}"); + + // Rather than combining the intervals together, in the following cases we generate two + // object-only intervals in the requirements map with the following bounds. Each individual + // interval is estimated as having a cardinality of 'kNumObj', before we apply conjunctive + // exponential backoff to combine them. + constexpr double k2ObjCard = 89.4427; // == 200/1000 * sqrt(200/1000) * 1000 + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {}, $lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {}, $lt: {b: 2, c: 3}}"); + + // Test intervals including {a: 1}. Similar to the above case, we have two intervals in the + // requirements map. + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {a: 1}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {a: 1}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {a: 1}, $lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {a: 1}, $lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {a: 1}, $lte: {a: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {a: 1}, $lte: {a: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gte: {a: 1}, $lt: {a: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, 0.0, "scalar", "{$gt: {a: 1}, $lt: {a: 3}}"); + + // Test that for null, we always return null count. + // Note that for ranges including null (e.g. 
{$lt: null}) we don't generate any SargableNodes. + ASSERT_EQ_ELEMMATCH_CE(t, kNumNull, 0.0, "scalar", "{$eq: null}"); + + // TODO SERVER-70936: Add tests for booleans. + // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, 0.0, "scalar", "{$eq: true}"); + // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, 0.0, "scalar", "{$eq: false}"); + + // Tests for array type counts only. + // For object-only intervals in an array histogram, if we're using $elemMatch on an object-only + // interval, we always return object count. While we have no scalar type counts for "array", + // non-$elemMatch queries should also match objects embedded in arrays, so we still return + // object count in that case. + + // Test object equality. + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$eq: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$eq: {a: 1}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$eq: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$gt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$gte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$lte: {b: 2, c: 3}}"); + + // Test intervals including the empty object. + // Note: we don't even generate a SargableNode for the first case. The generated bounds are: + // [{}, {}) because {} is the "minimum" value for the object type. + ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "array", "{$lt: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$gt: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$gte: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObj, "array", "{$lte: {}}"); + + // Similar to above, here we have two object intervals for non-$elemMatch queries. However, for + // $elemMatch queries, we have the following intervals in the requirements map: + // 1. [[], BinData(0, )) with CE 1000 + // 2. The actual object interval, e.g. ({}, {b: 2, c: 3}] with CE 200 + constexpr double kArrEMCard = kNumObj; // == 200/1000 * sqrt(1000/1000) * 1000 + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {}, $lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {}, $lt: {b: 2, c: 3}}"); + + // Test intervals including {a: 1}; similar to above, we have two object intervals. + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {a: 1}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {a: 1}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {a: 1}, $lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {a: 1}, $lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {a: 1}, $lte: {a: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {a: 1}, $lte: {a: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gte: {a: 1}, $lt: {a: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kArrEMCard, "array", "{$gt: {a: 1}, $lt: {a: 3}}"); + + // Test that for null, we always return null count. + // Note that for ranges including null (e.g. {$lt: null}) we don't generate any SargableNodes. 
+ ASSERT_EQ_ELEMMATCH_CE(t, kNumNull, kNumNull, "array", "{$eq: null}"); + + // TODO SERVER-70936: Add tests for booleans. + // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, kNumBool, "array", "{$eq: true}"); + // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, kNumBool, "array", "{$eq: false}"); + + // Tests for mixed type counts only. Regular match predicates should be estimated as the sum of + // the scalar and array counts (e.g. for objects, 'kNumObj'), while elemMatch predicates + // should be estimated without scalars, returning the array type count (for objects this is + // 'kNumObjMA'). + + // Test object equality. + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$eq: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$eq: {a: 1}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$eq: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$gt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$gte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$lte: {b: 2, c: 3}}"); + + // Test intervals including the empty object. + // Note: we don't even generate a SargableNode for the first case. The generated bounds are: + // [{}, {}) because {} is the "minimum" value for the object type. + ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "mixed", "{$lt: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$gt: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$gte: {}}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, kNumObjMA, "mixed", "{$lte: {}}"); + + // Similar to above, here we have two object intervals for non-$elemMatch queries. However, for + // $elemMatch queries, we have the following intervals in the requirements map: + // 1. [[], BinData(0, )) with CE 1000 + // 2. The actual object interval, e.g. ({}, {b: 2, c: 3}] with CE 50 + constexpr double kMixEMCard = kNumObjMA; // == 50/1000 * sqrt(1000/1000) * 1000 + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {}, $lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {}, $lt: {b: 2, c: 3}}"); + + // Test intervals including {a: 1}; similar to above, we have two object intervals. + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {a: 1}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {a: 1}, $lte: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {a: 1}, $lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {a: 1}, $lt: {b: 2, c: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {a: 1}, $lte: {a: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {a: 1}, $lte: {a: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gte: {a: 1}, $lt: {a: 3}}"); + ASSERT_EQ_ELEMMATCH_CE(t, k2ObjCard, kMixEMCard, "mixed", "{$gt: {a: 1}, $lt: {a: 3}}"); + + // Test that for null, we always return null count. + // Note that for ranges including null (e.g. {$lt: null}) we don't generate any SargableNodes. + ASSERT_EQ_ELEMMATCH_CE(t, kNumNull, kNumNullMA, "mixed", "{$eq: null}"); + + // TODO SERVER-70936: Add tests for booleans. 
+    // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, kNumBoolMA, "mixed", "{$eq: true}");
+    // ASSERT_EQ_ELEMMATCH_CE(t, kNumBool, kNumBoolMA, "mixed", "{$eq: false}");
+
+    // Test combinations of the three fields/type counters.
+    constexpr double k3ObjCard =
+        59.814;  // == 200/1000 * sqrt(200/1000) * sqrt(sqrt(200/1000)) * 1000
+    constexpr double k4ObjCard =
+        48.914;  // == 0.2 * sqrt(0.2) * sqrt(sqrt(0.2)) * sqrt(sqrt(sqrt(0.2))) * 1000
+    ASSERT_MATCH_CE_NODE(t,
+                         "{scalar: {$eq: {a: 1}}, mixed: {$eq: {b: 1}}, array: {$eq: {c: 1}}}",
+                         k3ObjCard,
+                         isSargable3);
+    ASSERT_MATCH_CE_NODE(
+        t,
+        "{scalar: {$eq: {}}, mixed: {$lt: {b: 1}}, array: {$gt: {a: 1}, $lte: {a: 2, b: 4, c: 3}}}",
+        k4ObjCard,
+        isSargable4);
+
+    // Should always get a 0.0 cardinality for an $elemMatch on a scalar predicate.
+    ASSERT_MATCH_CE(t,
+                    "{scalar: {$elemMatch: {$eq: {a: 1}}}, mixed: {$elemMatch: {$eq: {b: 1}}},"
+                    " array: {$elemMatch: {$eq: {c: 1}}}}",
+                    0.0);
+    ASSERT_MATCH_CE(t,
+                    "{scalar: {$elemMatch: {$eq: {}}}, mixed: {$elemMatch: {$lt: {b: 1}}},"
+                    " array: {$elemMatch: {$gt: {a: 1}, $lte: {a: 2, b: 4, c: 3}}}}",
+                    0.0);
+
+    // The 'mixed' interval estimate is 50, while the 'array' interval estimate is 200.
+    constexpr double kArrMixObjEMCard = 22.3607;  // == 50/1000 * sqrt(200/1000) * 1000
+    ASSERT_MATCH_CE_NODE(t,
+                         "{mixed: {$elemMatch: {$eq: {b: 1}}}, array: {$elemMatch: {$eq: {c: 1}}}}",
+                         kArrMixObjEMCard,
+                         isSargable4);
+    ASSERT_MATCH_CE_NODE(t,
+                         "{mixed: {$elemMatch: {$lt: {b: 1}}},"
+                         " array: {$elemMatch: {$gt: {a: 1}, $lte: {a: 2, b: 4, c: 3}}}}",
+                         kArrMixObjEMCard,
+                         isSargable4);
+}
+
+TEST(CEHistogramTest, TestNestedArrayTypeCounterPredicates) {
+    // This test validates the correct behavior of both the nested-array type counter as well as
+    // combinations of type counters and histogram estimates.
+    constexpr double kCollCard = 1000.0;
+    constexpr double kNumArr = 600.0;      // Total number of arrays.
+    constexpr double kNumNestArr = 500.0;  // Frequency of nested arrays, e.g. [[1, 2, 3]].
+    constexpr double kNumNonNestArr = 100.0;
+    constexpr double kNum1 = 2.0;      // Frequency of 1.
+    constexpr double kNum2 = 3.0;      // Frequency of 2.
+    constexpr double kNum3 = 5.0;      // Frequency of 3.
+    constexpr double kNumArr1 = 20.0;  // Frequency of [1].
+    constexpr double kNumArr2 = 30.0;  // Frequency of [2].
+    constexpr double kNumArr3 = 50.0;  // Frequency of [3].
+    constexpr double kNumObj = 390.0;  // Total number of scalar objects.
+
+    // Sanity test numbers.
+    ASSERT_EQ(kNumArr1 + kNumArr2, kNumArr3);
+    ASSERT_EQ(kNumNonNestArr + kNumNestArr, kNumArr);
+    ASSERT_EQ(kNumObj + kNumArr + kNum1 + kNum2 + kNum3, kCollCard);
+
+    // Define histogram buckets.
+    TestBuckets scalarBuckets{{Value(1), kNum1}, {Value(2), kNum2}, {Value(3), kNum3}};
+    TestBuckets arrUniqueBuckets{{Value(1), kNumArr1}, {Value(2), kNumArr2}, {Value(3), kNumArr3}};
+    TestBuckets arrMinBuckets{{Value(1), kNumArr1}, {Value(2), kNumArr2}, {Value(3), kNumArr3}};
+    TestBuckets arrMaxBuckets{{Value(1), kNumArr1}, {Value(2), kNumArr2}, {Value(3), kNumArr3}};
+
+    // Define type counts.
+    TypeCounts arrayTypeCounts{{sbe::value::TypeTags::Array, kNumNestArr},
+                               {sbe::value::TypeTags::NumberInt32, kNumNonNestArr}};
+    TypeCounts scalarTypeCounts{{sbe::value::TypeTags::Object, kNumObj}};
+
+    CEHistogramTester t(collName, kCollCard);
+    t.addHistogram("na",
+                   getArrayHistogramFromData(std::move(scalarBuckets),
+                                             std::move(arrUniqueBuckets),
+                                             std::move(arrMinBuckets),
+                                             std::move(arrMaxBuckets),
+                                             std::move(arrayTypeCounts),
+                                             kNumArr,
+                                             0 /* Empty array count.
*/, + std::move(scalarTypeCounts))); + t.setIndexes( + {{"index", makeIndexDefinition("na", CollationOp::Ascending, /* isMultiKey */ true)}}); + + // Some equality tests on types that are not present in the type counters should return 0.0. + // TODO SERVER-70936: Add tests for booleans. + // ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "na", "{$eq: false}"); + // ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "na", "{$eq: true}"); + ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "na", "{$eq: null}"); + // We don't have any objects in arrays, so don't count them. + ASSERT_EQ_ELEMMATCH_CE(t, kNumObj, 0.0, "na", "{$eq: {a: 1}}"); + + // Quick equality test to see if regular array histogram estimation still works as expected. + ASSERT_EQ_ELEMMATCH_CE(t, kNumArr1 + kNum1, kNumArr1, "na", "{$eq: 1}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumArr2 + kNum2, kNumArr2, "na", "{$eq: 2}"); + ASSERT_EQ_ELEMMATCH_CE(t, kNumArr3 + kNum3, kNumArr3, "na", "{$eq: 3}"); + + // Test a range predicate. + // - For simple $lt, we correctly return both scalar and array counts that could match. + // - For $elemMatch + $lt, we have two entries in the requirements map. + // - The PathArr interval, estimated correctly as 'kNumArr'. + // - The interval {$lt: 3}, estimated as an array histogram range interval. + // We then combine the estimates for the two using conjunctive exponential backoff. + constexpr double elemMatchRange = 71.5485; + ASSERT_EQ_ELEMMATCH_CE( + t, kNumArr1 + kNum1 + kNumArr2 + kNum2, elemMatchRange, "na", "{$lt: 3}"); + ASSERT_EQ_ELEMMATCH_CE(t, 0.0, 0.0, "na", "{$lt: 1}"); + + // Test equality to arrays. + // - $elemMatch, estimation, as expected, will return the count of nested arrays. + // - For the case where we see equality to the array, we have a disjunction of intervals in the + // same entry of the SargableNode requirements map. For the case of {$eq: [1]}, for example, we + // have: [[1], [1]] U [1, 1]. As a result, we estimate each point interval separately: + // - [[1], [1]]: We estimate the nested array interval as 'kNumNestArr'. + // - [1, 1]: We estimate the regular point interval as 'kNumArr1' + 'kNum1'. + // We then combine the results by exponential backoff. Note that we will NOT match {na: 1}; + // however, because of the way the interval is defined, our estimate suggests that we would. + // TODO: is there a way to know this on the CE side? + constexpr double kArr1EqCard = 505.531; // (1 - (1 - 500.0/1000) * sqrt(1 - 22.0/1000)) * 1000 + constexpr double kArr2EqCard = 508.319; // (1 - (1 - 500.0/1000) * sqrt(1 - 33.0/1000)) * 1000 + constexpr double kArr3EqCard = 513.944; // (1 - (1 - 500.0/1000) * sqrt(1 - 55.0/1000)) * 1000 + ASSERT_EQ_ELEMMATCH_CE_NODE(t, kArr1EqCard, kNumNestArr, "na", "{$eq: [1]}", isSargable); + ASSERT_EQ_ELEMMATCH_CE_NODE(t, kArr2EqCard, kNumNestArr, "na", "{$eq: [2]}", isSargable); + ASSERT_EQ_ELEMMATCH_CE_NODE(t, kArr3EqCard, kNumNestArr, "na", "{$eq: [3]}", isSargable); + // For the last case, we have the interval [[1, 2, 3], [1, 2, 3]] U [1, 1]. + // TODO: is this interval semantically correct? + ASSERT_EQ_ELEMMATCH_CE_NODE(t, kArr1EqCard, kNumNestArr, "na", "{$eq: [1, 2, 3]}", isSargable); + + // Now, we test the case of nested arrays. + // - $elemMatch, once again, returns the number of nested arrays. + // - Simple equality generates two intervals. We estimate both intervals using the nested array + // type count. 
For {$eq: [[1, 2, 3]]}, we get:
+    //   - [[1, 2, 3], [1, 2, 3]] U [[[1, 2, 3]], [[1, 2, 3]]]
+    constexpr double kNestedEqCard =
+        646.447;  // (1 - (1 - 500.0/1000) * sqrt(1 - 500.0/1000)) * 1000
+    ASSERT_EQ_ELEMMATCH_CE_NODE(
+        t, kNestedEqCard, kNumNestArr, "na", "{$eq: [[1, 2, 3]]}", isSargable);
+    ASSERT_EQ_ELEMMATCH_CE_NODE(t, kNestedEqCard, kNumNestArr, "na", "{$eq: [[1]]}", isSargable);
+    ASSERT_EQ_ELEMMATCH_CE_NODE(t, kNestedEqCard, kNumNestArr, "na", "{$eq: [[2]]}", isSargable);
+    ASSERT_EQ_ELEMMATCH_CE_NODE(t, kNestedEqCard, kNumNestArr, "na", "{$eq: [[3]]}", isSargable);
+
+    // Note: we can't convert range queries on arrays to SargableNodes yet. If we ever can, we
+    // should add some more tests here.
+}
+
+TEST(CEHistogramTest, TestFallbackForNonConstIntervals) {
+    // This is a sanity test to validate fallback for an interval with non-const bounds.
+    IntervalRequirement intervalLowNonConst{
+        BoundRequirement(true /*inclusive*/, make<Variable>("v1")),
+        BoundRequirement::makePlusInf()};
+
+    IntervalRequirement intervalHighNonConst{
+        BoundRequirement::makeMinusInf(),
+        BoundRequirement(true /*inclusive*/, make<Variable>("v2"))};
+
+    IntervalRequirement intervalEqNonConst{
+        BoundRequirement(true /*inclusive*/, make<Variable>("v3")),
+        BoundRequirement(true /*inclusive*/, make<Variable>("v3"))};
+
+    const auto estInterval = [](const auto& interval) {
+        ArrayHistogram ah;
+        return estimateIntervalCardinality(
+            ah, interval, 100 /* inputCardinality */, true /* includeScalar */);
+    };
+
+    ASSERT_EQ(estInterval(intervalLowNonConst), -1.0);
+    ASSERT_EQ(estInterval(intervalHighNonConst), -1.0);
+    ASSERT_EQ(estInterval(intervalEqNonConst), -1.0);
+}
+}  // namespace
+}  // namespace mongo::optimizer::ce
diff --git a/src/mongo/db/query/ce/histogram_interpolation_test.cpp b/src/mongo/db/query/ce/histogram_interpolation_test.cpp
new file mode 100644
index 00000000000..4ad9d38b4e0
--- /dev/null
+++ b/src/mongo/db/query/ce/histogram_interpolation_test.cpp
@@ -0,0 +1,508 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce/histogram_predicate_estimation.h"
+#include "mongo/db/query/ce/test_utils.h"
+#include "mongo/db/query/sbe_stage_builder_helpers.h"
+#include "mongo/db/query/stats/array_histogram.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::optimizer::ce {
+namespace {
+namespace value = sbe::value;
+
+using stats::ArrayHistogram;
+using stats::ScalarHistogram;
+using stats::TypeCounts;
+
+TEST(EstimatorTest, ManualHistogram) {
+    std::vector<BucketData> data{{0, 1.0, 1.0, 1.0},
+                                 {10, 1.0, 10.0, 5.0},
+                                 {20, 3.0, 15.0, 3.0},
+                                 {30, 1.0, 10.0, 4.0},
+                                 {40, 2.0, 0.0, 0.0},
+                                 {50, 1.0, 10.0, 5.0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    ASSERT_EQ(55.0, getTotals(hist).card);
+
+    ASSERT_EQ(1.0, estimateIntValCard(hist, 0, EstimationType::kEqual));
+    ASSERT_EQ(2.0, estimateIntValCard(hist, 5, EstimationType::kEqual));
+    ASSERT_EQ(0.0, estimateIntValCard(hist, 35, EstimationType::kEqual));
+
+    ASSERT_EQ(15.5, estimateIntValCard(hist, 15, EstimationType::kLess));
+    ASSERT_EQ(20.5, estimateIntValCard(hist, 15, EstimationType::kLessOrEqual));
+    ASSERT_EQ(28, estimateIntValCard(hist, 20, EstimationType::kLess));
+    ASSERT_EQ(31.0, estimateIntValCard(hist, 20, EstimationType::kLessOrEqual));
+
+    ASSERT_EQ(42, estimateIntValCard(hist, 10, EstimationType::kGreater));
+    ASSERT_EQ(43, estimateIntValCard(hist, 10, EstimationType::kGreaterOrEqual));
+    ASSERT_EQ(19, estimateIntValCard(hist, 25, EstimationType::kGreater));
+    ASSERT_EQ(21.5, estimateIntValCard(hist, 25, EstimationType::kGreaterOrEqual));
+}
+
+TEST(EstimatorTest, UniformIntEstimate) {
+    // This hard-codes a maxdiff histogram with 10 buckets built off a uniform int distribution with
+    // a minimum of 0, a maximum of 1000, and 70 distinct values.
+    std::vector<BucketData> data{{2, 1, 0, 0},
+                                 {57, 3, 2, 1},
+                                 {179, 5, 10, 6},
+                                 {317, 5, 9, 6},
+                                 {344, 3, 0, 0},
+                                 {558, 4, 19, 12},
+                                 {656, 2, 4, 3},
+                                 {798, 3, 7, 4},
+                                 {951, 5, 17, 7},
+                                 {986, 1, 0, 0}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    // Predicates over bucket bound.
+    double expectedCard = estimateIntValCard(hist, 558, EstimationType::kEqual);
+    ASSERT_EQ(4.0, expectedCard);
+    expectedCard = estimateIntValCard(hist, 558, EstimationType::kLess);
+    ASSERT_EQ(57.0, expectedCard);
+    expectedCard = estimateIntValCard(hist, 558, EstimationType::kLessOrEqual);
+    ASSERT_EQ(61.0, expectedCard);
+
+    // Predicates over value inside of a bucket.
+
+    // Query: [{$match: {a: {$eq: 530}}}].
+    expectedCard = estimateIntValCard(hist, 530, EstimationType::kEqual);
+    ASSERT_APPROX_EQUAL(1.6, expectedCard, 0.1);  // Actual: 1.
+
+    // Query: [{$match: {a: {$lt: 530}}}].
+    expectedCard = estimateIntValCard(hist, 530, EstimationType::kLess);
+    ASSERT_APPROX_EQUAL(52.9, expectedCard, 0.1);  // Actual: 50.
+
+    // Query: [{$match: {a: {$lte: 530}}}].
+    expectedCard = estimateIntValCard(hist, 530, EstimationType::kLessOrEqual);
+    ASSERT_APPROX_EQUAL(54.5, expectedCard, 0.1);  // Actual: 51.
+
+    // Query: [{$match: {a: {$eq: 400}}}].
+    expectedCard = estimateIntValCard(hist, 400, EstimationType::kEqual);
+    ASSERT_APPROX_EQUAL(1.6, expectedCard, 0.1);  // Actual: 1.
+
+    // Query: [{$match: {a: {$lt: 400}}}].
+    expectedCard = estimateIntValCard(hist, 400, EstimationType::kLess);
+    ASSERT_APPROX_EQUAL(41.3, expectedCard, 0.1);  // Actual: 39.
+
+    // Query: [{$match: {a: {$lte: 400}}}].
+    expectedCard = estimateIntValCard(hist, 400, EstimationType::kLessOrEqual);
+    ASSERT_APPROX_EQUAL(43.0, expectedCard, 0.1);  // Actual: 40.
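+
+    // Cross-check the $lte interpolation arithmetic by hand. This is a minimal sketch of the
+    // formula, assuming the {bound, eqFreq, rangeFreq, ndv} bucket layout used above: 530 falls
+    // into the bucket bounded by 558 (rangeFreq 19.0, ndv 12.0), the preceding bound is 344, and
+    // the cumulative frequency of all buckets below is 38.0.
+    const double ratio = (530.0 - 344.0) / (558.0 - 344.0);
+    ASSERT_APPROX_EQUAL(38.0 + ratio * 19.0,
+                        estimateIntValCard(hist, 530, EstimationType::kLessOrEqual),
+                        0.1);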
+}
+
+TEST(EstimatorTest, NormalIntEstimate) {
+    // This hard-codes a maxdiff histogram with 10 buckets built off a normal int distribution with
+    // a minimum of 0, a maximum of 1000, and 70 distinct values.
+    std::vector<BucketData> data{{2, 1, 0, 0},
+                                 {317, 8, 20, 15},
+                                 {344, 2, 0, 0},
+                                 {388, 3, 0, 0},
+                                 {423, 4, 2, 2},
+                                 {579, 4, 12, 8},
+                                 {632, 3, 2, 1},
+                                 {696, 3, 5, 3},
+                                 {790, 5, 4, 2},
+                                 {993, 1, 21, 9}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    // Predicates over bucket bound.
+    double expectedCard = estimateIntValCard(hist, 696, EstimationType::kEqual);
+    ASSERT_EQ(3.0, expectedCard);
+    expectedCard = estimateIntValCard(hist, 696, EstimationType::kLess);
+    ASSERT_EQ(66.0, expectedCard);
+    expectedCard = estimateIntValCard(hist, 696, EstimationType::kLessOrEqual);
+    ASSERT_EQ(69.0, expectedCard);
+
+    // Predicates over value inside of a bucket.
+
+    // Query: [{$match: {a: {$eq: 150}}}].
+    expectedCard = estimateIntValCard(hist, 150, EstimationType::kEqual);
+    ASSERT_APPROX_EQUAL(1.3, expectedCard, 0.1);  // Actual: 1.
+
+    // Query: [{$match: {a: {$lt: 150}}}].
+    expectedCard = estimateIntValCard(hist, 150, EstimationType::kLess);
+    ASSERT_APPROX_EQUAL(9.1, expectedCard, 0.1);  // Actual: 9.
+
+    // Query: [{$match: {a: {$lte: 150}}}].
+    expectedCard = estimateIntValCard(hist, 150, EstimationType::kLessOrEqual);
+    ASSERT_APPROX_EQUAL(10.4, expectedCard, 0.1);  // Actual: 10.
+}
+
+TEST(EstimatorTest, UniformStrEstimate) {
+    // This hard-codes a maxdiff histogram with 10 buckets built off a uniform string distribution
+    // with a minimum length of 3, a maximum length of 5, and 80 distinct values.
+    std::vector<BucketData> data{{{"0ejz", 2, 0, 0},
+                                  {"8DCaq", 3, 4, 4},
+                                  {"Cy5Kw", 3, 3, 3},
+                                  {"WXX7w", 3, 31, 20},
+                                  {"YtzS", 2, 0, 0},
+                                  {"fuK", 5, 13, 7},
+                                  {"gLkp", 3, 0, 0},
+                                  {"ixmVx", 2, 6, 2},
+                                  {"qou", 1, 9, 6},
+                                  {"z2b", 1, 9, 6}}};
+    const ScalarHistogram hist = createHistogram(data);
+
+    // Predicates over value inside of a bucket.
+    const auto [tag, value] = value::makeNewString("TTV"_sd);
+    value::ValueGuard vg(tag, value);
+
+    // Query: [{$match: {a: {$eq: 'TTV'}}}].
+    double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_APPROX_EQUAL(1.55, expectedCard, 0.1);  // Actual: 2.
+
+    // Query: [{$match: {a: {$lt: 'TTV'}}}].
+    expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+    ASSERT_APPROX_EQUAL(39.8, expectedCard, 0.1);  // Actual: 39.
+
+    // Query: [{$match: {a: {$lte: 'TTV'}}}].
+    expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+    ASSERT_APPROX_EQUAL(41.3, expectedCard, 0.1);  // Actual: 41.
+}
+
+TEST(EstimatorTest, NormalStrEstimate) {
+    // This hard-codes a maxdiff histogram with 10 buckets built off a normal string distribution
+    // with a minimum length of 3, a maximum length of 5, and 80 distinct values.
+    std::vector<BucketData> data{{
+        {"0ejz", 1, 0, 0},
+        {"4FGjc", 3, 5, 3},
+        {"9bU3", 2, 3, 2},
+        {"Cy5Kw", 3, 3, 3},
+        {"Lm4U", 2, 11, 5},
+        {"TTV", 5, 14, 8},
+        {"YtzS", 2, 3, 2},
+        {"o9cD4", 6, 26, 16},
+        {"qfmnP", 1, 4, 2},
+        {"xqbi", 2, 4, 4},
+    }};
+    const ScalarHistogram hist = createHistogram(data);
+
+    // Predicates over bucket bound.
+    auto [tag, value] = value::makeNewString("TTV"_sd);
+    value::ValueGuard vg(tag, value);
+
+    // Query: [{$match: {a: {$eq: 'TTV'}}}].
+    double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_APPROX_EQUAL(5.0, expectedCard, 0.1);  // Actual: 5.
+
+    // Query: [{$match: {a: {$lt: 'TTV'}}}].
+    expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+    ASSERT_APPROX_EQUAL(47.0, expectedCard, 0.1);  // Actual: 47.
+
+    // Query: [{$match: {a: {$lte: 'TTV'}}}].
+    expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+    ASSERT_APPROX_EQUAL(52.0, expectedCard, 0.1);  // Actual: 52.
+
+    // Predicates over value inside of a bucket.
+    std::tie(tag, value) = value::makeNewString("Pfa"_sd);
+
+    // Query: [{$match: {a: {$eq: 'Pfa'}}}].
+    expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_APPROX_EQUAL(1.75, expectedCard, 0.1);  // Actual: 2.
+
+    // Query: [{$match: {a: {$lt: 'Pfa'}}}].
+    expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+    ASSERT_APPROX_EQUAL(38.3, expectedCard, 0.1);  // Actual: 35.
+
+    // Query: [{$match: {a: {$lte: 'Pfa'}}}].
+    expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+    ASSERT_APPROX_EQUAL(40.0, expectedCard, 0.1);  // Actual: 37.
+}
+
+TEST(EstimatorTest, UniformIntStrEstimate) {
+    // This hard-codes a maxdiff histogram with 20 buckets built off of a uniform distribution with
+    // two types occurring with equal probability:
+    // - 100 distinct ints between 0 and 1000, and
+    // - 100 distinct strings of length between 2 and 5.
+    std::vector<BucketData> data{{
+        {2, 3, 0, 0},       {19, 4, 1, 1},      {226, 2, 49, 20},  {301, 5, 12, 4},
+        {317, 3, 0, 0},     {344, 2, 3, 1},     {423, 5, 18, 6},   {445, 3, 0, 0},
+        {495, 3, 4, 2},     {542, 5, 9, 3},     {696, 3, 44, 19},  {773, 4, 11, 5},
+        {805, 2, 8, 4},     {931, 5, 21, 8},    {998, 4, 21, 3},   {"8N4", 5, 31, 14},
+        {"MIb", 5, 45, 17}, {"Zgi", 3, 55, 22}, {"pZ", 6, 62, 25}, {"yUwxz", 5, 29, 12},
+    }};
+    const ScalarHistogram hist = createHistogram(data);
+    const ArrayHistogram arrHist(
+        hist, TypeCounts{{value::TypeTags::NumberInt64, 254}, {value::TypeTags::StringSmall, 246}});
+
+    // Predicates over value inside of the last numeric bucket.
+
+    // Query: [{$match: {a: {$eq: 993}}}].
+    double expectedCard = estimateIntValCard(hist, 993, EstimationType::kEqual);
+    ASSERT_APPROX_EQUAL(7.0, expectedCard, 0.1);  // Actual: 9.
+
+    // Query: [{$match: {a: {$lt: 993}}}].
+    expectedCard = estimateIntValCard(hist, 993, EstimationType::kLess);
+    ASSERT_APPROX_EQUAL(241.4, expectedCard, 0.1);  // Actual: 241.
+
+    // Query: [{$match: {a: {$lte: 993}}}].
+    expectedCard = estimateIntValCard(hist, 993, EstimationType::kLessOrEqual);
+    ASSERT_APPROX_EQUAL(248.4, expectedCard, 0.1);  // Actual: 250.
+
+    // Predicates over value inside of the first string bucket.
+    auto [tag, value] = value::makeNewString("04e"_sd);
+    value::ValueGuard vg(tag, value);
+
+    // Query: [{$match: {a: {$eq: '04e'}}}].
+    expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+    ASSERT_APPROX_EQUAL(2.2, expectedCard, 0.1);  // Actual: 3.
+
+    value::TypeTags lowTag = value::TypeTags::NumberInt64;
+    value::Value lowVal = 100000000;
+
+    // Type bracketing: low value of different type than the bucket bound.
+    // Query: [{$match: {a: {$eq: 100000000}}}].
+    expectedCard = estimateCardEq(arrHist, lowTag, lowVal, true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(0.0, expectedCard, 0.1);  // Actual: 0.
+
+    // No interpolation for inequality to values inside the first string bucket, fallback to half of
+    // the bucket frequency.
+
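+    // For example, '04e' falls into the first string bucket ("8N4": rangeFreq 31, ndv 14), and
+    // the preceding bound (998) belongs to a different type bracket, so the in-bucket ratio falls
+    // back to 0.5 instead of being interpolated:
+    //   card(< '04e') ~= 254 + 0.5 * 31 - 31/14 ~= 267.3   (254 == total int frequency)
+    //   card(<= 100000000) == 254                          (all ints sort below the strings)
+    // so the range estimate below is ~267.3 - 254 ~= 13.3.
+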
+    // Query: [{$match: {a: {$lt: '04e'}}}].
+    expectedCard = estimateCardRange(arrHist,
+                                     false /* lowInclusive */,
+                                     lowTag,
+                                     lowVal,
+                                     false /* highInclusive */,
+                                     tag,
+                                     value,
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(13.3, expectedCard, 0.1);  // Actual: 0.
+
+    // Query: [{$match: {a: {$lte: '04e'}}}].
+    expectedCard = estimateCardRange(arrHist,
+                                     false /* lowInclusive */,
+                                     lowTag,
+                                     lowVal,
+                                     true /* highInclusive */,
+                                     tag,
+                                     value,
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(15.5, expectedCard, 0.1);  // Actual: 3.
+
+    // Value towards the end of the bucket gets the same half bucket estimate.
+    std::tie(tag, value) = value::makeNewString("8B5"_sd);
+
+    // Query: [{$match: {a: {$lt: '8B5'}}}].
+    expectedCard = estimateCardRange(arrHist,
+                                     false /* lowInclusive */,
+                                     lowTag,
+                                     lowVal,
+                                     false /* highInclusive */,
+                                     tag,
+                                     value,
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(13.3, expectedCard, 0.1);  // Actual: 24.
+
+    // Query: [{$match: {a: {$lte: '8B5'}}}].
+    expectedCard = estimateCardRange(arrHist,
+                                     false /* lowInclusive */,
+                                     lowTag,
+                                     lowVal,
+                                     true /* highInclusive */,
+                                     tag,
+                                     value,
+                                     true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(15.5, expectedCard, 0.1);  // Actual: 29.
+}
+
+TEST(EstimatorTest, UniformIntArrayOnlyEstimate) {
+    // This hard-codes a maxdiff histogram with 10 buckets built off of an array distribution with
+    // arrays between 3 and 5 elements long, each containing 100 distinct ints uniformly distributed
+    // between 0 and 1000. There are no scalar elements.
+    std::vector<BucketData> scalarData{{}};
+    const ScalarHistogram scalarHist = createHistogram(scalarData);
+
+    std::vector<BucketData> minData{{
+        {5, 3, 0, 0},   {19, 5, 2, 1},  {57, 4, 4, 3},  {116, 7, 13, 7}, {198, 3, 15, 6},
+        {228, 2, 3, 2}, {254, 4, 0, 0}, {280, 2, 2, 1}, {335, 3, 5, 3},  {344, 2, 0, 0},
+        {388, 3, 0, 0}, {420, 2, 0, 0}, {454, 1, 6, 3}, {488, 2, 1, 1},  {530, 1, 0, 0},
+        {561, 1, 0, 0}, {609, 1, 0, 0}, {685, 1, 0, 0}, {713, 1, 0, 0},  {758, 1, 0, 0},
+    }};
+    const ScalarHistogram minHist = createHistogram(minData);
+
+    std::vector<BucketData> maxData{{
+        {301, 1, 0, 0},  {408, 2, 0, 0}, {445, 1, 0, 0}, {605, 2, 0, 0},  {620, 1, 0, 0},
+        {665, 1, 1, 1},  {687, 3, 0, 0}, {704, 2, 6, 2}, {718, 2, 2, 1},  {741, 2, 1, 1},
+        {752, 2, 0, 0},  {823, 7, 3, 3}, {827, 1, 0, 0}, {852, 3, 0, 0},  {864, 5, 0, 0},
+        {909, 7, 12, 5}, {931, 2, 3, 1}, {939, 3, 0, 0}, {970, 2, 12, 4}, {998, 1, 10, 4},
+    }};
+    const ScalarHistogram maxHist = createHistogram(maxData);
+
+    std::vector<BucketData> uniqueData{{
+        {5, 3, 0, 0},     {19, 6, 2, 1},    {57, 4, 4, 3},    {116, 7, 15, 8},  {228, 2, 38, 13},
+        {254, 7, 0, 0},   {269, 10, 0, 0},  {280, 7, 3, 1},   {306, 4, 1, 1},   {317, 4, 0, 0},
+        {344, 2, 19, 5},  {423, 2, 27, 8},  {507, 2, 22, 13}, {704, 8, 72, 34}, {718, 6, 3, 1},
+        {758, 3, 13, 4},  {864, 7, 35, 14}, {883, 4, 0, 0},   {939, 5, 32, 10}, {998, 1, 24, 9},
+    }};
+    const ScalarHistogram uniqueHist = createHistogram(uniqueData);
+
+    const ArrayHistogram arrHist(scalarHist,
+                                 TypeCounts{{value::TypeTags::Array, 100}},
+                                 uniqueHist,
+                                 minHist,
+                                 maxHist,
+                                 TypeCounts{},
+                                 0);
+
+    // Query in the middle of the domain: estimate from ArrayUnique histogram.
+    value::TypeTags lowTag = value::TypeTags::NumberInt64;
+    value::Value lowVal = 500;
+    value::TypeTags highTag = value::TypeTags::NumberInt64;
+    value::Value highVal = 600;
+
+    // Test interpolation for query: [{$match: {a: {$elemMatch: {$gt: 500, $lt: 600}}}}].
+ double expectedCard = estimateCardRange(arrHist, + false /* lowInclusive */, + lowTag, + lowVal, + false /* highInclusive */, + highTag, + highVal, + false /* includeScalar */); + ASSERT_APPROX_EQUAL(27.0, expectedCard, 0.1); // actual 21. + + // Test interpolation for query: [{$match: {a: {$gt: 500, $lt: 600}}}]. + // Note: although there are no scalars, the estimate is different than the + // above since we use different formulas. + expectedCard = estimateCardRange(arrHist, + false /* lowInclusive */, + lowTag, + lowVal, + false /* highInclusive */, + highTag, + highVal, + true /* includeScalar */); + ASSERT_APPROX_EQUAL(92.0, expectedCard, 0.1); // actual 92. + + // Query at the end of the domain: more precise estimates from ArrayMin, ArrayMax histograms. + lowVal = 10; + highVal = 110; + + // Test interpolation for query: [{$match: {a: {$elemMatch: {$gt: 10, $lt: 110}}}}]. + expectedCard = estimateCardRange(arrHist, + false /* lowInclusive */, + lowTag, + lowVal, + false /* highInclusive */, + highTag, + highVal, + false /* includeScalar */); + ASSERT_APPROX_EQUAL(24.1, expectedCard, 0.1); // actual 29. + + // Test interpolation for query: [{$match: {a: {$gt: 10, $lt: 110}}}]. + expectedCard = estimateCardRange(arrHist, + false /* lowInclusive */, + lowTag, + lowVal, + false /* highInclusive */, + highTag, + highVal, + true /* includeScalar */); + ASSERT_APPROX_EQUAL(27.8, expectedCard, 0.1); // actual 31. +} + +TEST(EstimatorTest, UniformIntMixedArrayEstimate) { + // This hard-codes a maxdiff histogram with 20 buckets built off of a mixed distribution split + // with equal probability between: + // - an array distribution between 3 and 5 elements long, each containing 80 distinct ints + // uniformly distributed between 0 and 1000, and + // - a uniform int distribution with 80 distinct ints between 0 and 1000. 
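+    //
+    // With 'includeScalar' set to true, the range estimates asserted below combine two
+    // components (a rough sketch of the formulas from histogram_predicate_estimation.cpp,
+    // applied to the per-field histograms defined next):
+    //
+    //   card ~= [ArrayMin(< high) - ArrayMax(<= low)]   // range matches on array values
+    //         + [Scalar(< high) - Scalar(<= low)]       // range matches on scalar values
+    //
+    // whereas the $elemMatch variant ('includeScalar' == false) takes the maximum of the
+    // ArrayMin/ArrayMax range estimates and a density-adjusted ArrayUnique estimate.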
+    std::vector<BucketData> scalarData{{
+        {25, 1, 0, 0},  {41, 2, 0, 0},  {142, 2, 3, 3},  {209, 3, 3, 1}, {243, 1, 2, 1},
+        {296, 3, 4, 3}, {321, 5, 4, 2}, {480, 3, 9, 8},  {513, 3, 3, 2}, {554, 1, 0, 0},
+        {637, 3, 3, 2}, {666, 2, 1, 1}, {697, 2, 2, 1},  {750, 3, 3, 2}, {768, 4, 0, 0},
+        {791, 4, 3, 3}, {851, 2, 2, 2}, {927, 2, 10, 6}, {958, 3, 2, 1}, {980, 3, 0, 0},
+    }};
+    const ScalarHistogram scalarHist = createHistogram(scalarData);
+
+    std::vector<BucketData> minData{{
+        {3, 3, 0, 0},   {5, 8, 0, 0},   {9, 3, 0, 0},   {19, 2, 0, 0},  {49, 7, 4, 2},
+        {69, 6, 0, 0},  {115, 3, 5, 3}, {125, 2, 0, 0}, {146, 1, 2, 1}, {198, 2, 4, 3},
+        {214, 2, 0, 0}, {228, 3, 0, 0}, {260, 3, 4, 1}, {280, 1, 2, 2}, {330, 2, 2, 1},
+        {344, 6, 0, 0}, {388, 2, 0, 0}, {420, 2, 0, 0}, {461, 2, 8, 4}, {696, 1, 2, 1},
+    }};
+    const ScalarHistogram minHist = createHistogram(minData);
+
+    std::vector<BucketData> maxData{{
+        {301, 1, 0, 0},  {445, 1, 0, 0}, {491, 1, 0, 0}, {533, 3, 0, 0}, {605, 3, 0, 0},
+        {620, 2, 0, 0},  {647, 3, 0, 0}, {665, 4, 0, 0}, {713, 3, 10, 4}, {741, 3, 0, 0},
+        {814, 3, 2, 2},  {839, 2, 1, 1}, {864, 1, 2, 2}, {883, 3, 0, 0},  {893, 7, 0, 0},
+        {898, 5, 0, 0},  {909, 1, 12, 3}, {931, 2, 2, 1}, {953, 6, 3, 2}, {993, 1, 7, 5},
+    }};
+    const ScalarHistogram maxHist = createHistogram(maxData);
+
+    std::vector<BucketData> uniqueData{{
+        {3, 3, 0, 0},     {19, 5, 11, 2},  {49, 7, 5, 3},    {69, 8, 0, 0},    {75, 3, 0, 0},
+        {125, 2, 10, 5},  {228, 3, 27, 14}, {260, 4, 5, 1},  {344, 6, 36, 13}, {423, 4, 20, 8},
+        {605, 4, 61, 28}, {665, 8, 12, 6},  {758, 4, 41, 16}, {768, 5, 0, 0},  {776, 3, 0, 0},
+        {864, 3, 15, 10}, {883, 8, 0, 0},   {911, 2, 28, 6},  {953, 6, 8, 4},  {993, 1, 7, 5},
+    }};
+    const ScalarHistogram uniqueHist = createHistogram(uniqueData);
+
+    TypeCounts typeCounts{{value::TypeTags::NumberInt64, 106}, {value::TypeTags::Array, 94}};
+    const ArrayHistogram arrHist(scalarHist,
+                                 typeCounts,
+                                 uniqueHist,
+                                 minHist,
+                                 maxHist,
+                                 TypeCounts{{value::TypeTags::NumberInt64, 375}},
+                                 0);
+
+    value::TypeTags lowTag = value::TypeTags::NumberInt64;
+    value::Value lowVal = 500;
+    value::TypeTags highTag = value::TypeTags::NumberInt64;
+    value::Value highVal = 550;
+
+    // Test interpolation for query: [{$match: {a: {$gt: 500, $lt: 550}}}].
+    double expectedCard = estimateCardRange(arrHist,
+                                            false /* lowInclusive */,
+                                            lowTag,
+                                            lowVal,
+                                            false /* highInclusive */,
+                                            highTag,
+                                            highVal,
+                                            true /* includeScalar */);
+    ASSERT_APPROX_EQUAL(92.9, expectedCard, 0.1);  // Actual: 94.
+
+    // Test interpolation for query: [{$match: {a: {$elemMatch: {$gt: 500, $lt: 550}}}}].
+    expectedCard = estimateCardRange(arrHist,
+                                     false /* lowInclusive */,
+                                     lowTag,
+                                     lowVal,
+                                     false /* highInclusive */,
+                                     highTag,
+                                     highVal,
+                                     false /* includeScalar */);
+    ASSERT_APPROX_EQUAL(11.0, expectedCard, 0.1);  // Actual: 8.
+}
+
+}  // namespace
+}  // namespace mongo::optimizer::ce
diff --git a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
new file mode 100644
index 00000000000..25d1658807d
--- /dev/null
+++ b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
@@ -0,0 +1,496 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce/histogram_predicate_estimation.h"
+
+#include "mongo/db/exec/sbe/abt/abt_lower.h"
+#include "mongo/db/pipeline/abt/utils.h"
+
+#include "mongo/db/query/optimizer/rewrites/const_eval.h"
+#include "mongo/db/query/optimizer/syntax/expr.h"
+#include "mongo/db/query/optimizer/utils/ce_math.h"
+#include "mongo/db/query/optimizer/utils/interval_utils.h"
+#include "mongo/db/query/stats/value_utils.h"
+
+namespace mongo::optimizer::ce {
+namespace value = sbe::value;
+
+using stats::ArrayHistogram;
+using stats::Bucket;
+using stats::compareValues;
+using stats::sameTypeBracket;
+using stats::ScalarHistogram;
+using stats::valueToDouble;
+
+std::pair<value::TypeTags, value::Value> getConstTypeVal(const ABT& abt) {
+    const auto* constant = abt.cast<Constant>();
+    tassert(7051102, "Interval ABTs passed in for estimation must have Constant bounds.", constant);
+    return constant->get();
+};
+
+boost::optional<std::pair<value::TypeTags, value::Value>> getBound(
+    const BoundRequirement& boundReq) {
+    const ABT& bound = boundReq.getBound();
+    if (bound.is<Constant>()) {
+        return getConstTypeVal(bound);
+    }
+    return boost::none;
+};
+
+IntervalRequirement getMinMaxIntervalForType(value::TypeTags type) {
+    // Note: This function works based on the assumption that there are no intervals that include
+    // values from more than one type. That is why the MinMax interval of a type will include all
+    // possible intervals over that type.
+
+    auto&& [min, minInclusive] = getMinMaxBoundForType(true /*isMin*/, type);
+    tassert(7051103, str::stream() << "Type " << type << " has no minimum", min);
+
+    auto&& [max, maxInclusive] = getMinMaxBoundForType(false /*isMin*/, type);
+    tassert(7051104, str::stream() << "Type " << type << " has no maximum", max);
+
+    return IntervalRequirement{BoundRequirement(minInclusive, *min),
+                               BoundRequirement(maxInclusive, *max)};
+}
+
+bool isIntervalSubsetOfType(const IntervalRequirement& interval, value::TypeTags type) {
+    // Create a conjunction of the interval and the min-max interval for the type as input for the
+    // intersection function.
+    auto intervals =
+        IntervalReqExpr::make<IntervalReqExpr::Disjunction>(IntervalReqExpr::NodeVector{
+            IntervalReqExpr::make<IntervalReqExpr::Conjunction>(IntervalReqExpr::NodeVector{
+                IntervalReqExpr::make<IntervalReqExpr::Atom>(interval),
+                IntervalReqExpr::make<IntervalReqExpr::Atom>(getMinMaxIntervalForType(type))})});
+
+    return intersectDNFIntervals(intervals, ConstEval::constFold).has_value();
+}
+
+EstimationResult getTotals(const ScalarHistogram& h) {
+    if (h.empty()) {
+        return {0.0, 0.0};
+    }
+
+    const Bucket& last = h.getBuckets().back();
+    return {last._cumulativeFreq, last._cumulativeNDV};
+}
+
+/**
+ * Helper function that uses linear interpolation to estimate the cardinality and NDV for a value
+ * that falls inside of a histogram bucket.
+ */
+EstimationResult interpolateEstimateInBucket(const ScalarHistogram& h,
+                                             value::TypeTags tag,
+                                             value::Value val,
+                                             EstimationType type,
+                                             size_t bucketIndex) {
+
+    const Bucket& bucket = h.getBuckets().at(bucketIndex);
+    const auto [boundTag, boundVal] = h.getBounds().getAt(bucketIndex);
+
+    double resultCard = bucket._cumulativeFreq - bucket._equalFreq - bucket._rangeFreq;
+    double resultNDV = bucket._cumulativeNDV - bucket._ndv - 1.0;
+
+    // Check if the estimate is at the point of type brackets switch. If the current bucket is the
+    // first bucket of a new type bracket and the value is of another type, estimate cardinality
+    // from the current bucket as 0.
+    //
+    // For example, let bound 1 = 1000, bound 2 = "abc". The value 100000000 falls in bucket 2, the
+    // first bucket for strings, but should not get any cardinality/NDV fraction from it.
+    if (!sameTypeBracket(tag, boundTag)) {
+        if (type == EstimationType::kEqual) {
+            return {0.0, 0.0};
+        } else {
+            return {resultCard, resultNDV};
+        }
+    }
+
+    // Estimate for equality frequency inside of the bucket.
+    const double innerEqFreq = (bucket._ndv == 0.0) ? 0.0 : bucket._rangeFreq / bucket._ndv;
+
+    if (type == EstimationType::kEqual) {
+        return {innerEqFreq, 1.0};
+    }
+
+    // If the value is minimal for its type, and the operation is $lt or $lte return cardinality up
+    // to the previous bucket.
+    auto&& [minConstant, inclusive] = getMinMaxBoundForType(true /*isMin*/, tag);
+    auto [minTag, minVal] = getConstTypeVal(*minConstant);
+    if (compareValues(minTag, minVal, tag, val) == 0) {
+        return {resultCard, resultNDV};
+    }
+
+    // For $lt and $lte operations use linear interpolation to take a fraction of the bucket
+    // cardinality and NDV if there is a preceding bucket with a bound of the same type. Use half
+    // of the bucket estimates otherwise.
+    double ratio = 0.5;
+    if (bucketIndex > 0) {
+        const auto [lowBoundTag, lowBoundVal] = h.getBounds().getAt(bucketIndex - 1);
+        if (sameTypeBracket(lowBoundTag, boundTag)) {
+            double doubleLowBound = valueToDouble(lowBoundTag, lowBoundVal);
+            double doubleUpperBound = valueToDouble(boundTag, boundVal);
+            double doubleVal = valueToDouble(tag, val);
+            ratio = (doubleVal - doubleLowBound) / (doubleUpperBound - doubleLowBound);
+        }
+    }
+
+    const double bucketFreqRatio = bucket._rangeFreq * ratio;
+    resultCard += bucketFreqRatio;
+    resultNDV += bucket._ndv * ratio;
+
+    if (type == EstimationType::kLess) {
+        // Subtract from the estimate the cardinality and NDV corresponding to the equality
+        // operation, unless they are larger than the fraction taken from this bucket.
+        const double innerEqFreqCorrection = (bucketFreqRatio < innerEqFreq) ? 0.0 : innerEqFreq;
+        const double innerEqNdv = (bucket._ndv * ratio <= 1.0) ?
0.0 : 1.0; + resultCard -= innerEqFreqCorrection; + resultNDV -= innerEqNdv; + } + return {resultCard, resultNDV}; +} + +EstimationResult estimate(const ScalarHistogram& h, + value::TypeTags tag, + value::Value val, + EstimationType type) { + switch (type) { + case EstimationType::kGreater: + return getTotals(h) - estimate(h, tag, val, EstimationType::kLessOrEqual); + + case EstimationType::kGreaterOrEqual: + return getTotals(h) - estimate(h, tag, val, EstimationType::kLess); + + default: + // Continue. + break; + } + + size_t bucketIndex = 0; + { + size_t len = h.getBuckets().size(); + while (len > 0) { + const size_t half = len >> 1; + const auto [boundTag, boundVal] = h.getBounds().getAt(bucketIndex + half); + + if (compareValues(boundTag, boundVal, tag, val) < 0) { + bucketIndex += half + 1; + len -= half + 1; + } else { + len = half; + } + } + } + if (bucketIndex == h.getBuckets().size()) { + // Value beyond the largest endpoint. + switch (type) { + case EstimationType::kEqual: + return {0.0, 0.0}; + + case EstimationType::kLess: + case EstimationType::kLessOrEqual: + return getTotals(h); + + default: + MONGO_UNREACHABLE; + } + } + + const Bucket& bucket = h.getBuckets().at(bucketIndex); + const auto [boundTag, boundVal] = h.getBounds().getAt(bucketIndex); + const bool isEndpoint = compareValues(boundTag, boundVal, tag, val) == 0; + + if (isEndpoint) { + switch (type) { + case EstimationType::kEqual: { + return {bucket._equalFreq, 1.0}; + } + + case EstimationType::kLess: { + double resultCard = bucket._cumulativeFreq - bucket._equalFreq; + double resultNDV = bucket._cumulativeNDV - 1.0; + return {resultCard, resultNDV}; + } + + case EstimationType::kLessOrEqual: { + double resultCard = bucket._cumulativeFreq; + double resultNDV = bucket._cumulativeNDV; + return {resultCard, resultNDV}; + } + + default: + MONGO_UNREACHABLE; + } + } else { + return interpolateEstimateInBucket(h, tag, val, type, bucketIndex); + } +} + +/** + * Returns how many values of the given type are known by the array histogram. + */ +double getTypeCard(const ArrayHistogram& ah, value::TypeTags tag, bool includeScalar) { + double count = 0.0; + + // TODO SERVER-70936: booleans are estimated by different type counters (unless in arrays). + if (tag == sbe::value::TypeTags::Boolean) { + uasserted(7051101, "Cannot estimate boolean types yet with histogram CE."); + } + + // Note that if we are asked by the optimizer to estimate an interval whose bounds are arrays, + // this means we are trying to estimate equality on nested arrays. In this case, we do not want + // to include the "scalar" type counter for the array type, because this will cause us to + // estimate the nested array case as counting all arrays, regardless of whether or not they are + // nested. + if (includeScalar && tag != value::TypeTags::Array) { + auto typeIt = ah.getTypeCounts().find(tag); + if (typeIt != ah.getTypeCounts().end()) { + count += typeIt->second; + } + } + if (ah.isArray()) { + auto typeIt = ah.getArrayTypeCounts().find(tag); + if (typeIt != ah.getArrayTypeCounts().end()) { + count += typeIt->second; + } + } + return count; +} + +/** + * Estimates equality to the given tag/value using histograms. 
+ */ +double estimateCardEq(const ArrayHistogram& ah, + value::TypeTags tag, + value::Value val, + bool includeScalar) { + double card = 0.0; + if (includeScalar) { + card = estimate(ah.getScalar(), tag, val, EstimationType::kEqual).card; + } + if (ah.isArray()) { + card += estimate(ah.getArrayUnique(), tag, val, EstimationType::kEqual).card; + } + return card; +} + +static EstimationResult estimateRange(const ScalarHistogram& histogram, + bool lowInclusive, + value::TypeTags tagLow, + value::Value valLow, + bool highInclusive, + value::TypeTags tagHigh, + value::Value valHigh) { + const EstimationType highType = + highInclusive ? EstimationType::kLessOrEqual : EstimationType::kLess; + const EstimationResult highEstimate = estimate(histogram, tagHigh, valHigh, highType); + + const EstimationType lowType = + lowInclusive ? EstimationType::kLess : EstimationType::kLessOrEqual; + const EstimationResult lowEstimate = estimate(histogram, tagLow, valLow, lowType); + + return highEstimate - lowEstimate; +} + +/** + * Compute an estimate for range query on array data with formula: + * Card(ArrayMin(a < valHigh)) - Card(ArrayMax(a < valLow)) + */ +static EstimationResult estimateRangeQueryOnArray(const ScalarHistogram& histogramAmin, + const ScalarHistogram& histogramAmax, + bool lowInclusive, + value::TypeTags tagLow, + value::Value valLow, + bool highInclusive, + value::TypeTags tagHigh, + value::Value valHigh) { + const EstimationType highType = + highInclusive ? EstimationType::kLessOrEqual : EstimationType::kLess; + const EstimationResult highEstimate = estimate(histogramAmin, tagHigh, valHigh, highType); + + const EstimationType lowType = + lowInclusive ? EstimationType::kLess : EstimationType::kLessOrEqual; + const EstimationResult lowEstimate = estimate(histogramAmax, tagLow, valLow, lowType); + + return highEstimate - lowEstimate; +} + +double estimateCardRange(const ArrayHistogram& ah, + /* Define lower bound. */ + bool lowInclusive, + value::TypeTags tagLow, + value::Value valLow, + /* Define upper bound. */ + bool highInclusive, + value::TypeTags tagHigh, + value::Value valHigh, + bool includeScalar, + EstimationAlgo estimationAlgo) { + uassert(6695701, + "Low bound must not be higher than high", + compareValues(tagLow, valLow, tagHigh, valHigh) <= 0); + + // Helper lambda to shorten code for legibility. + auto estRange = [&](const ScalarHistogram& h) { + return estimateRange(h, lowInclusive, tagLow, valLow, highInclusive, tagHigh, valHigh); + }; + + double result = 0.0; + if (ah.isArray()) { + + if (includeScalar) { + // Range query on array data. + const EstimationResult rangeCardOnArray = estimateRangeQueryOnArray(ah.getArrayMin(), + ah.getArrayMax(), + lowInclusive, + tagLow, + valLow, + highInclusive, + tagHigh, + valHigh); + result += rangeCardOnArray.card; + } else { + // $elemMatch query on array data. + const auto arrayMinEst = estRange(ah.getArrayMin()); + const auto arrayMaxEst = estRange(ah.getArrayMax()); + const auto arrayUniqueEst = estRange(ah.getArrayUnique()); + + // ToDo: try using ah.getArrayCount() - ah.getEmptyArrayCount(); + // when the number of empty arrays is provided by the statistics. + const double totalArrayCount = ah.getArrayCount(); + + uassert( + 6715101, "Array histograms should contain at least one array", totalArrayCount > 0); + switch (estimationAlgo) { + case EstimationAlgo::HistogramV1: { + const double arrayUniqueDensity = (arrayUniqueEst.ndv == 0.0) + ? 
0.0
+                        : (arrayUniqueEst.card / std::sqrt(arrayUniqueEst.ndv));
+                    result =
+                        std::max(std::max(arrayMinEst.card, arrayMaxEst.card), arrayUniqueDensity);
+                    break;
+                }
+                case EstimationAlgo::HistogramV2: {
+                    const double avgArraySize =
+                        getTotals(ah.getArrayUnique()).card / totalArrayCount;
+                    const double adjustedUniqueCard = (avgArraySize == 0.0)
+                        ? 0.0
+                        : std::min(arrayUniqueEst.card / pow(avgArraySize, 0.2), totalArrayCount);
+                    result =
+                        std::max(std::max(arrayMinEst.card, arrayMaxEst.card), adjustedUniqueCard);
+                    break;
+                }
+                case EstimationAlgo::HistogramV3: {
+                    const double adjustedUniqueCard =
+                        0.85 * std::min(arrayUniqueEst.card, totalArrayCount);
+                    result =
+                        std::max(std::max(arrayMinEst.card, arrayMaxEst.card), adjustedUniqueCard);
+                    break;
+                }
+                default:
+                    MONGO_UNREACHABLE;
+            }
+        }
+    }
+
+    if (includeScalar) {
+        const auto scalarEst = estRange(ah.getScalar());
+        result += scalarEst.card;
+    }
+
+    return result;
+}
+
+double estimateIntervalCardinality(const ArrayHistogram& ah,
+                                   const IntervalRequirement& interval,
+                                   CEType childResult,
+                                   bool includeScalar) {
+    if (interval.isFullyOpen()) {
+        return childResult;
+    } else if (interval.isEquality()) {
+        auto maybeConstBound = getBound(interval.getLowBound());
+        if (!maybeConstBound) {
+            return kInvalidEstimate;
+        }
+
+        auto [tag, val] = *maybeConstBound;
+        if (stats::canEstimateTypeViaHistogram(tag)) {
+            return estimateCardEq(ah, tag, val, includeScalar);
+        }
+
+        // Otherwise, we return the cardinality for the type of the interval's bounds.
+        return getTypeCard(ah, tag, includeScalar);
+    }
+
+    // Otherwise, we have a range.
+    auto lowBound = interval.getLowBound();
+    auto maybeConstLowBound = getBound(lowBound);
+    if (!maybeConstLowBound) {
+        return kInvalidEstimate;
+    }
+
+    auto highBound = interval.getHighBound();
+    auto maybeConstHighBound = getBound(highBound);
+    if (!maybeConstHighBound) {
+        return kInvalidEstimate;
+    }
+
+    auto [lowTag, lowVal] = *maybeConstLowBound;
+    auto [highTag, highVal] = *maybeConstHighBound;
+
+    // Check if we can estimate this interval using histograms. One of the tags may not be of a
+    // type we know how to estimate using histograms; however, it should still be possible to
+    // estimate the interval if the other one is of the appropriate type.
+    if (stats::canEstimateTypeViaHistogram(lowTag) || stats::canEstimateTypeViaHistogram(highTag)) {
+        return estimateCardRange(ah,
+                                 lowBound.isInclusive(),
+                                 lowTag,
+                                 lowVal,
+                                 highBound.isInclusive(),
+                                 highTag,
+                                 highVal,
+                                 includeScalar);
+    }
+
+    // Otherwise, this interval was not in our histogram. We may be able to estimate this interval
+    // via type counts; if so, we just return the total count for the type.
+
+    // If the bound tags are equal, we can estimate this in the same way that we do equalities on
+    // non-histogrammable types. Otherwise, we need to figure out which type(s) are included by this
+    // range.
+    if (lowTag == highTag || isIntervalSubsetOfType(interval, lowTag)) {
+        return getTypeCard(ah, lowTag, includeScalar);
+    } else if (isIntervalSubsetOfType(interval, highTag)) {
+        return getTypeCard(ah, highTag, includeScalar);
+    }
+
+    // If we reach here, we've given up estimating, because our interval intersected both high & low
+    // type intervals (and possibly more types).
+    // TODO: could we aggregate type counts across all intersected types here?
+    return 0.0;
+}
+
+}  // namespace mongo::optimizer::ce
diff --git a/src/mongo/db/query/ce/histogram_predicate_estimation.h b/src/mongo/db/query/ce/histogram_predicate_estimation.h
new file mode 100644
index 00000000000..763f6c13a5e
--- /dev/null
+++ b/src/mongo/db/query/ce/histogram_predicate_estimation.h
@@ -0,0 +1,106 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/query/optimizer/defs.h"
+#include "mongo/db/query/optimizer/index_bounds.h"
+#include "mongo/db/query/stats/array_histogram.h"
+
+namespace mongo::optimizer::ce {
+
+constexpr double kInvalidEstimate = -1.0;
+
+enum class EstimationType { kEqual, kLess, kLessOrEqual, kGreater, kGreaterOrEqual };
+enum class EstimationAlgo { HistogramV1, HistogramV2, HistogramV3 };
+
+const stdx::unordered_map<EstimationType, std::string> estimationTypeName = {
+    {EstimationType::kEqual, "eq"},
+    {EstimationType::kLess, "lt"},
+    {EstimationType::kLessOrEqual, "lte"},
+    {EstimationType::kGreater, "gt"},
+    {EstimationType::kGreaterOrEqual, "gte"}};
+
+struct EstimationResult {
+    double card;
+    double ndv;
+
+    EstimationResult operator-(const EstimationResult& other) const {
+        return {card - other.card, ndv - other.ndv};
+    }
+};
+
+/**
+ * Returns cumulative total statistics for a histogram.
+ */
+EstimationResult getTotals(const stats::ScalarHistogram& h);
+
+/**
+ * Compute an estimate for a given value and estimation type. Use linear interpolation for values
+ * that fall inside of histogram buckets.
+ */
+EstimationResult estimate(const stats::ScalarHistogram& h,
+                          sbe::value::TypeTags tag,
+                          sbe::value::Value val,
+                          EstimationType type);
+
+/**
+ * Given an array histogram, an interval, and the input cardinality, estimates the cardinality of
+ * the interval.
+ */
+double estimateIntervalCardinality(const stats::ArrayHistogram& estimator,
+                                   const IntervalRequirement& interval,
+                                   CEType inputCardinality,
+                                   bool includeScalar);
+
+/**
+ * Estimates the cardinality of an equality predicate given an ArrayHistogram and an SBE value and
+ * type tag pair.
+ */
+double estimateCardEq(const stats::ArrayHistogram& ah,
+                      sbe::value::TypeTags tag,
+                      sbe::value::Value val,
+                      bool includeScalar);
+
+/**
+ * Estimates the cardinality of a range predicate given an ArrayHistogram and the bounds of the
+ * range. Set 'includeScalar' to true to indicate that the provided range should also match
+ * non-array values. The other fields define the range of the estimation.
+ */
+double estimateCardRange(const stats::ArrayHistogram& ah,
+                         bool lowInclusive,
+                         sbe::value::TypeTags tagLow,
+                         sbe::value::Value valLow,
+                         bool highInclusive,
+                         sbe::value::TypeTags tagHigh,
+                         sbe::value::Value valHigh,
+                         bool includeScalar,
+                         EstimationAlgo estAlgo = EstimationAlgo::HistogramV2);
+
+}  // namespace mongo::optimizer::ce
diff --git a/src/mongo/db/query/ce/max_diff.cpp b/src/mongo/db/query/ce/max_diff.cpp
deleted file mode 100644
index 3c265620771..00000000000
--- a/src/mongo/db/query/ce/max_diff.cpp
+++ /dev/null
@@ -1,376 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mongo/base/string_data.h" -#include "mongo/bson/bsonobjbuilder.h" -#include "mongo/bson/bsontypes.h" -#include "mongo/db/exec/sbe/values/bson.h" -#include "mongo/db/exec/sbe/values/value.h" -#include "mongo/db/query/ce/max_diff.h" -#include "mongo/db/query/ce/value_utils.h" -#include "mongo/util/assert_util.h" - - -namespace mongo::ce { - -std::string printDistribution(const DataDistribution& distr, size_t nElems) { - std::ostringstream os; - for (size_t i = 0; i < std::min(nElems, distr._freq.size()); ++i) { - os << "{val: " << distr._bounds[i].get() << ", " << distr._freq[i].toString() << "}\n"; - } - return os.str(); -} - -static double valueSpread(value::TypeTags tag1, - value::Value val1, - value::TypeTags tag2, - value::Value val2) { - double doubleVal1 = valueToDouble(tag1, val1); - double doubleVal2 = valueToDouble(tag2, val2); - uassert(6660502, - "Data distribution values must be monotonically increasing.", - doubleVal2 >= doubleVal1); - return doubleVal2 - doubleVal1; -} - -DataDistribution getDataDistribution(const std::vector& sortedInput) { - if (sortedInput.empty()) { - return {}; - } - - DataDistribution result; - value::TypeTags prevTag; - value::Value prevValue; - bool first = true; - - // Aggregate the values in a sorted dataset into a frequency distribution. - size_t idx = 0; - for (size_t i = 0; i < sortedInput.size(); i++) { - const auto v = sortedInput[i].get(); - const auto comparison = first ? 1 : compareValues(v.first, v.second, prevTag, prevValue); - first = false; - - if (comparison != 0) { - uassert(6660550, "Input is not sorted", comparison > 0); - prevTag = v.first; - prevValue = v.second; - - const auto [tagCopy, valCopy] = copyValue(v.first, v.second); - result._bounds.emplace_back(tagCopy, valCopy); - result._freq.emplace_back(idx, 1); - ++idx; - } else { - ++result._freq.back()._freq; - } - } - - // Calculate the area for all values in the data distribution. - // The current minimum and maximum areas of the values of a type class. - double maxArea = 0.0; - - for (size_t i = 0; i + 1 < result._freq.size(); ++i) { - const auto v1 = result._bounds[i]; - const auto v2 = result._bounds[i + 1]; - const bool newTypeClass = !sameTypeClass(v1.getTag(), v2.getTag()); - - if (newTypeClass) { - const auto res = result.typeClassBounds.emplace(i, maxArea); - uassert(6660551, "There can't be duplicate type class bounds.", res.second); - maxArea = 0.0; - } else if (i == 0) { - const double spread = - valueSpread(v1.getTag(), v1.getValue(), v2.getTag(), v2.getValue()); - maxArea = result._freq[i]._freq * spread; - } - - if (i == 0 || newTypeClass) { - // Make sure we insert bucket boundaries between different types, and also make sure - // first value is picked for a boundary. - result._freq[i]._area = std::numeric_limits::infinity(); - } else { - const double spread = - valueSpread(v1.getTag(), v1.getValue(), v2.getTag(), v2.getValue()); - result._freq[i]._area = result._freq[i]._freq * spread; - maxArea = std::max(maxArea, result._freq[i]._area); - } - } - - // Make sure last value is picked as a histogram bucket boundary. - result._freq.back()._area = std::numeric_limits::infinity(); - const auto res = result.typeClassBounds.emplace(result._freq.size(), maxArea); - uassert(6660503, "There can't be duplicate type class bounds.", res.second); - - // Compute normalized areas. If the spread is 0, the area may also be 0. 
This could happen,
- // for instance, if there is only a single value of a given type.
-    size_t beginIdx = 0;
-    for (const auto [endIdx, area] : result.typeClassBounds) {
-        for (size_t i = beginIdx; i < endIdx; ++i) {
-            result._freq[i]._normArea = area > 0.0 ? (result._freq[i]._area / area) : 0.0;
-        }
-        beginIdx = endIdx;
-    }
-
-    // std::cout << "Distribution sorted by value:\n"
-    //           << printDistribution(result, result._freq.size()) << "\n"
-    //           << std::flush;
-
-    return result;
-}
-
-// TODO: This doesn't seem right -- it looks like we're sorting on the frequency,
-// not the difference between buckets
-static std::vector<ValFreq> generateTopKBuckets(const DataDistribution& dataDistrib,
-                                                size_t numBuckets) {
-    struct AreaComparator {
-        bool operator()(const ValFreq& a, const ValFreq& b) const {
-            return a._normArea > b._normArea;
-        }
-    };
-    std::priority_queue<ValFreq, std::vector<ValFreq>, AreaComparator> pq;
-
-    for (const auto& valFreq : dataDistrib._freq) {
-        if (pq.size() < numBuckets) {
-            pq.emplace(valFreq);
-        } else if (AreaComparator()(valFreq, pq.top())) {
-            pq.pop();
-            pq.emplace(valFreq);
-        }
-    }
-
-    std::vector<ValFreq> result;
-    while (!pq.empty()) {
-        result.push_back(pq.top());
-        pq.pop();
-    }
-
-    std::sort(result.begin(), result.end(), [](const ValFreq& a, const ValFreq& b) {
-        return a._idx < b._idx;
-    });
-
-    return result;
-}
-
-ScalarHistogram genMaxDiffHistogram(const DataDistribution& dataDistrib, size_t numBuckets) {
-    if (dataDistrib._freq.empty()) {
-        return {};
-    }
-
-    std::vector<ValFreq> topKBuckets = generateTopKBuckets(dataDistrib, numBuckets);
-    uassert(6660504,
-            "Must have bucket boundary on first value",
-            topKBuckets[0]._idx == dataDistrib._freq[0]._idx);
-    uassert(6660505,
-            "Must have bucket boundary on last value",
-            topKBuckets.back()._idx == dataDistrib._freq.back()._idx);
-
-    std::vector<Bucket> buckets;
-    value::Array bounds;
-
-    // Create histogram buckets out of the top-K bucket values.
-    size_t startBucketIdx = 0;
-    double cumulativeFreq = 0.0;
-    double cumulativeNDV = 0.0;
-    for (size_t i = 0; i < std::min(dataDistrib._freq.size(), numBuckets); i++) {
-        const size_t bucketBoundIdx = topKBuckets[i]._idx;
-        const double freq = dataDistrib._freq.at(bucketBoundIdx)._freq;
-
-        // Compute per-bucket statistics.
-        double rangeFreq = 0.0;
-        double ndv = 0.0;
-        while (startBucketIdx < bucketBoundIdx) {
-            rangeFreq += dataDistrib._freq[startBucketIdx++]._freq;
-            ++ndv;
-        }
-        cumulativeFreq += rangeFreq + freq;
-        cumulativeNDV += ndv + 1.0;
-
-        // Add a histogram bucket.
-        const auto v = dataDistrib._bounds[startBucketIdx];
-        const auto [copyTag, copyVal] = value::copyValue(v.getTag(), v.getValue());
-        bounds.push_back(copyTag, copyVal);
-        buckets.emplace_back(freq, rangeFreq, cumulativeFreq, ndv, cumulativeNDV);
-        startBucketIdx++;
-    }
-
-    return {std::move(bounds), std::move(buckets)};
-}
-
-/**
- * Helper that builds the inputs for an array estimator's min, max, and unique histograms from the
- * values in a single array. For each value in 'arrayElements', update the min, max, and unique
- * value vectors. These will be used to generate the corresponding histograms for array values.
- */
-void updateMinMaxUniqArrayVals(std::vector<SBEValue>& arrayElements,
-                               std::vector<SBEValue>& arrayMinData,
-                               std::vector<SBEValue>& arrayMaxData,
-                               std::vector<SBEValue>& arrayUniqueData) {
-
-    if (arrayElements.size() == 0) {
-        return;
-    }
-
-    sortValueVector(arrayElements);
-
-    // Emit values for arrayMin and arrayMax histograms.
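The block that follows walks the sorted array once, opening each type-class run into arrayMinData and closing it into arrayMaxData. A self-contained sketch of the same technique, with a (type class, value) pair standing in for the real SBE tag/value pair (Elem and emitMinMax are hypothetical names, not part of this patch):

#include <string>
#include <utility>
#include <vector>

using Elem = std::pair<std::string, int>;  // (type class, value), sorted by class, then value

void emitMinMax(const std::vector<Elem>& sorted, std::vector<Elem>& mins, std::vector<Elem>& maxs) {
    const Elem* prev = nullptr;
    for (const auto& e : sorted) {
        if (!prev || prev->first != e.first) {
            if (prev) {
                maxs.push_back(*prev);  // close the previous type-class run
            }
            mins.push_back(e);  // first element of a new run is that class's minimum
        }
        prev = &e;
    }
    if (prev) {
        maxs.push_back(*prev);  // the last element closes the final run
    }
}

For sorted = {("num", 1), ("num", 5), ("str", 0), ("str", 3)}, this yields mins = {("num", 1), ("str", 0)} and maxs = {("num", 5), ("str", 3)}, mirroring how arrayMinData and arrayMaxData are filled below.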
- { - boost::optional prev; - for (const auto& element : arrayElements) { - if (!prev) { - arrayMinData.push_back(element); - } else if (!sameTypeClass(prev->getTag(), element.getTag())) { - arrayMaxData.push_back(*prev); - arrayMinData.push_back(element); - } - prev = element; - } - if (prev) { - arrayMaxData.push_back(*prev); - } - } - - // Emit values for arrayUnique histogram. - { - boost::optional prev; - for (const auto& element : arrayElements) { - if (!prev || - compareValues( - prev->getTag(), prev->getValue(), element.getTag(), element.getValue()) < 0) { - arrayUniqueData.push_back(element); - prev = element; - } - } - } -} - -ArrayHistogram createArrayEstimator(const std::vector& arrayData, size_t nBuckets) { - // Values that will be used as inputs to histogram generation code. - std::vector scalarData; - std::vector arrayMinData; - std::vector arrayMaxData; - std::vector arrayUniqueData; - - // Type counters. - TypeCounts typeCounts; - TypeCounts arrayTypeCounts; - - // Value counters. - double emptyArrayCount = 0; - double trueCount = 0; - double falseCount = 0; - - for (const auto& v : arrayData) { - const auto val = v.getValue(); - const auto tag = v.getTag(); - - // Increment type counters. - auto tagCount = typeCounts.insert({tag, 1}); - if (!tagCount.second) { - ++tagCount.first->second; - } - - if (tag == value::TypeTags::Array) { - // If we have an array, we can construct min, max, and unique histograms from its - // elements, provided that they are histogrammable. - std::vector arrayElements; - - value::Array* arr = value::getArrayView(val); - size_t arrSize = arr->size(); - if (arrSize == 0) { - ++emptyArrayCount; - continue; - } - - for (size_t i = 0; i < arrSize; i++) { - const auto [tag, val] = arr->getAt(i); - - // Increment array type tag counts. - auto arrTagCount = arrayTypeCounts.insert({tag, 1}); - if (!arrTagCount.second) { - ++arrTagCount.first->second; - } - - if (!canEstimateTypeViaHistogram(tag)) { - // If the elements of this array are not histogrammable, then we can only update - // the array type counters - continue; - } - - const auto [tagCopy, valCopy] = value::copyValue(tag, val); - arrayElements.emplace_back(tagCopy, valCopy); - } - updateMinMaxUniqArrayVals(arrayElements, arrayMinData, arrayMaxData, arrayUniqueData); - - } else if (tag == value::TypeTags::Boolean) { - // If we have a boolean, we also have counters for true and false values we should - // increment here. - if (value::bitcastTo(val)) { - trueCount++; - } else { - falseCount++; - } - continue; - - } else if (!canEstimateTypeViaHistogram(tag)) { - // If we have a non-histogrammable type, we can only increment the type counters for it; - // we cannot build a scalar histogram on it. - continue; - - } else { - // Assume non-arrays are scalars. Emit values for the scalar histogram. - scalarData.push_back(v); - } - } - - // Lambda helper to construct histogram from an unsorted value vector. 
- const auto makeHistogram = [&nBuckets](std::vector& values) { - sortValueVector(values); - return genMaxDiffHistogram(getDataDistribution(values), nBuckets); - }; - - return {makeHistogram(scalarData), - std::move(typeCounts), - makeHistogram(arrayUniqueData), - makeHistogram(arrayMinData), - makeHistogram(arrayMaxData), - std::move(arrayTypeCounts), - emptyArrayCount, - trueCount, - falseCount}; -} - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/max_diff.h b/src/mongo/db/query/ce/max_diff.h deleted file mode 100644 index ab69f7001eb..00000000000 --- a/src/mongo/db/query/ce/max_diff.h +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include -#include - -#include "mongo/db/exec/sbe/values/value.h" -#include "mongo/db/query/ce/array_histogram.h" -#include "mongo/db/query/ce/scalar_histogram.h" -#include "mongo/db/query/ce/value_utils.h" - -namespace mongo::ce { - -struct ValFreq { - ValFreq(size_t idx, size_t freq) : _idx(idx), _freq(freq), _area(-1.0), _normArea(-1) {} - - std::string toString() const { - std::ostringstream os; - os << "idx: " << _idx << ", freq: " << _freq << ", area: " << _area - << ", normArea: " << _normArea; - return os.str(); - } - - size_t _idx; // Original index according to value order. - size_t _freq; // Frequency of the value. - double _area; // Derived as: spread * frequency - double _normArea; // Area normalized to the maximum in a type class. -}; - -struct DataDistribution { - std::vector _bounds; - std::vector _freq; - // The min/max areas of each type class. The key is the index of the last boundary of the class. 
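Concretely (hypothetical areas): by the getDataDistribution code above, sorted bounds [1, 5, 9, "a", "z"] form two type classes, so the member declared next would hold two entries, one per class, keyed by where each class's run of bounds ends:

std::map<size_t, double> typeClassBounds{{2, 40.0}, {5, 3.5}};  // numeric run, then string run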
- std::map typeClassBounds; -}; - -/** - Given a set of values sorted in BSON order, generate a data distribution consisting of - counts for each value with the values in sorted order -*/ -DataDistribution getDataDistribution(const std::vector& sortedInput); - -/** - Given a data distribution, generate a scalar histogram with the supplied number of buckets -*/ -ScalarHistogram genMaxDiffHistogram(const DataDistribution& dataDistrib, size_t numBuckets); - -/** - Given a vector containing SBEValues, generate a set of statistics to summarize the supplied - data. Histograms will use the supplied number of buckets. -*/ -ArrayHistogram createArrayEstimator(const std::vector& arrayData, size_t nBuckets); - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/maxdiff_histogram_test.cpp b/src/mongo/db/query/ce/maxdiff_histogram_test.cpp index 2f1e2185f8d..80364fea0bb 100644 --- a/src/mongo/db/query/ce/maxdiff_histogram_test.cpp +++ b/src/mongo/db/query/ce/maxdiff_histogram_test.cpp @@ -27,30 +27,36 @@ * it in the license file. */ -#include -#include - #include "mongo/db/concurrency/lock_state.h" #include "mongo/db/exec/sbe/abt/sbe_abt_test_util.h" #include "mongo/db/exec/sbe/values/value.h" -#include "mongo/db/query/ce/array_histogram.h" -#include "mongo/db/query/ce/ce_test_utils.h" -#include "mongo/db/query/ce/histogram_estimation.h" -#include "mongo/db/query/ce/max_diff.h" -#include "mongo/db/query/ce/maxdiff_test_utils.h" -#include "mongo/db/query/ce/rand_utils.h" -#include "mongo/db/query/ce/rand_utils_new.h" -#include "mongo/db/query/ce/scalar_histogram.h" +#include "mongo/db/query/ce/histogram_predicate_estimation.h" +#include "mongo/db/query/ce/test_utils.h" #include "mongo/db/query/optimizer/utils/unit_test_utils.h" +#include "mongo/db/query/stats/array_histogram.h" +#include "mongo/db/query/stats/max_diff.h" +#include "mongo/db/query/stats/maxdiff_test_utils.h" +#include "mongo/db/query/stats/rand_utils.h" +#include "mongo/db/query/stats/rand_utils_new.h" +#include "mongo/db/query/stats/scalar_histogram.h" #include "mongo/logv2/log_component.h" #include "mongo/logv2/log_component_settings.h" #include "mongo/logv2/log_severity.h" #include "mongo/unittest/unittest.h" -namespace mongo::ce::statistics { +namespace mongo::optimizer::ce { namespace { +namespace value = sbe::value; + +using stats::ArrayHistogram; +using stats::Bucket; +using stats::DataDistribution; +using stats::genFixedValueArray; +using stats::getDataDistribution; +using stats::makeHistogram; +using stats::makeInt64Value; +using stats::ScalarHistogram; -using namespace sbe; const double kTolerance = 0.001; class HistogramTest : public ServiceContextTest {}; @@ -266,4 +272,4 @@ TEST_F(HistogramTest, MaxDiffEmptyArrays) { } } // namespace -} // namespace mongo::ce::statistics +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/maxdiff_test_utils.cpp b/src/mongo/db/query/ce/maxdiff_test_utils.cpp deleted file mode 100644 index b27cbb6ec93..00000000000 --- a/src/mongo/db/query/ce/maxdiff_test_utils.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/query/ce/maxdiff_test_utils.h" - -#include "mongo/db/query/ce/array_histogram.h" -#include "mongo/db/query/ce/max_diff.h" - -namespace mongo::ce { - -static std::vector convertToJSON(const std::vector& input) { - std::vector result; - - for (size_t i = 0; i < input.size(); i++) { - const auto [objTag, objVal] = value::makeNewObject(); - value::ValueGuard vg(objTag, objVal); - - const auto [tag, val] = input[i].get(); - // Copy the value because objVal owns its value, and the ValueGuard releases not only - // objVal, but also its Value (in the case below - copyVal). - const auto [copyTag, copyVal] = value::copyValue(tag, val); - value::getObjectView(objVal)->push_back("a", copyTag, copyVal); - - std::ostringstream os; - os << std::make_pair(objTag, objVal); - result.push_back(os.str()); - } - - return result; -} - -size_t getActualCard(OperationContext* opCtx, - const std::vector& input, - const std::string& query) { - return mongo::optimizer::runPipeline(opCtx, query, convertToJSON(input)).size(); -} - -std::string makeMatchExpr(const SBEValue& val, EstimationType cmpOp) { - std::stringstream matchExpr; - std::string cmpOpName = estimationTypeName.at(cmpOp); - matchExpr << "[{$match: {a: {$" << cmpOpName << ": " << val.get() << "}}}]"; - return matchExpr.str(); -} - -ScalarHistogram makeHistogram(std::vector& randData, size_t nBuckets) { - sortValueVector(randData); - const DataDistribution& dataDistrib = getDataDistribution(randData); - return genMaxDiffHistogram(dataDistrib, nBuckets); -} - -std::string printValueArray(const std::vector& values) { - std::stringstream strStream; - for (size_t i = 0; i < values.size(); ++i) { - strStream << " " << values[i].get(); - } - return strStream.str(); -} - -std::string plotArrayEstimator(const ArrayHistogram& estimator, const std::string& header) { - std::ostringstream os; - os << header << "\n"; - if (!estimator.getScalar().empty()) { - os << "Scalar histogram:\n" << estimator.getScalar().plot(); - } - if (!estimator.getArrayUnique().empty()) { - os << "Array unique histogram:\n" << estimator.getArrayUnique().plot(); - } - if (!estimator.getArrayMin().empty()) { - os << "Array min histogram:\n" << estimator.getArrayMin().plot(); - } - if (!estimator.getArrayMax().empty()) { - os << "Array max histogram:\n" << estimator.getArrayMax().plot(); - } - if (!estimator.getTypeCounts().empty()) { - os << "Per scalar data type value counts: "; - for (auto tagCount : estimator.getTypeCounts()) { - os << tagCount.first << "=" << tagCount.second << " "; - } - } - if 
(!estimator.getArrayTypeCounts().empty()) { - os << "\nPer array data type value counts: "; - for (auto tagCount : estimator.getArrayTypeCounts()) { - os << tagCount.first << "=" << tagCount.second << " "; - } - } - if (estimator.isArray()) { - os << "\nEmpty array count: " << estimator.getEmptyArrayCount(); - } - os << "\n"; - - return os.str(); -} - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/maxdiff_test_utils.h b/src/mongo/db/query/ce/maxdiff_test_utils.h deleted file mode 100644 index 4ea1244da02..00000000000 --- a/src/mongo/db/query/ce/maxdiff_test_utils.h +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include -#include - - -#include "mongo/db/exec/sbe/abt/sbe_abt_test_util.h" -#include "mongo/db/exec/sbe/values/value.h" -#include "mongo/db/query/ce/histogram_estimation.h" -#include "mongo/db/query/ce/scalar_histogram.h" -#include "mongo/db/query/ce/value_utils.h" - -namespace mongo::ce { - -class ArrayHistogram; - -/** - Given a list of SBE values and a query, create a collection containing the data, - and count the results from the supplied query. - */ -size_t getActualCard(OperationContext* opCtx, - const std::vector& input, - const std::string& query); - -/** - Given a value and a comparison operator, generate a match expression reflecting - x cmpOp val. -*/ -std::string makeMatchExpr(const SBEValue& val, EstimationType cmpOp); - -/** - Given a vector of values, create a histogram reflection the distribution of the vector - with the supplied number of buckets. -*/ -ScalarHistogram makeHistogram(std::vector& randData, size_t nBuckets); - -/** - Serialize a vector of values. -*/ -std::string printValueArray(const std::vector& values); - -/** - Plot a set of statistics as stored in ArrayHistogram. -*/ -std::string plotArrayEstimator(const ArrayHistogram& estimator, const std::string& header); - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/rand_utils.cpp b/src/mongo/db/query/ce/rand_utils.cpp deleted file mode 100644 index 7f317904298..00000000000 --- a/src/mongo/db/query/ce/rand_utils.cpp +++ /dev/null @@ -1,391 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include -#include -#include -#include - -#include "mongo/db/query/ce/rand_utils.h" - -#include "mongo/db/exec/sbe/values/value.h" -#include "mongo/util/assert_util.h" - -namespace mongo::ce { - -const std::string DatasetDescriptor::_alphabet = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - -DatasetDescriptor::DatasetDescriptor(const DataTypeDistribution& dataTypeDistribution, - size_t intNDV, - int minInt, - int maxInt, - size_t strNDV, - size_t minStrLen, - size_t maxStrLen, - std::shared_ptr nestedDataDescriptor, - double reuseScalarsRatio, - size_t arrNDV, - size_t minArrLen, - size_t maxArrLen) - : _gen{42}, - _reuseScalarsRatio(reuseScalarsRatio), - _intNDV(std::min(intNDV, static_cast(std::abs(maxInt - minInt)))), - _uniformIntDist{minInt, maxInt}, - _arrNDV(arrNDV), - _uniformArrSizeDist{minArrLen, maxArrLen}, - _nestedDataDescriptor(nestedDataDescriptor) { - uassert(6660520, "Maximum integer number must be >= the minimum one.", (maxInt >= minInt)); - uassert(6660521, "Maximum string size must be >= the minimum one.", (maxStrLen >= minStrLen)); - uassert(6660522, - "Array specs must be 0 if there is no array data descriptor.", - _nestedDataDescriptor || (arrNDV == 0 && minArrLen == 0 && maxArrLen == 0)); - uassert(6660523, - "Nested arrays requires sensible array lengths", - !_nestedDataDescriptor || maxArrLen >= minArrLen); - uassert(6660524, "Recursive descriptors are not allowed.", nestedDataDescriptor.get() != this); - uassert(6660525, - "reuseScalarsRatio is a probability, must be in [0, 1].", - reuseScalarsRatio >= 0 && reuseScalarsRatio <= 1.0); - - // Compute absolute ranges given relative weights of each value type. 
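The loop that follows normalizes the weights into cumulative boundaries, so that a data type can later be drawn with a single upper_bound lookup on a uniform [0, 1) draw. A std-only sketch of that lookup technique (pickWeighted and the weights are hypothetical):

#include <map>
#include <random>

char pickWeighted(std::mt19937_64& gen) {
    // Cumulative probability -> outcome: 'a' with 50%, 'b' with 30%, 'c' with 20%.
    static const std::map<double, char> cumulative{{0.5, 'a'}, {0.8, 'b'}, {1.0, 'c'}};
    std::uniform_real_distribution<double> u{0.0, 1.0};
    return cumulative.upper_bound(u(gen))->second;  // first boundary strictly above the draw
}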
- double sumWeights = 0; - for (const auto& weightedType : dataTypeDistribution) { - sumWeights += weightedType.second; - } - double sumRelativeWeights = 0; - auto lastKey = dataTypeDistribution.crbegin()->first; - for (auto it = dataTypeDistribution.cbegin(); it != dataTypeDistribution.cend(); ++it) { - const auto weightedType = *it; - if (weightedType.first != lastKey) { - sumRelativeWeights += weightedType.second / sumWeights; - uassert(6660526, "The sum of weights can't be >= 1", sumRelativeWeights < 1); - } else { - // Due to rounding errors the last relative weight may not be exactly 1.0. Set it - // to 1.0. - sumRelativeWeights = 1.0; - } - _dataTypeDistribution.emplace(sumRelativeWeights, weightedType.first); - } - - // Generate a set of random integers. - mongo::stdx::unordered_set tmpIntSet; - tmpIntSet.reserve(_intNDV); - if (_intNDV == intNDV) { - for (int i = minInt; i <= maxInt; ++i) { - tmpIntSet.insert(i); // This is a dense set of all ints the range. - } - } else { - size_t randCount = 0; - while (tmpIntSet.size() < _intNDV && randCount < 10 * _intNDV) { - int randInt = _uniformIntDist(_gen); - ++randCount; - tmpIntSet.insert(randInt); - } - } - uassert( - 6660527, "Too few integers generated.", (double)tmpIntSet.size() / (double)_intNDV > 0.99); - _intSet.reserve(tmpIntSet.size()); - _intSet.insert(_intSet.end(), tmpIntSet.begin(), tmpIntSet.end()); - _uniformIntIdxDist.param( - std::uniform_int_distribution::param_type(0, _intSet.size() - 1)); - - // Generate a set of random strings with random sizes so that each string can be chosen - // multiple times in the test data set. - _stringSet.reserve(strNDV); - std::uniform_int_distribution uniformStrSizeDistr{minStrLen, maxStrLen}; - for (size_t i = 0; i < strNDV; ++i) { - size_t len = uniformStrSizeDistr(_gen); - const auto randStr = genRandomString(len); - _stringSet.push_back(randStr); - } - _uniformStrIdxDist.param( - std::uniform_int_distribution::param_type(0, _stringSet.size() - 1)); - - // Generate a set of random arrays that are chosen from when generating array data. - fillRandomArraySet(); -} - -std::vector DatasetDescriptor::genRandomDataset(size_t nElems, - DatasetDescriptor* parentDesc) { - std::vector randValues; - randValues.reserve(nElems); - DatasetDescriptor* curDesc = this; - - if (parentDesc) { - double reuseProb = _uniformRandProbability(_gen); - if (reuseProb < parentDesc->_reuseScalarsRatio) { - curDesc = parentDesc; - } - } - - for (size_t i = 0; i < nElems; ++i) { - // Get the data type of the current value to be generated. - value::TypeTags genTag = this->getRandDataType(); - // Generate a random value of the corresponding type. 
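As an aside on the constructor above: it collects distinct integers by repeated insertion into a set, capping the attempts at 10x the requested NDV so a narrow range cannot spin forever. A std-only sketch of that pattern (distinctInts is a hypothetical name):

#include <random>
#include <unordered_set>
#include <vector>

std::vector<int> distinctInts(size_t ndv, int lo, int hi, std::mt19937_64& gen) {
    std::uniform_int_distribution<int> dist{lo, hi};
    std::unordered_set<int> seen;
    for (size_t tries = 0; seen.size() < ndv && tries < 10 * ndv; ++tries) {
        seen.insert(dist(gen));  // duplicates are absorbed by the set
    }
    return {seen.begin(), seen.end()};
}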
- switch (genTag) { - case value::TypeTags::NumberInt64: { - size_t idx = curDesc->_uniformIntIdxDist(_gen); - auto randInt = curDesc->_intSet.at(idx); - const auto [tag, val] = makeInt64Value(randInt); - randValues.emplace_back(tag, val); - break; - } - case value::TypeTags::StringBig: - case value::TypeTags::StringSmall: { - size_t idx = curDesc->_uniformStrIdxDist(_gen); - const auto randStr = curDesc->_stringSet.at(idx); - const auto [tag, val] = value::makeNewString(randStr); - const auto [copyTag, copyVal] = value::copyValue(tag, val); - randValues.emplace_back(copyTag, copyVal); - break; - } - case value::TypeTags::Array: { - if (_nestedDataDescriptor) { - const auto randArray = genRandomArray(); - auto [arrayTag, arrayVal] = value::makeNewArray(); - value::Array* arr = value::getArrayView(arrayVal); - for (const auto& elem : randArray) { - const auto [copyTag, copyVal] = - value::copyValue(elem.getTag(), elem.getValue()); - arr->push_back(copyTag, copyVal); - } - randValues.emplace_back(arrayTag, arrayVal); - } - break; - } - default: - uasserted(6660528, "Unsupported data type"); - } - } - - return randValues; -} - -std::string DatasetDescriptor::genRandomString(size_t len) { - std::string randStr; - randStr.reserve(len); - for (size_t i = 0; i < len; ++i) { - size_t idx = _uniformCharIdxDist(_gen); - const char ch = _alphabet[idx]; - randStr += ch; - } - - return randStr; -} - -std::vector DatasetDescriptor::genRandomArray() { - uassert(6660529, - "There must be a nested data descriptor for random array generation.", - _nestedDataDescriptor); - if (_arrNDV == 0) { - size_t randArraySize = _uniformArrSizeDist(_gen); - return _nestedDataDescriptor->genRandomDataset(randArraySize, this); - } else { - size_t idx = _uniformArrIdxDist(_gen); - return _arraySet.at(idx); - } -} - -void DatasetDescriptor::fillRandomArraySet() { - for (size_t i = 0; i < _arrNDV; ++i) { - size_t randArraySize = _uniformArrSizeDist(_gen); - const auto randArray = _nestedDataDescriptor->genRandomDataset(randArraySize, this); - _arraySet.push_back(randArray); - } - - if (_arrNDV > 0) { - _uniformArrIdxDist.param( - std::uniform_int_distribution::param_type(0, _arraySet.size() - 1)); - } -} - -/** - Generate a random string. It is possible (even expected) that the same parameters - will generate different strings on successive calls -*/ -std::string genRandomString(size_t len, std::mt19937_64& gen, size_t seed) { - std::string randStr; - randStr.reserve(len); - const constexpr char* kAlphabet = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - std::uniform_int_distribution uniformDist{0, std::strlen(kAlphabet) - 1}; - - for (size_t i = 0; i < len; ++i) { - size_t idx = uniformDist(gen); - const char ch = kAlphabet[idx]; - randStr += ch; - } - - return randStr; -} - -/** - Generate a string. This string will be deterministic in that the same - parameters will always generate the same string, even on different platforms. -*/ -std::string genString(size_t len, size_t seed) { - std::string str; - str.reserve(len); - - const constexpr char* kAlphabet = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - const int kAlphabetLength = strlen(kAlphabet); - - unsigned long long rand = seed; - for (size_t i = 0; i < len; ++i) { - // Library implementations of rand vary by compiler, naturally, Since we still - // want the appearance of randomness, but consistency across compilers, we use a linear - // congruential generator to choose characters for the string. 
The parameters chosen - // are from Numerical Recipes. We use the upper 32 bits when calculating the character - // index, as the lower 32 are essentially nonrandom -- a weakness of LCGs in general. - rand = 3935559000370003845ULL * rand + 269134368944950781ULL; - - int idx = (rand >> 32) % kAlphabetLength; - str += kAlphabet[idx]; - } - - return str; -} - -/** - Generate an array of values with the required ratio of int to string. This array will be - deterministic in that the same parameters will always generate the same array, even on - different platforms. -*/ -std::vector genFixedValueArray(size_t nElems, double intRatio, double strRatio) { - - std::vector values; - - const int intNDV = static_cast(nElems) / 4; - for (size_t i = 0; i < std::round(nElems * intRatio); ++i) { - const auto [tag, val] = makeInt64Value((i % intNDV) + 1); - values.emplace_back(tag, val); - } - - if (strRatio == 0.0) { - return values; - } - - // Generate a set of strings so that each string can be chosen multiple times in the test - // data set. - const size_t strNDV = nElems / 5; - std::vector stringSet; - stringSet.reserve(strNDV); - for (size_t i = 0; i < strNDV; ++i) { - const auto randStr = genString(8, i); - stringSet.push_back(randStr); - } - - for (size_t i = 0; i < std::round(nElems * strRatio); ++i) { - size_t idx = i % stringSet.size(); - const auto randStr = stringSet[idx]; - const auto [tag, val] = value::makeNewString(randStr); - values.emplace_back(tag, val); - } - - return values; -} - -std::vector genRandomValueArray(size_t nElems, - double intRatio, - double strRatio, - size_t seed) { - std::vector randValues; - const int intNDV = static_cast(nElems) / 4; - const size_t strNDV = nElems / 5; - std::vector stringSet; - stringSet.reserve(strNDV); - - std::mt19937_64 gen{seed}; - std::uniform_int_distribution uniformDist{1, intNDV}; - - for (size_t i = 0; i < std::round(nElems * intRatio); ++i) { - const auto [tag, val] = makeInt64Value(uniformDist(gen)); - randValues.emplace_back(tag, val); - } - - // Generate a set of strings so that each string can be chosen multiple times in the test - // data set. - for (size_t i = 0; i < strNDV; ++i) { - const auto randStr = genRandomString(8, gen, seed); - stringSet.push_back(randStr); - } - - std::uniform_int_distribution idxDistr{0, stringSet.size() - 1}; - for (size_t i = 0; i < std::round(nElems * strRatio); ++i) { - size_t idx = idxDistr(gen); - const auto randStr = stringSet[idx]; - const auto [tag, val] = value::makeNewString(randStr); - randValues.emplace_back(tag, val); - } - - return randValues; -} - -std::vector nestArrays(const std::vector& input, size_t emptyArrayCount) { - std::vector result; - auto [arrayTag, arrayVal] = value::makeNewArray(); - - for (size_t i = 0; i < input.size(); i++) { - const auto v = input[i].get(); - const auto [tagCopy, valCopy] = value::copyValue(v.first, v.second); - - if (i % 10 < 5) { - // 50% of values remain scalar. - result.emplace_back(tagCopy, valCopy); - } else { - // 50% of the values are grouped into arrays of size 10. - value::Array* arr = value::getArrayView(arrayVal); - arr->push_back(tagCopy, valCopy); - if (arr->size() == 10) { - result.emplace_back(arrayTag, arrayVal); - std::tie(arrayTag, arrayVal) = value::makeNewArray(); - } - } - } - - for (size_t i = 0; i < emptyArrayCount; ++i) { - auto [emptyArrayTag, emptyArrayVal] = value::makeNewArray(); - result.emplace_back(emptyArrayTag, emptyArrayVal); - } - - // It's possible that the array still contains something. 
If it's empty, - // we can safely release it. If not, append it to the result. - value::Array* arr = value::getArrayView(arrayVal); - if (arr->size() > 0) { - result.emplace_back(arrayTag, arrayVal); - } else { - value::releaseValue(arrayTag, arrayVal); - } - - return result; -} - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/rand_utils.h b/src/mongo/db/query/ce/rand_utils.h deleted file mode 100644 index 9a9acad5161..00000000000 --- a/src/mongo/db/query/ce/rand_utils.h +++ /dev/null @@ -1,191 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include -#include - -#include "value_utils.h" - -namespace mongo::ce { - -class SBEValue; - -// A simple histogram describing the distribution of values of each data type. -using DataTypeDistribution = std::map; - -/** - Describes the distribution of a dataset according to type and weight. Other ctor parameters - are used to describe the various data types which can be emitted and correspond to the fields - named similarly - */ -class DatasetDescriptor { -public: - DatasetDescriptor(const DataTypeDistribution& dataTypeDistribution, - size_t intNDV, - int minInt, - int maxInt, - size_t strNDV, - size_t minStrLen, - size_t maxStrLen, - std::shared_ptr nestedDataDescriptor = nullptr, - double reuseScalarsRatio = 0, - size_t arrNDV = 0, - size_t minArrLen = 0, - size_t maxArrLen = 0); - - // Generate a random dataset of 'nElems' according to the data distribution characteristics in - // this object. - std::vector genRandomDataset(size_t nElems, DatasetDescriptor* parentDesc = nullptr); - -private: - // Select a random value data type. - value::TypeTags getRandDataType() { - double key = _uniformRandProbability(_gen); - return (*_dataTypeDistribution.upper_bound(key)).second; - } - - // Generate a random string with size 'len'. - std::string genRandomString(size_t len); - - // Generate a random array with length determined uniformly between minArrLen and maxArrLen - std::vector genRandomArray(); - - // Generate a set of random arrays that are chosen from when generating array data. 
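Putting the class together, a hedged usage sketch (the mix and parameters are hypothetical, assuming the declarations in this header): half 64-bit integers and half strings, with bounded NDVs and string lengths:

DataTypeDistribution mix{{value::TypeTags::NumberInt64, 0.5},
                         {value::TypeTags::StringBig, 0.5}};
DatasetDescriptor desc(mix, 100 /*intNDV*/, 0 /*minInt*/, 1000 /*maxInt*/,
                       50 /*strNDV*/, 5 /*minStrLen*/, 10 /*maxStrLen*/);
std::vector<SBEValue> dataset = desc.genRandomDataset(10000);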
- void fillRandomArraySet(); - -private: - using InternalDataTypeDistribution = std::map; - /* - * General distribution charecteristics. - */ - - // Pseudo-random generator. - std::mt19937_64 _gen; - // Random probabilities. Used to: - // - Select Value data types as random indexes in '_dataTypeDistribution'. - // - Select the source of values - either existing scalars or new. - std::uniform_real_distribution _uniformRandProbability{0.0, 1.0}; - // Distribution of different SBE data types. There will be %percent values of each type. - InternalDataTypeDistribution _dataTypeDistribution; - double _reuseScalarsRatio; - - /* - * Integer data parameters. - */ - - // Number of distinct integer values. - const size_t _intNDV; - // A set of integers to choose from while generating random integers. - std::vector _intSet; - // Generator of random integers with uniform distribution. - std::uniform_int_distribution _uniformIntDist; - // Generator of random indexes into the set of integers '_intSet'. - std::uniform_int_distribution _uniformIntIdxDist; - - /* - * String data parameters. - */ - - // All strings draw characters from this alphabet. - static const std::string _alphabet; - // A set of random strings to choose from. In theory there can be duplicates, but this is very - // unlikely. We don't care much if there are a few duplicates anyway. - std::vector _stringSet; - // Generator of random indexes into the set of characters '_alphabet'. - std::uniform_int_distribution _uniformCharIdxDist{0, _alphabet.size() - 1}; - // Generator of random indexes into the set of strings '_stringSet'. - std::uniform_int_distribution _uniformStrIdxDist; - - /* - * Array data parameters. - */ - - // Number of distinct arrays. - // TODO: currently not used. The idea is to use it in the same way as arrays - pre-generate - // '_arrNDV' arrays, then select randomly from this initial set. - size_t _arrNDV; - // Set of arrays to pick from when generating random data. - std::vector> _arraySet; - // Generator of random array sizes. - std::uniform_int_distribution _uniformArrSizeDist; - // Descriptor of the dataset within each array. - std::shared_ptr _nestedDataDescriptor; - // Generator of random indexes into the set of arrays '_arraySet'. - std::uniform_int_distribution _uniformArrIdxDist; -}; // namespace mongo::ce - -/** - Generate a pseudorandom string of length n - * The alphabet is fixed as [0-9][a-z][A-Z] - * Characters are chosed uniformly from the alphabet - * Randomness is implemented such that it is independent of the platform, - i.e. given the same length and seed on any platform, we will produce the - same string. -*/ -std::string genString(size_t len, size_t seed); - -/** - Generate a set of elements consisting of strings and ints in the - requested ratio. The generated array will contain the same values given the same - inputs on all platforms. - */ -std::vector genFixedValueArray(size_t nElems, double intRatio, double strRatio); - -/** - Generate a random string of length len. - * The alphabet is fixed as [0-9][a-z][A-Z]. - * Characters are chosed uniformly from the alphabet. - * Generated strings are likely to differ by platform, so derived values depending on them - are also likely to change. - */ -std::string genRandomString(size_t len, std::mt19937_64& gen, size_t seed); - - -/** - Generate a uniformly random set of elements consisting of string and ints in the - requested ratio. The resulting array is very likely to differ between platforms, even - with the same seed. 
Thus, derived values are also likely to change.
-
-   Prefer genFixedValueArray when comparing derived values against constants.
- */
-std::vector<SBEValue> genRandomValueArray(size_t nElems,
-                                          double intRatio,
-                                          double strRatio,
-                                          size_t seed);
-
-/**
-   Generate a set of values consisting of half scalars and half arrays of length 10.
-
-   Values contained in the result will be drawn from the input vector.
- */
-std::vector<SBEValue> nestArrays(const std::vector<SBEValue>& input, size_t emptyArrayCount);
-
-}  // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/rand_utils_new.cpp b/src/mongo/db/query/ce/rand_utils_new.cpp
deleted file mode 100644
index 038e69dde04..00000000000
--- a/src/mongo/db/query/ce/rand_utils_new.cpp
+++ /dev/null
@@ -1,249 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */ - -#include -#include -#include -#include -#include - -#include "mongo/db/query/ce/rand_utils_new.h" - -#include "mongo/db/exec/sbe/values/value.h" -#include "mongo/util/assert_util.h" - -namespace mongo::ce { - -const std::string StrDistribution::_alphabet = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - -void DataTypeDistrNew::generate(std::vector& randValues, std::mt19937_64& gen) { - if (_nullsRatio > 0 && _nullSelector(gen) < _nullsRatio) { - auto [tag, val] = makeNullValue(); - randValues.emplace_back(tag, val); - } else { - size_t idx = (*_idxDist)(gen); - const auto val = _valSet.at(idx); - auto [copyTag, copyVal] = copyValue(val.getTag(), val.getValue()); - randValues.emplace_back(copyTag, copyVal); - } -} - -void DataTypeDistrNew::generate(value::Array* randValueArray, std::mt19937_64& gen) { - if (_nullsRatio > 0 && _nullSelector(gen) < _nullsRatio) { - auto [tag, val] = makeNullValue(); - randValueArray->push_back(tag, val); - } else { - size_t idx = (*_idxDist)(gen); - const auto val = _valSet.at(idx); - auto [copyTag, copyVal] = copyValue(val.getTag(), val.getValue()); - randValueArray->push_back(copyTag, copyVal); - } -} - -IntDistribution::IntDistribution(MixedDistributionDescriptor distrDescriptor, - double weight, - size_t ndv, - int minInt, - int maxInt, - double nullsRatio) - : DataTypeDistrNew(distrDescriptor, - value::TypeTags::NumberInt64, - weight, - std::min(ndv, static_cast(std::abs(maxInt - minInt))), - nullsRatio), - _minInt(minInt), - _maxInt(maxInt) { - uassert(6660507, "Maximum integer number must be >= the minimum one.", (maxInt >= minInt)); -} - -void IntDistribution::init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) { - std::set tmpIntSet; - std::uniform_int_distribution uniformIntDist{_minInt, _maxInt}; - - if (_ndv == static_cast(std::abs(_maxInt - _minInt))) { - // This is a dense set of all ints in the range. - for (int i = _minInt; i <= _maxInt; ++i) { - tmpIntSet.insert(i); - } - } else { - size_t randCount = 0; - while (tmpIntSet.size() < _ndv && randCount < 10 * _ndv) { - int randInt = uniformIntDist(gen); - ++randCount; - tmpIntSet.insert(randInt); - } - } - uassert(6660508, "Too few integers generated.", (double)tmpIntSet.size() / (double)_ndv > 0.99); - _valSet.reserve(tmpIntSet.size()); - for (const auto randInt : tmpIntSet) { - const auto [tag, val] = makeInt64Value(randInt); - _valSet.emplace_back(tag, val); - } - - _idxDist = MixedDistribution::make(_mixedDistrDescriptor, 0, _valSet.size() - 1); -} - -StrDistribution::StrDistribution(MixedDistributionDescriptor distrDescriptor, - double weight, - size_t ndv, - size_t minStrLen, - size_t maxStrLen, - double nullsRatio) - : DataTypeDistrNew(distrDescriptor, value::TypeTags::StringBig, weight, ndv, nullsRatio), - _minStrLen(minStrLen), - _maxStrLen(maxStrLen) { - uassert(6660509, "Maximum string size must be >= the minimum one.", (maxStrLen >= minStrLen)); -} - -void StrDistribution::init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) { - // Generate a set of random strings with random sizes between _minStrLen and _maxStrLen. 
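Note the division of labor used by all of these distributions: init() materializes the _ndv distinct values once, and generate() afterwards only draws an index, so per-value cost stays constant. A std-only sketch of the pattern (Pool is a hypothetical name):

#include <random>
#include <string>
#include <vector>

struct Pool {
    std::vector<std::string> vals;  // filled once by an init()-style step

    const std::string& sample(std::mt19937_64& gen) const {
        std::uniform_int_distribution<size_t> idx{0, vals.size() - 1};
        return vals[idx(gen)];  // O(1) draw; values are never re-generated
    }
};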
- _valSet.reserve(_ndv); - std::uniform_int_distribution uniformStrSizeDistr{_minStrLen, _maxStrLen}; - for (size_t i = 0; i < _ndv; ++i) { - size_t len = uniformStrSizeDistr(gen); - const auto randStr = genRandomString(len, gen); - const auto [tag, val] = value::makeNewString(randStr); - _valSet.emplace_back(tag, val); - } - - _idxDist = MixedDistribution::make(_mixedDistrDescriptor, 0, _valSet.size() - 1); -} - -std::string StrDistribution::genRandomString(size_t len, std::mt19937_64& gen) { - std::string randStr; - randStr.reserve(len); - for (size_t i = 0; i < len; ++i) { - size_t idx = _uniformCharIdxDist(gen); - const char ch = _alphabet[idx]; - randStr += ch; - } - - return randStr; -} - -ArrDistribution::ArrDistribution(MixedDistributionDescriptor distrDescriptor, - double weight, - size_t ndv, - size_t minArrLen, - size_t maxArrLen, - std::unique_ptr arrayDataDescriptor, - double reuseScalarsRatio, - double nullsRatio) - : DataTypeDistrNew(distrDescriptor, value::TypeTags::Array, weight, ndv, nullsRatio), - _uniformArrSizeDist{minArrLen, maxArrLen}, - _arrayDataDescriptor(std::move(arrayDataDescriptor)), - _reuseScalarsRatio(reuseScalarsRatio) { - uassert(6660510, - "Array specs must be 0 if there is no array data descriptor.", - _arrayDataDescriptor || (ndv == 0 && minArrLen == 0 && maxArrLen == 0)); - uassert(6660511, - "Nested arrays requires sensible array lengths.", - !_arrayDataDescriptor || maxArrLen >= minArrLen); - uassert(6660512, - "reuseScalarsRatio must be in [0, 1].", - reuseScalarsRatio >= 0 && reuseScalarsRatio <= 1.0); -} - -void ArrDistribution::init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) { - uassert(6660513, "There must always be a parent data descriptor.", parentDesc); - - // Extract the per-type probabilities from the parent descriptor, but set the array probability - // to 0 to avoid self-recursion. - std::vector parentProbabilities; - for (const auto& dtd : parentDesc->_dataTypeDistributions) { - double prob = (dtd->tag() == value::TypeTags::Array) ? 0 : dtd->weight(); - parentProbabilities.push_back(prob); - } - std::discrete_distribution parentDataTypeSelector; - parentDataTypeSelector.param(std::discrete_distribution::param_type( - parentProbabilities.begin(), parentProbabilities.end())); - - // Generate _ndv distinct arrays, and store them in _valSet. - for (size_t i = 0; i < _ndv; ++i) { - auto [arrayTag, arrayVal] = value::makeNewArray(); - value::Array* arr = value::getArrayView(arrayVal); - size_t randArraySize = _uniformArrSizeDist(gen); - arr->reserve(randArraySize); - // Generate the data for one random array. - for (size_t j = 0; j < randArraySize; ++j) { - DataTypeDistrNew* dtd = nullptr; - size_t idx; - double reuseParentProb = _uniformRandProbability(gen); - if (reuseParentProb < _reuseScalarsRatio) { - // Pick a random data type descriptor from the parent. - idx = parentDataTypeSelector(gen); - dtd = parentDesc->_dataTypeDistributions.at(idx).get(); - } else { - idx = _arrayDataDescriptor->_dataTypeSelector(gen); - dtd = _arrayDataDescriptor->_dataTypeDistributions.at(idx).get(); - } - dtd->generate(arr, gen); - } - _valSet.emplace_back(arrayTag, arrayVal); - } - - _idxDist = MixedDistribution::make(_mixedDistrDescriptor, 0, _valSet.size() - 1); -} - -DatasetDescriptorNew::DatasetDescriptorNew(TypeDistrVector dataTypeDistributions, - std::mt19937_64& gen) - : _dataTypeDistributions(std::move(dataTypeDistributions)), _gen{gen} { - - // The probability of each type to be chosen. 
Extracted into a vector in order to setup a - // discrete_distribution. - std::vector probabilities; - probabilities.reserve(_dataTypeDistributions.size()); - for (auto& dtd : _dataTypeDistributions) { - dtd->init(this, gen); - probabilities.push_back(dtd->weight()); - } - _dataTypeSelector.param( - std::discrete_distribution::param_type(probabilities.begin(), probabilities.end())); -} - -DataTypeDistrNew* DatasetDescriptorNew::getRandDataTypeDist() { - size_t idx = _dataTypeSelector(_gen); - return _dataTypeDistributions[idx].get(); -} - -std::vector DatasetDescriptorNew::genRandomDataset(size_t nElems) { - std::vector randValues; - randValues.reserve(nElems); - - for (size_t i = 0; i < nElems; ++i) { - DataTypeDistrNew* dtd = getRandDataTypeDist(); - dtd->generate(randValues, _gen); - } - - return randValues; -} - - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/rand_utils_new.h b/src/mongo/db/query/ce/rand_utils_new.h deleted file mode 100644 index 0420b990dce..00000000000 --- a/src/mongo/db/query/ce/rand_utils_new.h +++ /dev/null @@ -1,354 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include -#include - -#include "value_utils.h" - -namespace mongo::ce { - -class SBEValue; -class DatasetDescriptorNew; - -/** - * A base class for wrappers of STL random distributions that produce size_t values within a range. - * This class enables polymorphic usage of random distributions, for instance to implement a mix of - * distributions. 
- */
-class RandomDistribution {
-public:
-    RandomDistribution() = default;
-    RandomDistribution(const RandomDistribution&) = default;
-    RandomDistribution(RandomDistribution&&) = default;
-    RandomDistribution& operator=(const RandomDistribution&) = default;
-    RandomDistribution& operator=(RandomDistribution&&) = default;
-    virtual ~RandomDistribution() = default;
-
-    virtual size_t operator()(std::mt19937_64& gen) = 0;
-};
-
-/**
-   A uniform random distribution of size_t within a range
- */
-class UniformDistr : public RandomDistribution {
-public:
-    UniformDistr(size_t min, size_t max) : _distr{min, max}, _min(min), _max(max) {}
-
-    size_t operator()(std::mt19937_64& gen) override {
-        size_t result = _distr(gen);
-        uassert(6660540, "Random index out of range", result >= _min && result <= _max);
-        return result;
-    }
-
-private:
-    std::uniform_int_distribution<size_t> _distr;
-    size_t _min;
-    size_t _max;
-};
-
-/**
- * Wrapper of normal distribution that is guaranteed to produce size_t values within a certain
- * range. The STL class normal_distribution takes a mean and standard deviation. This class
- * computes a suitable mean and standard deviation from the required [min,max] boundaries.
- */
-class NormalDistr : public RandomDistribution {
-public:
-    NormalDistr(size_t min, size_t max)
-        : _distr{(double)(min + max) / 2.0, (double)(max - min) / 4.0},
-          _backup{min, max},
-          _min((double)min),
-          _max((double)max) {}
-
-    size_t operator()(std::mt19937_64& gen) override {
-        size_t result = std::round(_distr(gen));
-        size_t trials = 0;
-        // If the result is outside the range (an event with low probability), try 10 more times to
-        // get a number in the range.
-        while (!(result >= _min && result <= _max) && trials < 10) {
-            double randNum = _distr(gen);
-            if (randNum < _min) {
-                result = std::ceil(randNum);
-            } else if (randNum > _max) {
-                result = std::floor(randNum);
-            } else {
-                result = std::round(randNum);
-            }
-            ++trials;
-        }
-        if (result < _min || result > _max) {
-            // We couldn't generate a number in [min,max] within 10 attempts. Generate a uniform
-            // number.
-            result = _backup(gen);
-        }
-        uassert(6660541, "Random index out of range", result >= _min && result <= _max);
-        return result;
-    }
-
-private:
-    std::normal_distribution<double> _distr;
-    std::uniform_int_distribution<size_t> _backup;
-    double _min;
-    double _max;
-};
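A note on the parameters chosen above: with mean (min + max) / 2 and standard deviation (max - min) / 4, the interval [min, max] sits two standard deviations either side of the mean, so roughly 95% of raw draws land in range and the retry loop rarely reaches the uniform backup. A quick std-only check of that rate for min = 0, max = 100:

#include <cstdio>
#include <random>

int main() {
    std::mt19937_64 gen{42};
    std::normal_distribution<double> d{50.0, 25.0};  // mean (0+100)/2, sigma (100-0)/4
    int inRange = 0;
    for (int i = 0; i < 100000; ++i) {
        const double x = d(gen);
        if (x >= 0.0 && x <= 100.0) {
            ++inRange;
        }
    }
    std::printf("%d of 100000 in range\n", inRange);  // expect roughly 95000
    return 0;
}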
- result = _backup(gen); - } - uassert(6660541, "Random index out of range", result >= _min && result <= _max); - return result; - } - -private: - std::normal_distribution _distr; - std::uniform_int_distribution _backup; - double _min; - double _max; -}; - -enum class DistrType { kUniform, kNormal }; - -using MixedDistributionDescriptor = std::vector>; - -/** - * Generator for mixed distribution, where mixing is on the type of distribution, in the - * probabilities specified in distrProbabilities - */ -class MixedDistribution { -public: - MixedDistribution(std::vector> distrMix, - std::vector& distrProbabilities) - : _distrMix(std::move(distrMix)) { - _distDist.param(std::discrete_distribution::param_type(distrProbabilities.begin(), - distrProbabilities.end())); - } - - static std::unique_ptr make(MixedDistributionDescriptor& descriptor, - size_t min, - size_t max) { - std::vector distrProbabilities; - std::vector> distrMix; - - for (const auto& [distrType, weight] : descriptor) { - distrProbabilities.push_back(weight); - switch (distrType) { - case DistrType::kUniform: - distrMix.emplace_back(std::make_unique(min, max)); - break; - case DistrType::kNormal: - distrMix.emplace_back(std::make_unique(min, max)); - break; - default: - MONGO_UNREACHABLE; - } - } - - return std::make_unique(std::move(distrMix), distrProbabilities); - } - - size_t operator()(std::mt19937_64& gen) { - size_t distIdx = _distDist(gen); - size_t result = (*_distrMix.at(distIdx))(gen); - return result; - } - -private: - // Mix of different distributions. There can be instances of the same type of distribution, - // because they can still be defined differently. - std::vector> _distrMix; - // Distribution of distributions - select the current distribution with a certain probability. - std::discrete_distribution _distDist; -}; - -/** - * Descriptor of a typed data distribution - */ -class DataTypeDistrNew {
-public: - DataTypeDistrNew(MixedDistributionDescriptor distrDescriptor, - value::TypeTags tag, - double weight, - size_t ndv, - double nullsRatio = 0.0) - : _mixedDistrDescriptor(distrDescriptor), - _tag(tag), - _weight(weight), - _ndv(ndv), - _nullsRatio(nullsRatio) { - uassert(6660542, "NDV must be > 0.", ndv > 0); - uassert(6660543, "nullsRatio must be in [0, 1].", nullsRatio >= 0 && nullsRatio <= 1); - } - - virtual ~DataTypeDistrNew() = default; - - /** - * Generate all unique values that generation chooses from, and store them in '_valSet'. - * Different data types provide different implementations. - * @todo: The 'parentDesc' parameter is used only by array generation. Consider a different way - * of passing it only to that type. - */ - virtual void init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) = 0; - - /** - * Generate a single random value, and store it in 'randValues' vector. - */ - void generate(std::vector& randValues, std::mt19937_64& gen); - - /** - * Generate a single random value, and store it in 'randValueArray' array. - */ - void generate(value::Array* randValueArray, std::mt19937_64& gen); - - /** - * Custom equality comparison for storage in sets. There can be only one datatype in a set. - */ - bool operator==(const DataTypeDistrNew& d) const { - return this->_tag == d._tag; - } - - value::TypeTags tag() const { - return _tag; - } - - double weight() const { - return _weight; - } - -protected: - MixedDistributionDescriptor _mixedDistrDescriptor; - value::TypeTags _tag; - // Weight that determines the probability of a value of this type.
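- // For example, with an integer distribution of weight 0.8 and a string - // distribution of weight 0.2, roughly 80% of the generated values are expected - // to be integers: std::discrete_distribution normalizes the registered weights - // into selection probabilities.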
- const double _weight; - const size_t _ndv; - // A set of (randomly generated) values to choose from when generating random datasets. - std::vector _valSet; - // Generator of random indexes into a set of values. - // std::uniform_int_distribution _idxDist; - std::unique_ptr _idxDist; - // Percent of null values in the dataset. - double _nullsRatio; - std::uniform_real_distribution _nullSelector{0, 1}; - - friend class DatasetDescriptorNew; -}; - -using TypeDistrVector = std::vector>; - -/** - * Integer data distribution. - */ -class IntDistribution : public DataTypeDistrNew { -public: - IntDistribution(MixedDistributionDescriptor distrDescriptor, - double weight, - size_t ndv, - int minInt, - int maxInt, - double nullsRatio = 0); - - /* - * Generate a set of random integers, and store them in _valSet. - */ - void init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) override; - -protected: - int _minInt; - int _maxInt; -}; - -/** - * String data distribution. - */ -class StrDistribution : public DataTypeDistrNew { -public: - StrDistribution(MixedDistributionDescriptor distrDescriptor, - double weight, - size_t ndv, - size_t minStrLen, - size_t maxStrLen, - double nullsRatio = 0); - - /* - * Generate a set of random strings, and store them in _valSet. - */ - void init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) override; - -protected: - std::string genRandomString(size_t len, std::mt19937_64& gen); - - size_t _minStrLen; - size_t _maxStrLen; - // All strings draw characters from this alphabet. - static const std::string _alphabet; - // Generator of random indexes into the set of characters '_alphabet'. - std::uniform_int_distribution _uniformCharIdxDist{0, _alphabet.size() - 1}; -}; - -/** - * SBE array data distribution. - */ -class ArrDistribution : public DataTypeDistrNew { -public: - ArrDistribution(MixedDistributionDescriptor distrDescriptor, - double weight, - size_t ndv, - size_t minArrLen, - size_t maxArrLen, - std::unique_ptr arrayDataDescriptor, - double reuseScalarsRatio = 0, - double nullsRatio = 0); - -private: - void init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) override; - - // Generator of random array sizes. - std::uniform_int_distribution _uniformArrSizeDist; - // Descriptor of the dataset within each array. - std::unique_ptr _arrayDataDescriptor; - // Randomly select a parent or a child distribution when generating random values. - std::uniform_real_distribution _uniformRandProbability{0.0, 1.0}; - double _reuseScalarsRatio; -}; - -/** - Given a list of typed data distributions, this class is used to generate a vector of values - according to the distribution weights. -*/ -class DatasetDescriptorNew { -public: - DatasetDescriptorNew(TypeDistrVector dataTypeDistributions, std::mt19937_64& gen); - - // Generate a random dataset of 'nElems' according to the data distribution characteristics in - // this object. - std::vector genRandomDataset(size_t nElems); - -private: - // Select a random value data type. - DataTypeDistrNew* getRandDataTypeDist(); - - // Distribution of different SBE data types. Each type receives a share of the values in - // proportion to its weight. - // TODO: is it a better idea to store shared_ptr or raw pointers to enable reuse? - TypeDistrVector _dataTypeDistributions; - // Pseudo-random generator. - std::mt19937_64& _gen; - // Select a random data type distribution.
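- // (Type selection uses the discrete distribution declared below.) A minimal - // usage sketch of this class, with hypothetical arguments, assuming the element - // type elided above is SBEValue (forward-declared in this header): - //     std::mt19937_64 gen{42}; - //     TypeDistrVector distrs; - //     distrs.emplace_back(std::make_unique<IntDistribution>( - //         MixedDistributionDescriptor{{DistrType::kUniform, 1.0}}, - //         1.0 /*weight*/, 100 /*ndv*/, 0 /*minInt*/, 1000 /*maxInt*/)); - //     DatasetDescriptorNew desc{std::move(distrs), gen}; - //     std::vector<SBEValue> dataset = desc.genRandomDataset(10'000);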
- std::discrete_distribution _dataTypeSelector; - - friend class ArrDistribution; -}; - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/sampling_estimator.cpp b/src/mongo/db/query/ce/sampling_estimator.cpp new file mode 100644 index 00000000000..85fac93b0a2 --- /dev/null +++ b/src/mongo/db/query/ce/sampling_estimator.cpp @@ -0,0 +1,341 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/ce/sampling_estimator.h" + +#include "mongo/db/exec/sbe/abt/abt_lower.h" +#include "mongo/db/query/cqf_command_utils.h" +#include "mongo/db/query/optimizer/explain.h" +#include "mongo/db/query/optimizer/index_bounds.h" +#include "mongo/db/query/optimizer/props.h" +#include "mongo/db/query/optimizer/utils/abt_hash.h" +#include "mongo/db/query/optimizer/utils/memo_utils.h" +#include "mongo/logv2/log.h" + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery + +namespace mongo::optimizer::ce { +class SamplingPlanExtractor { +public: + SamplingPlanExtractor(const cascades::Memo& memo, + const OptPhaseManager& phaseManager, + const size_t sampleSize) + : _memo(memo), _sampleSize(sampleSize), _phaseManager(phaseManager) {} + + void transport(ABT& n, const MemoLogicalDelegatorNode& node) { + n = extract(_memo.getLogicalNodes(node.getGroupId()).front()); + } + + void transport(ABT& n, const ScanNode& /*node*/, ABT& /*binder*/) { + // We will lower the scan node in a sampling context here. + // TODO: for now just return the documents in random order. + n = make(properties::LimitSkipRequirement(_sampleSize, 0), std::move(n)); + } + + void transport(ABT& n, const FilterNode& /*node*/, ABT& childResult, ABT& /*exprResult*/) { + // Skip over filters. + n = childResult; + } + + void transport(ABT& /*n*/, + const EvaluationNode& /*node*/, + ABT& /*childResult*/, + ABT& /*exprResult*/) { + // Keep Eval nodes. + } + + void transport(ABT& n, const SargableNode& node, ABT& childResult, ABT& refs, ABT& binds) { + ABT result = childResult; + // Retain only output bindings without applying filters. 
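+        // Each requirement below is re-lowered with a fully-open interval +        // (IntervalReqExpr::makeSingularDNF() with no bounds), so the sample still +        // materializes the projections that later stages reference while the +        // predicate itself filters nothing here; predicate selectivity is instead +        // estimated per requirement by SamplingTransport.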
+ for (const auto& [key, req] : node.getReqMap()) { + if (const auto& boundProjName = req.getBoundProjectionName()) { + lowerPartialSchemaRequirement( + key, + PartialSchemaRequirement{ + boundProjName, IntervalReqExpr::makeSingularDNF(), req.getIsPerfOnly()}, + result, + _phaseManager.getPathToInterval()); + } + } + std::swap(n, result); + } + + void transport(ABT& n, const CollationNode& /*node*/, ABT& childResult, ABT& refs) { + // Skip over collation nodes. + n = childResult; + } + + template + void transport(ABT& /*n*/, const T& /*node*/, Ts&&...) { + if constexpr (std::is_base_of_v) { + uasserted(6624242, "Should not be seeing other types of nodes here."); + } + } + + ABT extract(ABT node) { + algebra::transport(node, *this); + return node; + } + +private: + const cascades::Memo& _memo; + const size_t _sampleSize; + const OptPhaseManager& _phaseManager; +}; + +class SamplingTransport { + static constexpr size_t kMaxSampleSize = 1000; + +public: + SamplingTransport(OperationContext* opCtx, + OptPhaseManager phaseManager, + const int64_t numRecords, + std::unique_ptr fallbackCE) + : _phaseManager(std::move(phaseManager)), + _opCtx(opCtx), + _sampleSize(std::min(numRecords, kMaxSampleSize)), + _fallbackCE(std::move(fallbackCE)) {} + + CEType transport(const ABT& n, + const FilterNode& node, + const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + CEType childResult, + CEType /*exprResult*/) { + if (!properties::hasProperty(logicalProps)) { + return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); + } + + SamplingPlanExtractor planExtractor(memo, _phaseManager, _sampleSize); + // Create a plan with all eval nodes so far and the filter last. + ABT abtTree = make(node.getFilter(), planExtractor.extract(n)); + + return estimateFilterCE(metadata, memo, logicalProps, n, std::move(abtTree), childResult); + } + + CEType transport(const ABT& n, + const SargableNode& node, + const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + CEType childResult, + CEType /*bindResult*/, + CEType /*refsResult*/) { + if (!properties::hasProperty(logicalProps)) { + return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); + } + + SamplingPlanExtractor planExtractor(memo, _phaseManager, _sampleSize); + ABT extracted = planExtractor.extract(n); + + // Estimate individual requirements separately by potentially re-using cached results. + // Here we assume that each requirement is independent. + // TODO: consider estimating together the entire set of requirements (but caching!) + CEType result = childResult; + for (const auto& [key, req] : node.getReqMap()) { + if (req.getIsPerfOnly()) { + // Ignore perf-only requirements. + continue; + } + + if (!isIntervalReqFullyOpenDNF(req.getIntervals())) { + ABT lowered = extracted; + // Lower requirement without an output binding. + lowerPartialSchemaRequirement( + key, + PartialSchemaRequirement{boost::none /*boundProjectionName*/, + req.getIntervals(), + req.getIsPerfOnly()}, + lowered, + _phaseManager.getPathToInterval()); + uassert(6624243, "Expected a filter node", lowered.is()); + result = + estimateFilterCE(metadata, memo, logicalProps, n, std::move(lowered), result); + } + } + + return result; + } + + /** + * Other ABT types. + */ + template + CEType transport(const ABT& n, + const T& /*node*/, + const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + Ts&&...) 
{ + if (canBeLogicalNode()) { + return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); + } + return 0.0; + } + + CEType derive(const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + const ABT::reference_type logicalNodeRef) { + return algebra::transport(logicalNodeRef, *this, metadata, memo, logicalProps); + } + +private: + CEType estimateFilterCE(const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + const ABT& n, + ABT abtTree, + CEType childResult) { + auto it = _selectivityCacheMap.find(abtTree); + if (it != _selectivityCacheMap.cend()) { + // Cache hit. + return it->second * childResult; + } + + const auto [success, selectivity] = estimateSelectivity(abtTree); + if (!success) { + return _fallbackCE->deriveCE(metadata, memo, logicalProps, n.ref()); + } + + _selectivityCacheMap.emplace(std::move(abtTree), selectivity); + + OPTIMIZER_DEBUG_LOG(6264805, + 5, + "CE sampling estimated filter selectivity", + "selectivity"_attr = selectivity); + return selectivity * childResult; + } + + std::pair estimateSelectivity(ABT abtTree) { + // Add a group by to count number of documents. + const ProjectionName sampleSumProjection = "sum"; + abtTree = + make(ProjectionNameVector{}, + ProjectionNameVector{sampleSumProjection}, + makeSeq(make("$sum", makeSeq(Constant::int64(1)))), + std::move(abtTree)); + abtTree = make( + properties::ProjectionRequirement{ProjectionNameVector{sampleSumProjection}}, + std::move(abtTree)); + + + OPTIMIZER_DEBUG_LOG(6264806, + 5, + "Estimate selectivity ABT", + "explain"_attr = ExplainGenerator::explainV2(abtTree)); + + _phaseManager.optimize(abtTree); + + auto env = VariableEnvironment::build(abtTree); + SlotVarMap slotMap; + boost::optional ridSlot; + sbe::value::SlotIdGenerator ids; + SBENodeLowering g{env, + slotMap, + ridSlot, + ids, + _phaseManager.getMetadata(), + _phaseManager.getNodeToGroupPropsMap(), + _phaseManager.getRIDProjections(), + true /*randomScan*/}; + auto sbePlan = g.optimize(abtTree); + tassert(6624261, "Unexpected rid slot", !ridSlot); + + // TODO: return errors instead of exceptions? + uassert(6624244, "Lowering failed", sbePlan != nullptr); + uassert(6624245, "Invalid slot map size", slotMap.size() == 1); + + sbePlan->attachToOperationContext(_opCtx); + sbe::CompileCtx ctx(std::make_unique()); + sbePlan->prepare(ctx); + + std::vector accessors; + for (auto& [name, slot] : slotMap) { + accessors.emplace_back(sbePlan->getAccessor(ctx, slot)); + } + + sbePlan->open(false); + ON_BLOCK_EXIT([&] { sbePlan->close(); }); + + while (sbePlan->getNext() != sbe::PlanState::IS_EOF) { + const auto [tag, value] = accessors.at(0)->getViewOfValue(); + if (tag == sbe::value::TypeTags::NumberInt64) { + // TODO: check if we get exactly one result from the groupby? + return {true, static_cast(value) / _sampleSize}; + } + return {false, {}}; + }; + + // If nothing passes the filter, estimate 0.0 selectivity. HashGroup will return 0 results. + return {true, 0.0}; + } + + struct NodeRefHash { + size_t operator()(const ABT& node) const { + return ABTHashGenerator::generate(node); + } + }; + + struct NodeRefCompare { + bool operator()(const ABT& left, const ABT& right) const { + return left == right; + } + }; + + // Cache a logical node reference to computed selectivity. Used for Filter and Sargable nodes. + opt::unordered_map _selectivityCacheMap; + + OptPhaseManager _phaseManager; + + // We don't own this. 
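+    // (It is supplied by the caller that constructed this transport.) +    // +    // Worked example of the arithmetic in estimateSelectivity() and +    // estimateFilterCE() above, with illustrative numbers: if the optimized +    // sampling plan counts 37 of 1000 sampled documents passing a predicate, the +    // estimated selectivity is 37 / 1000 = 0.037; scaling a child cardinality of +    // 50,000 by it yields a CE of 1,850 for the filter.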
+ OperationContext* _opCtx; + + const int64_t _sampleSize; + std::unique_ptr _fallbackCE; +}; + +SamplingEstimator::SamplingEstimator(OperationContext* opCtx, + OptPhaseManager phaseManager, + const int64_t numRecords, + std::unique_ptr fallbackCE) + : _transport(std::make_unique( + opCtx, std::move(phaseManager), numRecords, std::move(fallbackCE))) {} + +SamplingEstimator::~SamplingEstimator() {} + +CEType SamplingEstimator::deriveCE(const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + const ABT::reference_type logicalNodeRef) const { + return _transport->derive(metadata, memo, logicalProps, logicalNodeRef); +} + +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/sampling_estimator.h b/src/mongo/db/query/ce/sampling_estimator.h new file mode 100644 index 00000000000..cf9d0973a39 --- /dev/null +++ b/src/mongo/db/query/ce/sampling_estimator.h @@ -0,0 +1,56 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/query/optimizer/cascades/interfaces.h" +#include "mongo/db/query/optimizer/opt_phase_manager.h" + +namespace mongo::optimizer::ce { + +class SamplingTransport; + +class SamplingEstimator : public cascades::CardinalityEstimator { +public: + SamplingEstimator(OperationContext* opCtx, + OptPhaseManager phaseManager, + int64_t numRecords, + std::unique_ptr fallbackCE); + ~SamplingEstimator(); + + CEType deriveCE(const Metadata& metadata, + const cascades::Memo& memo, + const properties::LogicalProps& logicalProps, + ABT::reference_type logicalNodeRef) const final; + +private: + std::unique_ptr _transport; +}; + +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/scalar_histogram.cpp b/src/mongo/db/query/ce/scalar_histogram.cpp deleted file mode 100644 index 604af42da13..00000000000 --- a/src/mongo/db/query/ce/scalar_histogram.cpp +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/query/ce/scalar_histogram.h" -#include "mongo/db/exec/sbe/values/bson.h" -#include "mongo/db/exec/sbe/values/value.h" - -namespace mongo::ce { - -using namespace sbe; - -Bucket::Bucket( - double equalFreq, double rangeFreq, double cumulativeFreq, double ndv, double cumulativeNDV) - : _equalFreq(equalFreq), - _rangeFreq(rangeFreq), - _cumulativeFreq(cumulativeFreq), - _ndv(ndv), - _cumulativeNDV(cumulativeNDV) { - uassert(6695702, "Invalid equalFreq", _equalFreq >= 0.0); - uassert(6695703, "Invalid rangeFreq", _rangeFreq >= 0.0); - uassert(6695704, "Invalid ndv", _ndv <= _rangeFreq); - uassert(6695705, "Invalid cumulative frequency", _cumulativeFreq >= _equalFreq + _rangeFreq); - uassert(6695706, "Invalid cumulative ndv", _cumulativeNDV >= _ndv + 1.0); -} - -std::string Bucket::toString() const { - std::ostringstream os; - os << "equalFreq: " << _equalFreq << ", rangeFreq: " << _rangeFreq - << ", cumulativeFreq: " << _cumulativeFreq << ", ndv: " << _ndv - << ", cumulativeNDV: " << _cumulativeNDV; - return os.str(); -} - -std::string Bucket::dump() const { - std::ostringstream os; - os << _equalFreq << ", " << _rangeFreq << ", " << _ndv; - return os.str(); -} - -BSONObj Bucket::serialize() const { - BSONObjBuilder bob; - bob.appendNumber("boundaryCount", _equalFreq); - bob.appendNumber("rangeCount", _rangeFreq); - bob.appendNumber("rangeDistincts", _ndv); - bob.appendNumber("cumulativeCount", _cumulativeFreq); - bob.appendNumber("cumulativeDistincts", _cumulativeNDV); - bob.doneFast(); - return bob.obj(); -} - -ScalarHistogram::ScalarHistogram() : ScalarHistogram({}, {}) {} - -ScalarHistogram::ScalarHistogram(const StatsHistogram& histogram) { - for (const auto& bucket : histogram.getBuckets()) { - Bucket b(bucket.getBoundaryCount(), - bucket.getRangeCount(), - bucket.getCumulativeCount(), - bucket.getRangeDistincts(), - bucket.getCumulativeDistincts()); - _buckets.push_back(std::move(b)); - } - for (const auto& bound : histogram.getBounds()) { - // We cannot insert a view here, because the lifetime of the bound is shorter than - // that of the histogram. In the case of a larger type, e.g. BigString/bsonString, we need - // to copy over the entire string as well, not just a pointer to memory which may be - // deallocated before we need it.
- auto value = sbe::bson::convertFrom(bound.getElement()); - _bounds.push_back(value.first, value.second); - } -} - -ScalarHistogram::ScalarHistogram(value::Array bounds, std::vector buckets) - : _bounds(std::move(bounds)), _buckets(std::move(buckets)) { - uassert(6695707, "Invalid sizes", bounds.size() == buckets.size()); -} - -std::string ScalarHistogram::toString() const { - std::ostringstream os; - os << "["; - for (size_t i = 0; i < _buckets.size(); i++) { - os << "{val: " << _bounds.getAt(i) << ", " << _buckets.at(i).toString() << "}"; - if (_buckets.size() - i > 1) - os << ","; - } - os << "]"; - return os.str(); -} - -std::string ScalarHistogram::plot() const { - std::ostringstream os; - double maxFreq = 0; - const double maxBucketSize = 100; - - for (const auto& bucket : _buckets) { - double maxBucketFreq = std::max(bucket._equalFreq, bucket._rangeFreq); - maxFreq = std::max(maxFreq, maxBucketFreq); - } - - std::vector> headers; - size_t maxHeaderSize = 0; - for (size_t i = 0; i < _buckets.size(); ++i) { - std::ostringstream rngHeader; - std::ostringstream eqlHeader; - double scaledRngF = maxBucketSize * _buckets[i]._rangeFreq / maxFreq; - double scaledEqlF = maxBucketSize * _buckets[i]._equalFreq / maxFreq; - rngHeader << _bounds.getAt(i) << ": " << _buckets[i]._rangeFreq; - eqlHeader << _bounds.getAt(i) << ": " << _buckets[i]._equalFreq; - auto rngStr = rngHeader.str(); - maxHeaderSize = std::max(maxHeaderSize, rngStr.size()); - headers.emplace_back(scaledRngF, rngStr); - auto eqlStr = eqlHeader.str(); - maxHeaderSize = std::max(maxHeaderSize, eqlStr.size()); - headers.emplace_back(scaledEqlF, eqlStr); - } - - const std::string maxLine(maxBucketSize + maxHeaderSize + 3, '-'); - os << maxLine << "\n"; - for (size_t j = 0; j < headers.size(); ++j) { - auto header = headers.at(j); - header.second.resize(maxHeaderSize, ' '); - const std::string bar(std::round(header.first), '*'); - os << header.second << " | " << bar << "\n"; - } - os << maxLine << "\n"; - - return os.str(); -} - -std::string ScalarHistogram::dump() const { - std::ostringstream os; - os << "Histogram:\n{"; - for (size_t i = 0; i < _buckets.size(); i++) { - os << "{" << _bounds.getAt(i) << ", " << _buckets.at(i).dump() << "},\n"; - } - os << "}"; - return os.str(); -} - -const value::Array& ScalarHistogram::getBounds() const { - return _bounds; -} - -const std::vector& ScalarHistogram::getBuckets() const { - return _buckets; -} - -BSONObj ScalarHistogram::serialize() const { - BSONObjBuilder histogramBuilder; - - // Construct bucket BSON. - auto buckets = getBuckets(); - BSONArrayBuilder bucketsBuilder(histogramBuilder.subarrayStart("buckets")); - for (const auto& bucket : buckets) { - bucketsBuilder.append(bucket.serialize()); - } - bucketsBuilder.doneFast(); - - // Construct bucket bounds BSON. - auto bounds = getBounds(); - BSONArrayBuilder boundsBuilder(histogramBuilder.subarrayStart("bounds")); - sbe::bson::convertToBsonObj(boundsBuilder, &bounds); - boundsBuilder.doneFast(); - - histogramBuilder.doneFast(); - return histogramBuilder.obj(); -} - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/scalar_histogram.h b/src/mongo/db/query/ce/scalar_histogram.h deleted file mode 100644 index c368e0dd50b..00000000000 --- a/src/mongo/db/query/ce/scalar_histogram.h +++ /dev/null @@ -1,120 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include -#include -#include - -#include "mongo/db/exec/sbe/values/value.h" -#include "mongo/db/query/ce/stats_gen.h" - -namespace mongo::ce { - -/** - * Statistics related to a single ScalarHistogram bucket. The boundary value is kept in a separate - * array, so that each bucket has a corresponding boundary value. The reason for this is to manage - * the memory of values. - */ -struct Bucket { - Bucket(double equalFreq, - double rangeFreq, - double cumulativeFreq, - double ndv, - double cumulativeNDV); - - std::string toString() const; - // Helper function to dump the bucket content as needed by histogram creation in the unit tests. - std::string dump() const; - - // Frequency of the bound value itself. - double _equalFreq; - - // Frequency of other values. - double _rangeFreq; - - // Sum of frequencies of preceding buckets to avoid recomputing. Includes both _equalFreq and - // _rangeFreq. - double _cumulativeFreq; - - // Number of distinct values in this bucket, excludes the bound. - double _ndv; - - // Sum of distinct values in preceding buckets including this bucket. - double _cumulativeNDV; - - // Serialize to BSON for storage in stats collection. - BSONObj serialize() const; -}; - -/** - * A ScalarHistogram over a set of values. The ScalarHistogram consists of two parallel vectors - - * one with the individual value statistics, and another one with the actual boundary values. - */ -class ScalarHistogram { -public: - ScalarHistogram(); - ScalarHistogram(const StatsHistogram& histogram); - ScalarHistogram(sbe::value::Array bounds, std::vector buckets); - - // Print a human-readable representation of a histogram. - std::string toString() const; - std::string plot() const; - // Helper function to dump the content of the histogram as needed by the manual histogram - // creation in the unit tests (without cumulative frequency and NDV). - std::string dump() const; - - const sbe::value::Array& getBounds() const; - const std::vector& getBuckets() const; - // Return the total number of histogrammed values.
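-    // For example, with bounds [1, 2, 3] and buckets {1, 0, 1, 0, 1}, -    // {2, 5, 8, 1, 2}, {3, 4, 15, 2, 6} (fields in constructor order: equalFreq, -    // rangeFreq, cumulativeFreq, ndv, cumulativeNDV), the bound value 2 appears -    // twice, five values fall strictly between 1 and 2, and the cardinality -    // returned here is the last bucket's _cumulativeFreq, i.e. 15.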
- size_t getCardinality() const { - if (_buckets.empty()) { - return 0.0; - } - return _buckets.back()._cumulativeFreq; - } - - bool empty() const { - return _buckets.empty(); - } - - // Serialize to BSON for storage in stats collection. - BSONObj serialize() const; - - static constexpr size_t kMaxBuckets = 100; - -private: - // Bucket bounds representing the **highest** value in each bucket. - sbe::value::Array _bounds; - - std::vector _buckets; -}; - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/stats.idl b/src/mongo/db/query/ce/stats.idl deleted file mode 100644 index eb6220d45b9..00000000000 --- a/src/mongo/db/query/ce/stats.idl +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (C) 2022-present MongoDB, Inc. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the Server Side Public License, version 1, -# as published by MongoDB, Inc. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# Server Side Public License for more details. -# -# You should have received a copy of the Server Side Public License -# along with this program. If not, see -# . -# -# As a special exception, the copyright holders give permission to link the -# code of portions of this program with the OpenSSL library under certain -# conditions as described in each individual source file and distribute -# linked combinations including the program with the OpenSSL library. You -# must comply with the Server Side Public License in all respects for -# all of the code used other than as permitted herein. If you modify file(s) -# with this exception, you may extend this exception to your version of the -# file(s), but you are not obligated to do so. If you do not wish to do so, -# delete this exception statement from your version. If you delete this -# exception statement from all source files in the program, then also delete -# it in the license file. 
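-# The structs below define the storage schema that ScalarHistogram and -# ArrayHistogram serialize to (see ScalarHistogram::serialize()) and that -# StatsPath::parse() reads back in the stats cache loader; the plain 'array' -# fields correspond to the 'buckets' and 'bounds' BSON arrays built there.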
-# -global: - cpp_namespace: "mongo" - -imports: - - "mongo/db/basic_types.idl" - -structs: - StatsBucket: - description: "Histogram bucket" - fields: - boundaryCount: - type: double - rangeCount: - type: double - rangeDistincts: - type: double - cumulativeCount: - type: double - cumulativeDistincts: - type: double - - StatsHistogram: - description: "MaxDiff Histogram" - fields: - buckets: - type: array - bounds: - type: array - - TypeTag: - description: "SBE types and their corresponding frequencies in the histogram" - fields: - typeName: - type: string - count: - type: double - - StatsArrayHistogram: - description: "Array Histogram" - fields: - minHistogram: - type: StatsHistogram - maxHistogram: - type: StatsHistogram - uniqueHistogram: - type: StatsHistogram - typeCount: - type: array - - Statistics: - description: "Serialized representation of data statistics for a key path" - fields: - documents: - type: double - trueCount: - type: double - falseCount: - type: double - emptyArrayCount: - type: double - typeCount: - type: array - scalarHistogram: - type: StatsHistogram - arrayStatistics: - type: StatsArrayHistogram - optional: true - - StatsPath: - description: "Key path to statistics" - fields: - _id: - type: string - statistics: - type: Statistics diff --git a/src/mongo/db/query/ce/stats_cache.cpp b/src/mongo/db/query/ce/stats_cache.cpp deleted file mode 100644 index 2fb2be400a6..00000000000 --- a/src/mongo/db/query/ce/stats_cache.cpp +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file.
- */ - - -#include "mongo/platform/basic.h" - -#include "mongo/db/query/ce/stats_cache.h" - -#include "mongo/db/query/ce/collection_statistics.h" -#include "mongo/util/read_through_cache.h" - -#include "mongo/logv2/log.h" - -#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery - - -namespace mongo { -using namespace mongo::ce; - -namespace { - -const auto statsCacheDecoration = ServiceContext::declareDecoration>(); - -} // namespace - -StatsCache::StatsCache(ServiceContext* service, - std::unique_ptr cacheLoader, - ThreadPoolInterface& threadPool, - int size) - : ReadThroughCache( - _mutex, - service, - threadPool, - [this](OperationContext* opCtx, - const StatsPathString& statsPath, - const ValueHandle& stats) { return _lookupStats(opCtx, statsPath, stats); }, - size), - _statsCacheLoader(std::move(cacheLoader)) {} - -StatsCache::LookupResult StatsCache::_lookupStats(OperationContext* opCtx, - const StatsPathString& statsPath, - const StatsCacheValueHandle& stats) { - - try { - invariant(_statsCacheLoader); - auto newStats = _statsCacheLoader->getStats(opCtx, statsPath).get(); - return LookupResult(std::move(newStats)); - } catch (const DBException& ex) { - if (ex.code() == ErrorCodes::NamespaceNotFound) { - return StatsCache::LookupResult(boost::none); - } - throw; - } -} - -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_cache.h b/src/mongo/db/query/ce/stats_cache.h deleted file mode 100644 index 65b5bcd19b4..00000000000 --- a/src/mongo/db/query/ce/stats_cache.h +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/base/string_data.h" -#include "mongo/db/namespace_string.h" -#include "mongo/db/query/ce/collection_statistics.h" -#include "mongo/db/query/ce/stats_cache_loader.h" -#include "mongo/util/concurrency/thread_pool.h" -#include "mongo/util/read_through_cache.h" - -namespace mongo { - -using namespace mongo::ce; - -using StatsCacheType = ReadThroughCache; -using StatsCacheValueHandle = StatsCacheType::ValueHandle; - -/** - * Collection statistics read-through cache. It reads from the persistent storage but never writes - * to it.
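- * - * On a cache miss, the lookup is dispatched to the thread pool and delegates to - * the StatsCacheLoader; a NamespaceNotFound error from the loader is converted - * into an empty cached entry (boost::none) rather than rethrown, so a missing - * statistics document is not an error for readers of the cache.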
- */ -class StatsCache : public StatsCacheType { -public: - /** - * The constructor provides the Service context under which this cache has been instantiated, - * and a Thread pool to be used for invoking the blocking 'lookup' calls. The size is the number - * of entries the underlying LRU cache will hold. - */ - StatsCache(ServiceContext* service, - std::unique_ptr cacheLoader, - ThreadPoolInterface& threadPool, - int size); - - /** - * Returns the statsCacheLoader; currently used for testing only. - */ - StatsCacheLoader* getStatsCacheLoader() { - invariant(_statsCacheLoader); - - return _statsCacheLoader.get(); - } - -private: - /** - * Reads collection stats from the underlying storage if they are not found in the in-memory - * cache. - */ - LookupResult _lookupStats(OperationContext* opCtx, - const StatsPathString& statsPath, - const ValueHandle& stats); - - Mutex _mutex = MONGO_MAKE_LATCH("StatsCache::_mutex"); - - std::unique_ptr _statsCacheLoader; -}; - -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_cache_loader.h b/src/mongo/db/query/ce/stats_cache_loader.h deleted file mode 100644 index a6ba3935c43..00000000000 --- a/src/mongo/db/query/ce/stats_cache_loader.h +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/namespace_string.h" -#include "mongo/db/query/ce/array_histogram.h" -#include "mongo/stdx/thread.h" - -namespace mongo { - -using namespace mongo::ce; - -using StatsPathString = std::pair; -using StatsCacheVal = std::shared_ptr; - -class StatsCacheLoader { -public: - /** - * Non-blocking call, which returns CollectionStatistics from the persistent metadata store. - * - * If for some reason the asynchronous fetch operation cannot be dispatched (for example on - * shutdown), throws a DBException.
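- * - * Callers are expected to chain continuations on the returned SemiFuture, or to - * block on it explicitly as StatsCache::_lookupStats does with .get(), rather - * than assume the statistics are available synchronously.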
- */ - virtual SemiFuture getStats(OperationContext* opCtx, - const StatsPathString& statsPath) = 0; - - virtual void setStatsReturnValueForTest(StatusWith swStats){}; - - virtual ~StatsCacheLoader() {} - - static constexpr StringData kStatsPrefix = "system.statistics"_sd; -}; - -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_cache_loader_impl.cpp b/src/mongo/db/query/ce/stats_cache_loader_impl.cpp deleted file mode 100644 index bd4d54c4e17..00000000000 --- a/src/mongo/db/query/ce/stats_cache_loader_impl.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - - -#include "mongo/platform/basic.h" - -#include "mongo/db/query/ce/stats_cache_loader_impl.h" - -#include "mongo/db/dbdirectclient.h" -#include "mongo/db/namespace_string.h" -#include "mongo/db/query/ce/stats_gen.h" -#include "mongo/logv2/log.h" -#include "mongo/stdx/thread.h" - -#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery - -namespace mongo { - -SemiFuture StatsCacheLoaderImpl::getStats(OperationContext* opCtx, - const StatsPathString& statsPath) { - - std::string statsColl(kStatsPrefix + "." 
+ statsPath.first.coll()); - - NamespaceString statsNss(statsPath.first.db(), statsColl); - DBDirectClient client(opCtx); - - - FindCommandRequest findRequest{statsNss}; - BSONObj filter = BSON("_id" << statsPath.second); - LOGV2_DEBUG(7085600, 1, "findRequest filter", "filter"_attr = filter.toString()); - findRequest.setFilter(filter.getOwned()); - - try { - auto cursor = client.find(std::move(findRequest)); - - if (!cursor) { - uasserted(ErrorCodes::OperationFailed, - str::stream() - << "Failed to establish a cursor for reading " << statsPath.first.ns() - << ", path " << statsPath.second << " from local storage"); - } - - if (cursor->more()) { - IDLParserContext ctx("StatsPath"); - BSONObj document = cursor->nextSafe().getOwned(); - auto parsedStats = StatsPath::parse(ctx, document); - StatsCacheVal statsPtr(new ArrayHistogram(parsedStats.getStatistics())); - return makeReadyFutureWith([this, statsPtr] { return statsPtr; }).semi(); - } - - uasserted(ErrorCodes::NamespaceNotFound, - str::stream() << "Stats do not exist for " << statsNss.ns() << ", path " - << statsPath.second); - } catch (const DBException& ex) { - uassertStatusOK(ex.toStatus()); - } - MONGO_UNREACHABLE -} - -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_cache_loader_impl.h b/src/mongo/db/query/ce/stats_cache_loader_impl.h deleted file mode 100644 index b461d1d51c6..00000000000 --- a/src/mongo/db/query/ce/stats_cache_loader_impl.h +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file.
- */ - -#pragma once - -#include "mongo/db/namespace_string.h" -#include "mongo/db/query/ce/collection_statistics.h" -#include "mongo/db/query/ce/stats_cache_loader.h" -#include "mongo/stdx/thread.h" - -namespace mongo { - -using namespace mongo::ce; - -class StatsCacheLoaderImpl : public StatsCacheLoader { -public: - SemiFuture getStats(OperationContext* opCtx, - const StatsPathString& statsPath) override; -}; - -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_cache_loader_mock.cpp b/src/mongo/db/query/ce/stats_cache_loader_mock.cpp deleted file mode 100644 index ddf343bd026..00000000000 --- a/src/mongo/db/query/ce/stats_cache_loader_mock.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - - -#include "mongo/platform/basic.h" - -#include "mongo/db/query/ce/stats_cache_loader_mock.h" - -#include "mongo/db/namespace_string.h" -#include "mongo/db/query/ce/collection_statistics.h" -#include "mongo/stdx/thread.h" - -namespace mongo { - -const Status StatsCacheLoaderMock::kInternalErrorStatus = { - ErrorCodes::InternalError, "Stats cache loader received unexpected request"}; - -SemiFuture StatsCacheLoaderMock::getStats(OperationContext* opCtx, - const StatsPathString& statsPath) { - - return makeReadyFutureWith([this] { return _swStatsReturnValueForTest; }).semi(); -} - -void StatsCacheLoaderMock::setStatsReturnValueForTest(StatusWith swStats) { - _swStatsReturnValueForTest = std::move(swStats); -} -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_cache_loader_mock.h b/src/mongo/db/query/ce/stats_cache_loader_mock.h deleted file mode 100644 index 0b105d5858a..00000000000 --- a/src/mongo/db/query/ce/stats_cache_loader_mock.h +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/namespace_string.h" -#include "mongo/db/query/ce/collection_statistics.h" -#include "mongo/db/query/ce/stats_cache_loader.h" -#include "mongo/stdx/thread.h" - -namespace mongo { - -using namespace mongo::ce; - -class StatsCacheLoaderMock : public StatsCacheLoader { -public: - SemiFuture getStats(OperationContext* opCtx, - const StatsPathString& statsPath) override; - - void setStatsReturnValueForTest(StatusWith swStats); - - static const Status kInternalErrorStatus; - -private: - StatusWith _swStatsReturnValueForTest{kInternalErrorStatus}; -}; - -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_cache_loader_test.cpp b/src/mongo/db/query/ce/stats_cache_loader_test.cpp deleted file mode 100644 index 9fc003e524a..00000000000 --- a/src/mongo/db/query/ce/stats_cache_loader_test.cpp +++ /dev/null @@ -1,116 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#include "mongo/bson/oid.h" -#include "mongo/db/catalog/collection_write_path.h" -#include "mongo/db/db_raii.h" -#include "mongo/db/query/ce/scalar_histogram.h" -#include "mongo/db/query/ce/stats_cache_loader_impl.h" -#include "mongo/db/query/ce/stats_cache_loader_test_fixture.h" -#include "mongo/db/query/ce/stats_gen.h" -#include "mongo/unittest/unittest.h" -#include "mongo/util/assert_util.h" -#include "mongo/util/fail_point.h" - -namespace mongo { -namespace { - -class StatsCacheLoaderTest : public StatsCacheLoaderTestFixture { -protected: - void createStatsCollection(NamespaceString nss); - StatsCacheLoaderImpl _statsCacheLoader; -}; - -void StatsCacheLoaderTest::createStatsCollection(NamespaceString nss) { - auto opCtx = operationContext(); - AutoGetCollection autoColl(opCtx, nss, MODE_IX); - auto db = autoColl.ensureDbExists(opCtx); - WriteUnitOfWork wuow(opCtx); - ASSERT(db->createCollection(opCtx, nss)); - wuow.commit(); -} - -TEST_F(StatsCacheLoaderTest, VerifyStatsLoad) { - // Initialize histogram buckets. - constexpr double doubleCount = 15.0; - constexpr double trueCount = 12.0; - constexpr double falseCount = 16.0; - constexpr double numDocs = doubleCount + trueCount + falseCount; - std::vector buckets{ - ce::Bucket{1.0, 0.0, 1.0, 0.0, 1.0}, - ce::Bucket{2.0, 5.0, 8.0, 1.0, 2.0}, - ce::Bucket{3.0, 4.0, 15.0, 2.0, 6.0}, - }; - - // Initialize histogram bounds. - auto [boundsTag, boundsVal] = sbe::value::makeNewArray(); - sbe::value::ValueGuard boundsGuard{boundsTag, boundsVal}; - auto bounds = sbe::value::getArrayView(boundsVal); - bounds->push_back(sbe::value::TypeTags::NumberDouble, 1.0); - bounds->push_back(sbe::value::TypeTags::NumberDouble, 2.0); - bounds->push_back(sbe::value::TypeTags::NumberDouble, 3.0); - - // Create a scalar histogram. - ce::TypeCounts tc{ - {sbe::value::TypeTags::NumberDouble, doubleCount}, - {sbe::value::TypeTags::Boolean, trueCount + falseCount}, - }; - ce::ScalarHistogram sh(*bounds, buckets); - ce::ArrayHistogram ah(sh, tc, trueCount, falseCount); - auto expectedSerialized = ah.serialize(); - - // Serialize histogram into a stats path. - std::string path = "somePath"; - auto serialized = stats::makeStatsPath(path, numDocs, ah); - - // Initialize stats collection. - NamespaceString nss("test", "stats"); - std::string statsColl(StatsCacheLoader::kStatsPrefix + "." + nss.coll()); - NamespaceString statsNss(nss.db(), statsColl); - createStatsCollection(statsNss); - - // Write serialized stats path to collection. - AutoGetCollection autoColl(operationContext(), statsNss, MODE_IX); - const CollectionPtr& coll = autoColl.getCollection(); - { - WriteUnitOfWork wuow(operationContext()); - ASSERT_OK(collection_internal::insertDocument( - operationContext(), coll, InsertStatement(serialized), nullptr)); - wuow.commit(); - } - - // Read stats path & verify values are consistent with what we expect. - auto actualAH = _statsCacheLoader.getStats(operationContext(), std::make_pair(nss, path)).get(); - auto actualSerialized = actualAH->serialize(); - - ASSERT_BSONOBJ_EQ(expectedSerialized, actualSerialized); -} - -} // namespace -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_cache_loader_test_fixture.cpp b/src/mongo/db/query/ce/stats_cache_loader_test_fixture.cpp deleted file mode 100644 index 20510a19203..00000000000 --- a/src/mongo/db/query/ce/stats_cache_loader_test_fixture.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc.
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/platform/basic.h" - -#include - -#include "mongo/db/query/ce/stats_cache_loader_test_fixture.h" - -#include "mongo/db/repl/replication_coordinator_mock.h" -#include "mongo/db/repl/storage_interface_impl.h" -#include "mongo/db/service_context_d_test_fixture.h" - -namespace mongo { - -void StatsCacheLoaderTestFixture::setUp() { - // Set up mongod. - ServiceContextMongoDTest::setUp(); - - auto service = getServiceContext(); - _storage = std::make_unique(); - _opCtx = cc().makeOperationContext(); - - // Set up ReplicationCoordinator and ensure that we are primary. - auto replCoord = std::make_unique(service); - ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_PRIMARY)); - repl::ReplicationCoordinator::set(service, std::move(replCoord)); - - // Set up oplog collection. If the WT storage engine is used, the oplog collection is expected - // to exist when fetching the next opTime (LocalOplogInfo::getNextOpTimes) to use for a write. - repl::createOplog(operationContext()); -} - -void StatsCacheLoaderTestFixture::tearDown() { - _storage.reset(); - _opCtx.reset(); - - // Tear down mongod. - ServiceContextMongoDTest::tearDown(); -} - -OperationContext* StatsCacheLoaderTestFixture::operationContext() { - return _opCtx.get(); -} - -repl::StorageInterface* StatsCacheLoaderTestFixture::storageInterface() { - return _storage.get(); -} - -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_cache_loader_test_fixture.h b/src/mongo/db/query/ce/stats_cache_loader_test_fixture.h deleted file mode 100644 index 6ffc992b9f8..00000000000 --- a/src/mongo/db/query/ce/stats_cache_loader_test_fixture.h +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. 
- * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/operation_context.h" -#include "mongo/db/query/ce/stats_cache_loader.h" -#include "mongo/db/repl/storage_interface_impl.h" -#include "mongo/db/service_context_d_test_fixture.h" - -namespace mongo { - -/** - * Sets up and provides a repl::StorageInterface and OperationContext. - * Database data are cleared between test runs. - */ -class StatsCacheLoaderTestFixture : public ServiceContextMongoDTest { -public: - explicit StatsCacheLoaderTestFixture(Options options = {}) - : ServiceContextMongoDTest(std::move(options)) {} - - OperationContext* operationContext(); - repl::StorageInterface* storageInterface(); - -protected: - void setUp() override; - void tearDown() override; - -private: - ServiceContext::UniqueOperationContext _opCtx; - std::unique_ptr _storage; -}; - -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_cache_test.cpp b/src/mongo/db/query/ce/stats_cache_test.cpp deleted file mode 100644 index 4e92a9ea2ca..00000000000 --- a/src/mongo/db/query/ce/stats_cache_test.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#include "mongo/platform/basic.h" - -#include - -#include "mongo/db/client.h" -#include "mongo/db/concurrency/locker_noop_service_context_test_fixture.h" -#include "mongo/db/operation_context.h" -#include "mongo/db/query/ce/stats_cache.h" -#include "mongo/db/query/ce/stats_cache_loader_mock.h" -#include "mongo/unittest/barrier.h" -#include "mongo/unittest/unittest.h" -#include "mongo/util/concurrency/thread_pool.h" -#include "mongo/util/read_through_cache.h" -#include "mongo/util/scopeguard.h" - -#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault - -namespace mongo { -namespace { - -using unittest::assertGet; - -/** - * Fixture for tests, which do not need to exercise the multi-threading capabilities of the cache - * and as such do not require control over the creation/destruction of their operation contexts. - */ -class StatsCacheTest : public LockerNoopServiceContextTest { -protected: - // Extends StatsCache and automatically provides it with a thread pool, which will be - // shutdown and joined before the StatsCache is destroyed (which is part of the contract of - // ReadThroughCache) - class CacheWithThreadPool : public StatsCache { - public: - CacheWithThreadPool(ServiceContext* service, - std::unique_ptr cacheLoaderMock, - size_t size) - : StatsCache(service, std::move(cacheLoaderMock), _threadPool, size) { - _threadPool.startup(); - } - - private: - ThreadPool _threadPool{[] { - ThreadPool::Options options; - options.poolName = "StatsCacheTest"; - options.minThreads = 1; - options.maxThreads = 1; - return options; - }()}; - }; - - const ServiceContext::UniqueOperationContext _opCtxHolder{makeOperationContext()}; - OperationContext* const _opCtx{_opCtxHolder.get()}; -}; - -TEST(StatsCacheTest, StandaloneValueHandle) { - StatsCacheVal statsPtr(new ArrayHistogram()); - StatsCache::ValueHandle standaloneHandle(std::move(statsPtr)); - ASSERT(standaloneHandle.isValid()); -} - -TEST_F(StatsCacheTest, KeyDoesNotExist) { - Status namespaceNotFoundErrorStatus = {ErrorCodes::NamespaceNotFound, - "The key does not exists"}; - auto cacheLoaderMock = std::make_unique(); - auto cache = CacheWithThreadPool(getServiceContext(), std::move(cacheLoaderMock), 1); - cache.getStatsCacheLoader()->setStatsReturnValueForTest( - std::move(namespaceNotFoundErrorStatus)); - auto handle = cache.acquire(_opCtx, std::make_pair(NamespaceString("db", "coll"), "somePath")); - ASSERT(!handle); -} - -/* -TEST_F(StatsCacheTest, LoadStats) { - auto cacheLoaderMock = std::make_unique(); - auto cache = CacheWithThreadPool(getServiceContext(), std::move(cacheLoaderMock), 1); - - auto stats1 = CollectionStatistics(1); - auto stats2 = CollectionStatistics(2); - - cache.getStatsCacheLoader()->setStatsReturnValueForTest(std::move(stats1)); - - auto handle = cache.acquire(_opCtx, NamespaceString("db", "coll1")); - ASSERT(handle.isValid()); - ASSERT_EQ(1, handle->getCardinality()); - - // Make all requests to StatsCacheLoader to throw an exception to ensre that test returns value - // from cache. 
- Status internalErrorStatus = {ErrorCodes::InternalError, - "Stats cache loader received unexpected request"}; - cache.getStatsCacheLoader()->setStatsReturnValueForTest(std::move(internalErrorStatus)); - - handle = cache.acquire(_opCtx, NamespaceString("db", "coll1")); - ASSERT(handle.isValid()); - ASSERT_EQ(1, handle->getCardinality()); - - cache.getStatsCacheLoader()->setStatsReturnValueForTest(std::move(stats2)); - handle = cache.acquire(_opCtx, NamespaceString("db", "coll2")); - ASSERT(handle.isValid()); - ASSERT_EQ(2, handle->getCardinality()); -} -*/ - -} // namespace -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_catalog.cpp b/src/mongo/db/query/ce/stats_catalog.cpp deleted file mode 100644 index d8b65d09e72..00000000000 --- a/src/mongo/db/query/ce/stats_catalog.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - - -#include "mongo/platform/basic.h" - -#include "mongo/db/query/ce/stats_cache.h" -#include "mongo/db/query/ce/stats_catalog.h" - -#include "mongo/db/query/ce/array_histogram.h" -#include "mongo/db/query/ce/collection_statistics.h" -#include "mongo/util/read_through_cache.h" - -#include "mongo/logv2/log.h" - -#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery - -namespace mongo { -using namespace mongo::ce; - -namespace { - -const auto statsCatalogDecoration = - ServiceContext::declareDecoration>(); - -} // namespace - -StatsCatalog::StatsCatalog(ServiceContext* service, - std::unique_ptr statsCacheLoader) - : _executor(std::make_shared([] { - ThreadPool::Options options; - options.poolName = "StatsCache"; - options.minThreads = 0; - options.maxThreads = 2; - return options; - }())), - _statsCache(service, std::move(statsCacheLoader), *_executor, 1000) { - _executor->startup(); -} - -StatsCatalog::~StatsCatalog() { - // The executor is used by the StatsCatalog, so it must be joined, before this cache is - // destroyed, per the contract of ReadThroughCache. 
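// (Editorial note: shutdown() below stops new lookups from being scheduled and join() blocks
// until in-flight lookups drain; only after both calls return is the '_statsCache' member,
// which uses this executor, destroyed.)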
- _executor->shutdown(); - _executor->join(); -} - -void StatsCatalog::set(ServiceContext* serviceContext, std::unique_ptr cache) { - auto& statsCatalog = statsCatalogDecoration(serviceContext); - invariant(!statsCatalog); - - statsCatalog = std::move(cache); -} - -StatsCatalog& StatsCatalog::get(ServiceContext* serviceContext) { - auto& statsCatalog = statsCatalogDecoration(serviceContext); - invariant(statsCatalog); - - return *statsCatalog; -} - -StatsCatalog& StatsCatalog::get(OperationContext* opCtx) { - return get(opCtx->getServiceContext()); -} - -StatusWith> StatsCatalog::getHistogram(OperationContext* opCtx, - const NamespaceString& nss, - const std::string& path) { - try { - auto handle = _statsCache.acquire(opCtx, std::make_pair(nss, path)); - uassert(ErrorCodes::NamespaceNotFound, - str::stream() << "path " << nss << " : " << path << " not found", - handle); - - return *(handle.get()); - } catch (const DBException& ex) { - return ex.toStatus(); - } -} - -Status StatsCatalog::invalidatePath(const NamespaceString& nss, const std::string& path) { - try { - _statsCache.invalidateKey(std::make_pair(nss, path)); - return Status::OK(); - } catch (const DBException& ex) { - return ex.toStatus(); - } -} -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_catalog.h b/src/mongo/db/query/ce/stats_catalog.h deleted file mode 100644 index efd53178c94..00000000000 --- a/src/mongo/db/query/ce/stats_catalog.h +++ /dev/null @@ -1,80 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/base/string_data.h" -#include "mongo/db/namespace_string.h" -#include "mongo/db/query/ce/collection_statistics.h" -#include "mongo/db/query/ce/stats_cache.h" -#include "mongo/db/query/ce/stats_cache_loader.h" -#include "mongo/util/concurrency/thread_pool.h" - -namespace mongo { - -using namespace mongo::ce; - -/** - * This class owns statsCache and manages executor lifetime. - */ -class StatsCatalog { -public: - /** - * Stores the catalog on the specified service context. May only be called once for the lifetime - * of the service context. 
- */ - static void set(ServiceContext* serviceContext, std::unique_ptr catalog); - - static StatsCatalog& get(ServiceContext* serviceContext); - static StatsCatalog& get(OperationContext* opCtx); - - /** - * The constructor provides the Service context under which the cache needs to be instantiated, - * and a Thread pool to be used for invoking the blocking 'lookup' calls. The size is the number - * of entries the underlying LRU cache will hold. - */ - StatsCatalog(ServiceContext* service, std::unique_ptr cacheLoader); - - ~StatsCatalog(); - - StatusWith> getHistogram(OperationContext* opCtx, - const NamespaceString& nss, - const std::string& path); - - Status invalidatePath(const NamespaceString& nss, const std::string& path); - -private: - /** - * The executor is used by the cache. - */ - std::shared_ptr _executor; - StatsCache _statsCache; -}; - -} // namespace mongo diff --git a/src/mongo/db/query/ce/stats_path_test.cpp b/src/mongo/db/query/ce/stats_path_test.cpp deleted file mode 100644 index c4ed743e987..00000000000 --- a/src/mongo/db/query/ce/stats_path_test.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/platform/basic.h" - -#include "mongo/bson/bsonobjbuilder.h" -#include "mongo/db/exec/sbe/values/bson.h" -#include "mongo/db/exec/sbe/values/value.h" -#include "mongo/db/query/ce/array_histogram.h" -#include "mongo/db/query/ce/scalar_histogram.h" -#include "mongo/db/query/ce/stats_gen.h" -#include "mongo/unittest/unittest.h" -#include "mongo/util/assert_util.h" - -namespace mongo { -namespace { - -IDLParserContext ctx("StatsPath"); - -/** - * Validate round trip conversion for histogram bucket - */ -TEST(StatsPath, BasicValidStatsBucketDouble) { - // Create & parse StatsBucket. - auto serializedBucket = ce::Bucket{3.0, 4.0, 15.0, 2.0, 6.0}.serialize(); - auto parsedBucket = StatsBucket::parse(ctx, serializedBucket); - - // Round-trip conversion. - auto bucketToBSON = parsedBucket.toBSON(); - ASSERT_BSONOBJ_EQ(serializedBucket, bucketToBSON); -} - -/** - * Validate round-trip conversion for StatsPath datatype. - */ -TEST(StatsPath, BasicValidStatsPath) { - // Initialize histogram buckets. 
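// (Editorial note: each Bucket below is presumed to be {equalFreq, rangeFreq, cumulativeFreq,
// ndv, cumulativeNDV}, matching the field order used by the createHistogram() helper added in
// test_utils.cpp by this patch.)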
- constexpr double doubleCount = 15.0; - constexpr double trueCount = 12.0; - constexpr double falseCount = 16.0; - constexpr double numDocs = doubleCount + trueCount + falseCount; - std::vector buckets{ - ce::Bucket{1.0, 0.0, 1.0, 0.0, 1.0}, - ce::Bucket{2.0, 5.0, 8.0, 1.0, 2.0}, - ce::Bucket{3.0, 4.0, 15.0, 2.0, 6.0}, - }; - - // Initialize histogram bounds. - auto [boundsTag, boundsVal] = sbe::value::makeNewArray(); - sbe::value::ValueGuard boundsGuard{boundsTag, boundsVal}; - auto bounds = sbe::value::getArrayView(boundsVal); - bounds->push_back(sbe::value::TypeTags::NumberDouble, 1.0); - bounds->push_back(sbe::value::TypeTags::NumberDouble, 2.0); - bounds->push_back(sbe::value::TypeTags::NumberDouble, 3.0); - - // Create a scalar histogram. - ce::TypeCounts tc{ - {sbe::value::TypeTags::NumberDouble, doubleCount}, - {sbe::value::TypeTags::Boolean, trueCount + falseCount}, - }; - ce::ScalarHistogram sh(*bounds, buckets); - ce::ArrayHistogram ah(sh, tc, trueCount, falseCount); - - // Serialize to BSON. - auto serializedPath = stats::makeStatsPath("somePath", numDocs, ah); - - // Parse StatsPath via IDL & serialize to BSON. - auto parsedPath = StatsPath::parse(ctx, serializedPath); - auto parsedPathToBSON = parsedPath.toBSON(); - - // We should end up with the same serialized BSON in the end. - ASSERT_BSONOBJ_EQ(serializedPath, parsedPathToBSON); -} - -/** - * Validate round-trip conversion for StatsPath datatype. - */ -TEST(StatsPath, BasicValidEmptyStatsPath) { - // Initialize histogram buckets. - constexpr double numDocs = 0.0; - std::vector buckets; - - // Initialize histogram bounds. - auto [boundsTag, boundsVal] = sbe::value::makeNewArray(); - sbe::value::ValueGuard boundsGuard{boundsTag, boundsVal}; - auto bounds = sbe::value::getArrayView(boundsVal); - - // Create an empty scalar histogram. - ce::TypeCounts tc; - ce::ScalarHistogram sh(*bounds, buckets); - ce::ArrayHistogram ah(sh, tc); - - // Serialize to BSON. - auto serializedPath = stats::makeStatsPath("someEmptyPath", numDocs, ah); - - // Parse StatsPath via IDL & serialize to BSON. - auto parsedPath = StatsPath::parse(ctx, serializedPath); - auto parsedPathToBSON = parsedPath.toBSON(); - - // We should end up with the same serialized BSON in the end. - ASSERT_BSONOBJ_EQ(serializedPath, parsedPathToBSON); -} - -} // namespace -} // namespace mongo diff --git a/src/mongo/db/query/ce/test_utils.cpp b/src/mongo/db/query/ce/test_utils.cpp new file mode 100644 index 00000000000..55bf1645f12 --- /dev/null +++ b/src/mongo/db/query/ce/test_utils.cpp @@ -0,0 +1,214 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. 
You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/ce/test_utils.h" + +#include "mongo/db/pipeline/abt/utils.h" +#include "mongo/db/query/optimizer/explain.h" +#include "mongo/db/query/optimizer/metadata_factory.h" +#include "mongo/db/query/optimizer/opt_phase_manager.h" +#include "mongo/db/query/optimizer/rewrites/const_eval.h" +#include "mongo/db/query/optimizer/utils/unit_test_pipeline_utils.h" +#include "mongo/db/query/optimizer/utils/unit_test_utils.h" +#include "mongo/db/query/sbe_stage_builder_helpers.h" +#include "mongo/unittest/unittest.h" + +namespace mongo::optimizer::ce { +namespace value = sbe::value; + +CETester::CETester(std::string collName, + double collCard, + const OptPhaseManager::PhaseSet& optPhases) + : _optPhases(optPhases), _hints(), _metadata({}), _collName(collName) { + addCollection(collName, collCard); +} + +CEType CETester::getMatchCE(const std::string& queryPredicate, + std::function nodePredicate) const { + return getCE("[{$match: " + queryPredicate + "}]", nodePredicate); +} + +CEType CETester::getCE(const std::string& pipeline, + std::function nodePredicate) const { + if constexpr (kCETestLogOnly) { + std::cout << "\n\nQuery: " << pipeline << "\n"; + } + + // Construct ABT from pipeline and optimize. + ABT abt = translatePipeline(pipeline, _collName); + + // Get cardinality estimate. + return getCE(abt, nodePredicate); +} + +CEType CETester::getCE(ABT& abt, std::function nodePredicate) const { + if constexpr (kCETestLogOnly) { + std::cout << ExplainGenerator::explainV2(abt) << std::endl; + } + + OptPhaseManager phaseManager{_optPhases, + _prefixId, + false /*requireRID*/, + _metadata, + getEstimator(), + makeHeuristicCE(), + makeCostEstimator(), + defaultConvertPathToInterval, + ConstEval::constFold, + DebugInfo::kDefaultForTests, + _hints}; + phaseManager.optimize(abt); + + const auto& memo = phaseManager.getMemo(); + if constexpr (kCETestLogOnly) { + std::cout << ExplainGenerator::explainMemo(memo) << std::endl; + } + + auto cht = getEstimator(); + + // If we are running no optimization phases, we are ensuring that we get the correct estimate on + // the original ABT (usually testing the CE for FilterNodes). The memo won't have any groups for + // us to estimate directly yet. + if (_optPhases.empty()) { + auto card = cht->deriveCE(_metadata, memo, {}, abt.ref()); + + if constexpr (kCETestLogOnly) { + std::cout << "CE: " << card << std::endl; + } + + return card; + } + + CEType outCard = kInvalidCardinality; + for (size_t groupId = 0; groupId < memo.getGroupCount(); groupId++) { + // Note that we always verify CE for MemoLogicalDelegatorNodes when calling getCE(). + + // If the 'optPhases' either ends with the MemoSubstitutionPhase or the + // MemoImplementationPhase, we should have exactly one logical node per group. However, if + // we have indexes, or a $group, we may have multiple logical nodes. In this case, we still + // want to pick the first node. 
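// (Editorial note: 'memoCE' below is the estimate recorded in the group's logical properties
// during optimization, while 'card' re-derives the estimate for this node from scratch; when
// kCETestLogOnly is false, the ASSERT_APPROX_EQUAL below checks that the two agree to within
// kMaxCEError.)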
+ const auto& node = memo.getLogicalNodes(groupId).front(); + + // This gets the cardinality estimate actually produced during optimization. + const auto& logicalProps = memo.getLogicalProps(groupId); + auto memoCE = properties::getPropertyConst(logicalProps) + .getEstimate(); + + // Conversely, here we call deriveCE() on the ABT produced by the optimization phases, which + // has all its delegators dereferenced. + auto card = cht->deriveCE(_metadata, memo, logicalProps, node.ref()); + + if constexpr (!kCETestLogOnly) { + // Ensure that the CE stored for the logical nodes of each group is what we would expect + // when estimating that node directly. Note that this check will fail if we are testing + // histogram estimation and only using the MemoSubstitutionPhase because the memo always + // uses heuristic estimation in this case. + ASSERT_APPROX_EQUAL(card, memoCE, kMaxCEError); + } else { + if (std::abs(memoCE - card) > kMaxCEError) { + std::cout << "ERROR: CE Group(" << groupId << ") " << card << " vs. " << memoCE + << std::endl; + std::cout << ExplainGenerator::explainV2(node) << std::endl; + } + } + + if (nodePredicate(node)) { + // We want to return the cardinality for the memo group matching the 'nodePredicate'. + outCard = memoCE; + } + } + + ASSERT_NOT_EQUALS(outCard, kInvalidCardinality); + + if constexpr (kCETestLogOnly) { + std::cout << "CE: " << outCard << std::endl; + } + + return outCard; +} + +ScanDefinition& CETester::getCollScanDefinition() { + auto it = _metadata._scanDefs.find(_collName); + invariant(it != _metadata._scanDefs.end()); + return it->second; +} + + +void CETester::setCollCard(double card) { + auto& scanDef = getCollScanDefinition(); + addCollection(_collName, card, scanDef.getIndexDefs()); +} + +void CETester::setIndexes(opt::unordered_map indexes) { + auto& scanDef = getCollScanDefinition(); + addCollection(_collName, scanDef.getCE(), indexes); +} + +void CETester::addCollection(std::string collName, + double numRecords, + opt::unordered_map indexes) { + _metadata._scanDefs.insert_or_assign(collName, + createScanDef({}, + indexes, + ConstEval::constFold, + {DistributionType::Centralized}, + true /*exists*/, + numRecords)); +} + +stats::ScalarHistogram createHistogram(const std::vector& data) { + value::Array bounds; + std::vector buckets; + + double cumulativeFreq = 0.0; + double cumulativeNDV = 0.0; + + for (size_t i = 0; i < data.size(); i++) { + const auto& item = data.at(i); + const auto [tag, val] = stage_builder::makeValue(item._v); + bounds.push_back(tag, val); + + cumulativeFreq += item._equalFreq + item._rangeFreq; + cumulativeNDV += item._ndv + 1.0; + buckets.emplace_back( + item._equalFreq, item._rangeFreq, cumulativeFreq, item._ndv, cumulativeNDV); + } + + return {std::move(bounds), std::move(buckets)}; +} + +double estimateIntValCard(const stats::ScalarHistogram& hist, + const int v, + const EstimationType type) { + const auto [tag, val] = + std::make_pair(value::TypeTags::NumberInt64, value::bitcastFrom(v)); + return estimate(hist, tag, val, type).card; +}; + +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/test_utils.h b/src/mongo/db/query/ce/test_utils.h new file mode 100644 index 00000000000..1f84fe9a1a8 --- /dev/null +++ b/src/mongo/db/query/ce/test_utils.h @@ -0,0 +1,231 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/query/ce/histogram_predicate_estimation.h" +#include "mongo/db/query/optimizer/cascades/interfaces.h" +#include "mongo/db/query/optimizer/opt_phase_manager.h" +#include "mongo/db/query/stats/scalar_histogram.h" + +namespace mongo::optimizer::ce { +// Enable this flag to log all estimates, and let all tests pass. +constexpr bool kCETestLogOnly = false; + +const double kMaxCEError = 0.01; +const CEType kInvalidCardinality = -1.0; + +const OptPhaseManager::PhaseSet kDefaultCETestPhaseSet{OptPhase::MemoSubstitutionPhase, + OptPhase::MemoExplorationPhase, + OptPhase::MemoImplementationPhase}; + +const OptPhaseManager::PhaseSet kOnlySubPhaseSet{OptPhase::MemoSubstitutionPhase}; + +const OptPhaseManager::PhaseSet kNoOptPhaseSet{}; + +/** + * Helpful macros for asserting that the CE of a $match predicate is approximately what we were + * expecting. + */ + +#define _ASSERT_CE(estimatedCE, expectedCE) \ + if constexpr (kCETestLogOnly) { \ + if (std::abs(estimatedCE - expectedCE) > kMaxCEError) { \ + std::cout << "ERROR: expected " << expectedCE << std::endl; \ + } \ + ASSERT_APPROX_EQUAL(1.0, 1.0, kMaxCEError); \ + } else { \ + ASSERT_APPROX_EQUAL(estimatedCE, expectedCE, kMaxCEError); \ + } +#define _PREDICATE(field, predicate) (str::stream() << "{" << field << ": " << predicate "}") +#define _ELEMMATCH_PREDICATE(field, predicate) \ + (str::stream() << "{" << field << ": {$elemMatch: " << predicate << "}}") + +// This macro verifies the cardinality of a pipeline or an input ABT. +#define ASSERT_CE(ce, pipeline, expectedCE) _ASSERT_CE(ce.getCE(pipeline), (expectedCE)) + +// This macro does the same as above but also sets the collection cardinality. +#define ASSERT_CE_CARD(ce, pipeline, expectedCE, collCard) \ + ce.setCollCard(collCard); \ + ASSERT_CE(ce, pipeline, expectedCE) + +// This macro verifies the cardinality of a pipeline with a single $match predicate. +#define ASSERT_MATCH_CE(ce, predicate, expectedCE) \ + _ASSERT_CE(ce.getMatchCE(predicate), (expectedCE)) + +#define ASSERT_MATCH_CE_NODE(ce, queryPredicate, expectedCE, nodePredicate) \ + _ASSERT_CE(ce.getMatchCE(queryPredicate, nodePredicate), (expectedCE)) + +// This macro does the same as above but also sets the collection cardinality. 
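// Usage sketch for the two macros above (tester and numbers are hypothetical; any CETester
// subclass that implements getEstimator() will do):
//   ASSERT_MATCH_CE(t, "{a: {$gt: 5}}", 330.0);
//   ASSERT_MATCH_CE_NODE(t, "{a: {$gt: 5}}", 330.0, isSargable1);
// The _CARD variant defined next additionally calls setCollCard() first.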
+#define ASSERT_MATCH_CE_CARD(ce, predicate, expectedCE, collCard) \ + ce.setCollCard(collCard); \ + ASSERT_MATCH_CE(ce, predicate, expectedCE) + +// This macro tests cardinality of two versions of the predicate; with and without $elemMatch. +#define ASSERT_EQ_ELEMMATCH_CE(tester, expectedCE, elemMatchExpectedCE, field, predicate) \ + ASSERT_MATCH_CE(tester, _PREDICATE(field, predicate), expectedCE); \ + ASSERT_MATCH_CE(tester, _ELEMMATCH_PREDICATE(field, predicate), elemMatchExpectedCE) + +#define ASSERT_EQ_ELEMMATCH_CE_NODE(tester, expectedCE, elemMatchExpectedCE, field, predicate, n) \ + ASSERT_MATCH_CE_NODE(tester, _PREDICATE(field, predicate), expectedCE, n); \ + ASSERT_MATCH_CE_NODE(tester, _ELEMMATCH_PREDICATE(field, predicate), elemMatchExpectedCE, n) + +// Some commonly used functions for picking nodes in the memo for testing estimation. +template +bool isSargableNode(const ABT& n) { + if constexpr (NumReq == 0) { + return n.is(); + } + + // Sometimes SargableNodes get split and placed into different memo groups, but we are looking + // for a SargableNode with a specific number of predicates. For tests, we only care about + // verifying the cardinality of that one. + if (auto* sargable = n.cast()) { + return sargable->getReqMap().size() == NumReq; + } + return false; +} +const auto isSargable = isSargableNode<0>; +const auto isSargable1 = isSargableNode<1>; +const auto isSargable2 = isSargableNode<2>; +const auto isSargable3 = isSargableNode<3>; +const auto isSargable4 = isSargableNode<4>; +const auto isRoot = [](const ABT& n) -> bool { return n.is(); }; + +/** + * A test utility class for helping verify the cardinality of CE transports on a given $match + * predicate. + */ +class CETester { +public: + /** + * The tester initializes at least one collection with the name 'collName' and the cardinality + * 'numRecords' in the metadata. + */ + CETester(std::string collName, + double numRecords, + const OptPhaseManager::PhaseSet& optPhases = kDefaultCETestPhaseSet); + + /** + * Returns the estimated cardinality of a given 'matchPredicate'. + * + * 'nodePredicate' identifies the node in the memo we want to estimate. + */ + CEType getMatchCE(const std::string& matchPredicate, + std::function nodePredicate = isRoot) const; + + /** + * Returns the estimated cardinality of a given 'pipeline'. + * + * 'nodePredicate' identifies the node in the memo we want to estimate. + */ + CEType getCE(const std::string& pipeline, + std::function nodePredicate = isRoot) const; + + /** + * Returns the estimated cardinality of a given 'abt'. + * + * 'nodePredicate' identifies the node in the memo we want to estimate. + */ + CEType getCE(ABT& abt, std::function nodePredicate = isRoot) const; + + /** + * Updates the cardinality of the collection '_collName'. + */ + void setCollCard(double card); + + /** + * Updates the indexes used by the collection '_collName'. + */ + void setIndexes(opt::unordered_map indexes); + + /** + * Adds a ScanDefinition for an additional collection for the test. + */ + void addCollection(std::string collName, + double numRecords, + opt::unordered_map indexes = {}); + + /** + * Prevents the optimizer from generating collection scan plans. + */ + void setDisableScan(bool disableScan) { + _hints._disableScan = disableScan; + } + +protected: + /** + * Subclasses need to override this method to initialize the cardinality estimators they are + * testing. 
+ */ + virtual std::unique_ptr getEstimator() const = 0; + +private: + /** + * Helper to find the ScanDefinition of '_collName' in _metadata. + */ + ScanDefinition& getCollScanDefinition(); + + // Phases to use when optimizing an input query. + const OptPhaseManager::PhaseSet& _optPhases; + + // Used to initialize the OptPhaseManager. + mutable PrefixId _prefixId; + + // Allows us to pass hints to the optimizer. + QueryHints _hints; + + // Stores the ScanDefinitions for all collections defined in the test. + Metadata _metadata; + + // Name of the collection tests will be executed against. + std::string _collName; +}; + +/** + * Test utility for helping with creation of manual histograms in the unit tests. + */ +struct BucketData { + Value _v; + double _equalFreq; + double _rangeFreq; + double _ndv; + + BucketData(Value v, double equalFreq, double rangeFreq, double ndv) + : _v(v), _equalFreq(equalFreq), _rangeFreq(rangeFreq), _ndv(ndv) {} + BucketData(const std::string& v, double equalFreq, double rangeFreq, double ndv) + : BucketData(Value(v), equalFreq, rangeFreq, ndv) {} + BucketData(int v, double equalFreq, double rangeFreq, double ndv) + : BucketData(Value(v), equalFreq, rangeFreq, ndv) {} +}; + +stats::ScalarHistogram createHistogram(const std::vector& data); + +double estimateIntValCard(const stats::ScalarHistogram& hist, int v, EstimationType type); +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce/value_utils.cpp b/src/mongo/db/query/ce/value_utils.cpp deleted file mode 100644 index 46e3f143b16..00000000000 --- a/src/mongo/db/query/ce/value_utils.cpp +++ /dev/null @@ -1,254 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#include "mongo/db/query/ce/value_utils.h" - -#include "mongo/db/query/ce/max_diff.h" -#include "mongo/db/query/ce/scalar_histogram.h" - -namespace mongo::ce { - -using namespace sbe; - -SBEValue::SBEValue(value::TypeTags tag, value::Value val) : _tag(tag), _val(val) {} - -SBEValue::SBEValue(std::pair v) : SBEValue(v.first, v.second) {} - -SBEValue::SBEValue(const SBEValue& other) { - auto [tag, val] = copyValue(other._tag, other._val); - _tag = tag; - _val = val; -} - -SBEValue::SBEValue(SBEValue&& other) { - _tag = other._tag; - _val = other._val; - - other._tag = value::TypeTags::Nothing; - other._val = 0; -} - -SBEValue::~SBEValue() { - value::releaseValue(_tag, _val); -} - -SBEValue& SBEValue::operator=(const SBEValue& other) { - value::releaseValue(_tag, _val); - - auto [tag, val] = copyValue(other._tag, other._val); - _tag = tag; - _val = val; - return *this; -} - -SBEValue& SBEValue::operator=(SBEValue&& other) { - value::releaseValue(_tag, _val); - - _tag = other._tag; - _val = other._val; - - other._tag = value::TypeTags::Nothing; - other._val = 0; - - return *this; -} - -std::pair SBEValue::get() const { - return std::make_pair(_tag, _val); -} - -value::TypeTags SBEValue::getTag() const { - return _tag; -} - -value::Value SBEValue::getValue() const { - return _val; -} - -std::pair makeInt64Value(int v) { - return std::make_pair(value::TypeTags::NumberInt64, value::bitcastFrom(v)); -}; - -std::pair makeNullValue() { - return std::make_pair(value::TypeTags::Null, 0); -}; - -bool sameTypeClass(value::TypeTags tag1, value::TypeTags tag2) { - if (tag1 == tag2) { - return true; - } - - static constexpr const char* kTempFieldName = "temp"; - - BSONObjBuilder minb1; - minb1.appendMinForType(kTempFieldName, value::tagToType(tag1)); - const BSONObj min1 = minb1.obj(); - - BSONObjBuilder minb2; - minb2.appendMinForType(kTempFieldName, value::tagToType(tag2)); - const BSONObj min2 = minb2.obj(); - - return min1.woCompare(min2) == 0; -} - -bool sameTypeBracket(value::TypeTags tag1, value::TypeTags tag2) { - if (tag1 == tag2) { - return true; - } - return ((value::isNumber(tag1) && value::isNumber(tag2)) || - (value::isString(tag1) && value::isString(tag2))); -} - -int32_t compareValues(value::TypeTags tag1, - value::Value val1, - value::TypeTags tag2, - value::Value val2) { - const auto [compareTag, compareVal] = value::compareValue(tag1, val1, tag2, val2); - uassert(6660547, "Invalid comparison result", compareTag == value::TypeTags::NumberInt32); - return value::bitcastTo(compareVal); -} - -void sortValueVector(std::vector& sortVector) { - const auto cmp = [](const SBEValue& a, const SBEValue& b) { - return compareValues(a.getTag(), a.getValue(), b.getTag(), b.getValue()) < 0; - }; - std::sort(sortVector.begin(), sortVector.end(), cmp); -} - -double valueToDouble(value::TypeTags tag, value::Value val) { - double result = 0; - if (value::isNumber(tag)) { - result = value::numericCast(tag, val); - } else if (value::isString(tag)) { - const StringData sd = value::getStringView(tag, val); - - // Convert a prefix of the string to a double. 
- const size_t maxPrecision = std::min(sd.size(), sizeof(double)); - for (size_t i = 0; i < maxPrecision; ++i) { - const char ch = sd[i]; - const double charToDbl = ch / std::pow(2, i * 8); - result += charToDbl; - } - } else if (tag == value::TypeTags::Date || tag == value::TypeTags::Timestamp) { - int64_t v = value::bitcastTo<int64_t>(val); - result = value::numericCast<double>(value::TypeTags::NumberInt64, v); - - } else if (tag == value::TypeTags::ObjectId) { - auto objView = - ConstDataView(reinterpret_cast<const char*>(sbe::value::getObjectIdView(val)->data())); - // Take the first 8 bytes of the ObjectId. - // TODO: consider using the entire ObjectId or other parts of it. - // auto v = objView.read<LittleEndian<uint64_t>>(sizeof(uint32_t)); - auto v = objView.read<LittleEndian<uint64_t>>(); - result = value::numericCast<double>(value::TypeTags::NumberInt64, v); - } else { - uassert(6844500, "Unexpected value type", false); - } - - return result; -} - -bool canEstimateTypeViaHistogram(value::TypeTags tag) { - if (sbe::value::isNumber(tag) || value::isString(tag)) { - return true; - } - - switch (tag) { - // Other types that we can/do build histograms on: - // - Date/time types. - case value::TypeTags::Date: - case value::TypeTags::Timestamp: - // - ObjectId. - case value::TypeTags::ObjectId: - return true; - - // Types that can only be estimated via the type-counters. - case value::TypeTags::Object: - case value::TypeTags::Array: - case value::TypeTags::Null: - case value::TypeTags::Nothing: - case value::TypeTags::Boolean: - return false; - - // Trying to estimate any other type should result in an error. - default: - uasserted(7051100, - str::stream() - << "Type " << tag << " is not supported by histogram estimation."); - } - - MONGO_UNREACHABLE; -} - -std::string serialize(value::TypeTags tag) { - std::ostringstream os; - os << tag; - return os.str(); -} - -// TODO: does this belong in SBE value utils? -value::TypeTags deserialize(const std::string& name) { - if ("NumberInt32" == name) { - return value::TypeTags::NumberInt32; - } else if ("NumberInt64" == name) { - return value::TypeTags::NumberInt64; - } else if ("NumberDecimal" == name) { - return value::TypeTags::NumberDecimal; - } else if ("NumberDouble" == name) { - return value::TypeTags::NumberDouble; - } else if ("StringBig" == name) { - return value::TypeTags::StringBig; - } else if ("StringSmall" == name) { - return value::TypeTags::StringSmall; - } else if ("bsonString" == name) { - return value::TypeTags::bsonString; - } else if ("Date" == name) { - return value::TypeTags::Date; - } else if ("Timestamp" == name) { - return value::TypeTags::Timestamp; - } else if ("ObjectId" == name) { - return value::TypeTags::ObjectId; - } else if ("Object" == name) { - return value::TypeTags::Object; - } else if ("Boolean" == name) { - return value::TypeTags::Boolean; - } else if ("Array" == name) { - return value::TypeTags::Array; - } else if ("Null" == name) { - return value::TypeTags::Null; - } else if ("Nothing" == name) { - return value::TypeTags::Nothing; - } - - // Trying to deserialize any other type should result in an error. - uasserted(6660600, - str::stream() << "String " << name << " is not convertible to SBE type tag."); -} - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce/value_utils.h b/src/mongo/db/query/ce/value_utils.h deleted file mode 100644 index 0191b4e2c26..00000000000 --- a/src/mongo/db/query/ce/value_utils.h +++ /dev/null @@ -1,123 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc.
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/exec/sbe/values/value.h" - -namespace mongo::ce { - -using namespace sbe; - -/** - Container object for SBE value/tag pairs. Supplied values are owned by this object - and are released on destruction -*/ -class SBEValue { -public: - SBEValue(value::TypeTags tag, value::Value val); - SBEValue(std::pair v); - ~SBEValue(); - - SBEValue(const SBEValue& other); - SBEValue(SBEValue&& other); - - SBEValue& operator=(const SBEValue& other); - SBEValue& operator=(SBEValue&& other); - - std::pair get() const; - value::TypeTags getTag() const; - value::Value getValue() const; - -private: - value::TypeTags _tag; - value::Value _val; -}; - -/** - Generate an SBE Value pair that represents the supplied int with - type Int64 -*/ -std::pair makeInt64Value(int v); - -/** - Generate an SBE Value pair representing a BSON null value -*/ -std::pair makeNullValue(); - -/** - Do the supplied type tags represent the same BSON type? -*/ -bool sameTypeClass(value::TypeTags tag1, value::TypeTags tag2); - -/** - Do the supplied type tags represent the same BSON type? - TODO: This may be the same as sameTypeClass. @timourk? -*/ -bool sameTypeBracket(value::TypeTags tag1, value::TypeTags tag2); - -/** - Compare a pair of SBE values. - - The return will be - <0 if val1 < val2 in BSON order - 0 if val1 == val2 in BSON order - >0 if val1 > val2 in BSON order -*/ -int32_t compareValues(value::TypeTags tag1, - value::Value val1, - value::TypeTags tag2, - value::Value val2); - -/** - Sort a vector of values in place in BSON order -*/ -void sortValueVector(std::vector& sortVector); - -/** - Convert a value of any supported type into a double according to some metric. This - metric will be consistent with ordering in the type. -*/ -double valueToDouble(value::TypeTags tag, value::Value val); - -/** - * Returns true for types that can be estimated via histograms, and false for types that need type - * counters. Any other type results in a uassert. - * - * NOTE: This should be kept in sync with 'valueToDouble' above. 
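 * For example (per the implementation in value_utils.cpp above): numeric and string tags, Date,
 * Timestamp, and ObjectId are estimable via histograms, while Object, Array, Null, Nothing, and
 * Boolean are estimated from type counts instead.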
- */ -bool canEstimateTypeViaHistogram(value::TypeTags tag); - -/** - * Serialize/Deserialize a TypeTag to a string for TypeCount storage in the stats collection. - */ -std::string serialize(value::TypeTags tag); -value::TypeTags deserialize(const std::string& name); - -} // namespace mongo::ce diff --git a/src/mongo/db/query/ce_mode_parameter.cpp b/src/mongo/db/query/ce_mode_parameter.cpp index 2099c65ab1d..f54831bf626 100644 --- a/src/mongo/db/query/ce_mode_parameter.cpp +++ b/src/mongo/db/query/ce_mode_parameter.cpp @@ -30,11 +30,11 @@ #include "mongo/db/query/ce_mode_parameter.h" #include "mongo/db/query/query_knobs_gen.h" -namespace mongo::ce { +namespace mongo::optimizer::ce { Status validateCEMode(const std::string& value, const boost::optional&) { if (value == kHeuristic || value == kHistogram || value == kSampling) { return Status::OK(); } return Status(ErrorCodes::Error{6695700}, "Invalid cardinality estimation mode."); } -} // namespace mongo::ce +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce_mode_parameter.h b/src/mongo/db/query/ce_mode_parameter.h index e103b82e927..f253c60e5fd 100644 --- a/src/mongo/db/query/ce_mode_parameter.h +++ b/src/mongo/db/query/ce_mode_parameter.h @@ -34,7 +34,7 @@ #include "mongo/base/status.h" #include "mongo/db/tenant_id.h" -namespace mongo::ce { +namespace mongo::optimizer::ce { /** * Defines cardinality estimation modes. @@ -45,4 +45,4 @@ const std::string kSampling = "sampling"; Status validateCEMode(const std::string& value, const boost::optional&); -} // namespace mongo::ce +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/ce_mode_parameter_test.cpp b/src/mongo/db/query/ce_mode_parameter_test.cpp index 9769127e5b3..011c5f210d3 100644 --- a/src/mongo/db/query/ce_mode_parameter_test.cpp +++ b/src/mongo/db/query/ce_mode_parameter_test.cpp @@ -31,7 +31,7 @@ #include "mongo/unittest/unittest.h" -namespace mongo::ce { +namespace mongo::optimizer::ce { TEST(CEModeParameterTest, ValidatesValidCEModes) { ASSERT_OK(validateCEMode("heuristic", boost::none)); @@ -44,4 +44,4 @@ TEST(CEModeParameterTest, RejectsInvalidCEModes) { ASSERT_NOT_OK(validateCEMode("", boost::none)); } -} // namespace mongo::ce +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/cost_model/SConscript b/src/mongo/db/query/cost_model/SConscript index d8ce836096a..44649d53978 100644 --- a/src/mongo/db/query/cost_model/SConscript +++ b/src/mongo/db/query/cost_model/SConscript @@ -7,7 +7,7 @@ env = env.Clone() env.Library( target="query_cost_model", source=[ - 'cost_estimator.cpp', + 'cost_estimator_impl.cpp', 'cost_model.idl', 'cost_model_manager.cpp', 'cost_model_utils.cpp', diff --git a/src/mongo/db/query/cost_model/cost_estimator.cpp b/src/mongo/db/query/cost_model/cost_estimator.cpp deleted file mode 100644 index 3dea08ebd1f..00000000000 --- a/src/mongo/db/query/cost_model/cost_estimator.cpp +++ /dev/null @@ -1,418 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . 
* - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/query/cost_model/cost_estimator.h" - -#include "mongo/db/query/optimizer/defs.h" - -namespace mongo::cost_model { - -using namespace optimizer; -using namespace optimizer::properties; -using optimizer::cascades::Memo; - -namespace { -struct CostAndCEInternal { - CostAndCEInternal(double cost, CEType ce) : _cost(cost), _ce(ce) { - uassert(7034000, "Invalid cost.", !std::isnan(cost) && cost >= 0.0); - uassert(7034001, "Invalid cardinality.", std::isfinite(ce) && ce >= 0.0); - } - double _cost; - CEType _ce; -}; - -class CostDerivation { -public: - CostAndCEInternal operator()(const ABT& /*n*/, const PhysicalScanNode& /*node*/) { - // Default estimate for scan. - const double collectionScanCost = _coefficients.getScanStartupCost() + - _coefficients.getScanIncrementalCost() * _cardinalityEstimate; - return {collectionScanCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const CoScanNode& /*node*/) { - // Assumed to be free. - return {_coefficients.getDefaultStartupCost(), _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const IndexScanNode& node) { - const double indexScanCost = _coefficients.getIndexScanStartupCost() + - _coefficients.getIndexScanIncrementalCost() * _cardinalityEstimate; - return {indexScanCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const SeekNode& /*node*/) { - // SeekNode should deliver one result via cardinality estimate override. - // TODO: consider using node.getProjectionMap()._fieldProjections.size() to make the cost - // dependent on the size of the projection. - const double seekCost = - _coefficients.getSeekStartupCost() + _coefficients.getSeekCost() * _cardinalityEstimate; - return {seekCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const MemoLogicalDelegatorNode& node) { - const LogicalProps& childLogicalProps = _memo.getLogicalProps(node.getGroupId()); - // Notice that unlike all physical nodes, this logical node takes its cardinality directly - // from the memo group logical property, ignoring _cardinalityEstimate. - CEType baseCE = getPropertyConst<CardinalityEstimate>(childLogicalProps).getEstimate(); - - if (hasProperty<IndexingRequirement>(_physProps)) { - const auto& indexingReq = getPropertyConst<IndexingRequirement>(_physProps); - if (indexingReq.getIndexReqTarget() == IndexReqTarget::Seek) { - // If we are performing a seek, normalize against the scan group cardinality.
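// (Editorial example: if the scan group is estimated at 1000 documents and this group at
// 2000, baseCE becomes 2000/1000 = 2 below, i.e. the expected number of documents produced
// per individual seek; the result then flows through getAdjustedCE().)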
- const GroupIdType scanGroupId = - getPropertyConst(childLogicalProps).getScanGroupId(); - if (scanGroupId == node.getGroupId()) { - baseCE = 1.0; - } else { - const CEType scanGroupCE = - getPropertyConst(_memo.getLogicalProps(scanGroupId)) - .getEstimate(); - if (scanGroupCE > 0.0) { - baseCE /= scanGroupCE; - } - } - } - } - - return {0.0, getAdjustedCE(baseCE, _physProps)}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const MemoPhysicalDelegatorNode& /*node*/) { - uasserted(7034002, "Should not be costing physical delegator nodes."); - } - - CostAndCEInternal operator()(const ABT& /*n*/, const FilterNode& node) { - CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - double filterCost = childResult._cost; - if (!isTrivialExpr(node.getFilter())) { - // Non-trivial filter. - filterCost += _coefficients.getFilterStartupCost() + - _coefficients.getFilterIncrementalCost() * childResult._ce; - } - return {filterCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const EvaluationNode& node) { - CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - double evalCost = childResult._cost; - if (!isTrivialExpr(node.getProjection())) { - // Non-trivial projection. - evalCost += _coefficients.getEvalStartupCost() + - _coefficients.getEvalIncrementalCost() * _cardinalityEstimate; - } - return {evalCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const BinaryJoinNode& node) { - CostAndCEInternal leftChildResult = deriveChild(node.getLeftChild(), 0); - CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1); - const double joinCost = _coefficients.getBinaryJoinStartupCost() + - _coefficients.getBinaryJoinIncrementalCost() * - (leftChildResult._ce + rightChildResult._ce) + - leftChildResult._cost + rightChildResult._cost; - return {joinCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const HashJoinNode& node) { - CostAndCEInternal leftChildResult = deriveChild(node.getLeftChild(), 0); - CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1); - - // TODO: distinguish build side and probe side. - const double hashJoinCost = _coefficients.getHashJoinStartupCost() + - _coefficients.getHashJoinIncrementalCost() * - (leftChildResult._ce + rightChildResult._ce) + - leftChildResult._cost + rightChildResult._cost; - return {hashJoinCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const MergeJoinNode& node) { - CostAndCEInternal leftChildResult = deriveChild(node.getLeftChild(), 0); - CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1); - - const double mergeJoinCost = _coefficients.getMergeJoinStartupCost() + - _coefficients.getMergeJoinIncrementalCost() * - (leftChildResult._ce + rightChildResult._ce) + - leftChildResult._cost + rightChildResult._cost; - - return {mergeJoinCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const UnionNode& node) { - const ABTVector& children = node.nodes(); - // UnionNode with one child is optimized away before lowering, therefore - // its cost is the cost of its child. - if (children.size() == 1) { - CostAndCEInternal childResult = deriveChild(children[0], 0); - return {childResult._cost, _cardinalityEstimate}; - } - - double totalCost = _coefficients.getUnionStartupCost(); - // The cost is the sum of the costs of its children and the cost to union each child. 
- for (size_t childIdx = 0; childIdx < children.size(); childIdx++) { - CostAndCEInternal childResult = deriveChild(children[childIdx], childIdx); - const double childCost = childResult._cost + - (childIdx > 0 ? _coefficients.getUnionIncrementalCost() * childResult._ce : 0); - totalCost += childCost; - } - return {totalCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const GroupByNode& node) { - CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - double groupByCost = _coefficients.getGroupByStartupCost(); - - // TODO: for now pretend global group by is free. - if (node.getType() == GroupNodeType::Global) { - groupByCost += childResult._cost; - } else { - // TODO: consider RepetitionEstimate since this is a stateful operation. - groupByCost += - _coefficients.getGroupByIncrementalCost() * childResult._ce + childResult._cost; - } - return {groupByCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const UnwindNode& node) { - CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - // Unwind probably depends mostly on its output size. - const double unwindCost = - _coefficients.getUnwindIncrementalCost() * _cardinalityEstimate + childResult._cost; - return {unwindCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const UniqueNode& node) { - CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - const double uniqueCost = _coefficients.getUniqueStartupCost() + - _coefficients.getUniqueIncrementalCost() * childResult._ce + childResult._cost; - return {uniqueCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const CollationNode& node) { - CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - // TODO: consider RepetitionEstimate since this is a stateful operation. - - double logFactor = childResult._ce; - double incrConst = _coefficients.getCollationIncrementalCost(); - if (hasProperty(_physProps)) { - if (auto limit = getPropertyConst(_physProps).getAbsoluteLimit(); - limit < logFactor) { - logFactor = limit; - incrConst = _coefficients.getCollationWithLimitIncrementalCost(); - } - } - - // Notice that log2(x) < 0 for any x < 1, and log2(1) = 0. Generally it makes sense that - // there is no cost to sort 1 document, so the only cost left is the startup cost. - const double sortCost = _coefficients.getCollationStartupCost() + childResult._cost + - ((logFactor <= 1.0) - ? 0.0 - // TODO: The cost formula below is based on 1 field, mix of int and str. Instead we - // have to take into account the number and size of sorted fields. - : incrConst * childResult._ce * std::log2(logFactor)); - return {sortCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const LimitSkipNode& node) { - // Assumed to be free. 
- CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - const double limitCost = _coefficients.getLimitSkipStartupCost() + childResult._cost + - _cardinalityEstimate * _coefficients.getLimitSkipIncrementalCost(); - return {limitCost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const ExchangeNode& node) { - CostAndCEInternal childResult = deriveChild(node.getChild(), 0); - double localCost = _coefficients.getExchangeStartupCost() + - _coefficients.getExchangeIncrementalCost() * _cardinalityEstimate; - - switch (node.getProperty().getDistributionAndProjections()._type) { - case DistributionType::Replicated: - localCost *= 2.0; - break; - - case DistributionType::HashPartitioning: - case DistributionType::RangePartitioning: - localCost *= 1.1; - break; - - default: - break; - } - - return {localCost + childResult._cost, _cardinalityEstimate}; - } - - CostAndCEInternal operator()(const ABT& /*n*/, const RootNode& node) { - return deriveChild(node.getChild(), 0); - } - - /** - * Other ABT types. - */ - template - CostAndCEInternal operator()(const ABT& /*n*/, const T& /*node*/, Ts&&...) { - static_assert(!canBePhysicalNode(), "Physical node must implement its cost derivation."); - return {0.0, 0.0}; - } - - static CostAndCEInternal derive(const Metadata& metadata, - const Memo& memo, - const PhysProps& physProps, - const ABT::reference_type physNodeRef, - const ChildPropsType& childProps, - const NodeCEMap& nodeCEMap, - const CostModelCoefficients& coefficients) { - CostAndCEInternal result = deriveInternal( - metadata, memo, physProps, physNodeRef, childProps, nodeCEMap, coefficients); - - switch (getPropertyConst(physProps) - .getDistributionAndProjections() - ._type) { - case DistributionType::Centralized: - case DistributionType::Replicated: - break; - - case DistributionType::RoundRobin: - case DistributionType::HashPartitioning: - case DistributionType::RangePartitioning: - case DistributionType::UnknownPartitioning: - result._cost /= metadata._numberOfPartitions; - break; - - default: - MONGO_UNREACHABLE; - } - - return result; - } - -private: - CostDerivation(const Metadata& metadata, - const Memo& memo, - const CEType ce, - const PhysProps& physProps, - const ChildPropsType& childProps, - const NodeCEMap& nodeCEMap, - const CostModelCoefficients& coefficients) - : _metadata(metadata), - _memo(memo), - _physProps(physProps), - _cardinalityEstimate(getAdjustedCE(ce, _physProps)), - _childProps(childProps), - _nodeCEMap(nodeCEMap), - _coefficients(coefficients) {} - - template - static bool isTrivialExpr(const ABT& n) { - if (n.is() || n.is()) { - return true; - } - if (const auto* ptr = n.cast(); ptr != nullptr && - ptr->getPath().template is() && isTrivialExpr(ptr->getInput())) { - return true; - } - return false; - } - - static CostAndCEInternal deriveInternal(const Metadata& metadata, - const Memo& memo, - const PhysProps& physProps, - const ABT::reference_type physNodeRef, - const ChildPropsType& childProps, - const NodeCEMap& nodeCEMap, - const CostModelCoefficients& coefficients) { - auto it = nodeCEMap.find(physNodeRef.cast()); - bool found = (it != nodeCEMap.cend()); - uassert(7034003, - "Only MemoLogicalDelegatorNode can be missing from nodeCEMap.", - found || physNodeRef.is()); - const CEType ce = (found ? 
it->second : 0.0); - - CostDerivation instance(metadata, memo, ce, physProps, childProps, nodeCEMap, coefficients); - CostAndCEInternal costCEestimates = physNodeRef.visit(instance); - return costCEestimates; - } - - CostAndCEInternal deriveChild(const ABT& child, const size_t childIndex) { - PhysProps physProps = _childProps.empty() ? _physProps : _childProps.at(childIndex).second; - return deriveInternal( - _metadata, _memo, physProps, child.ref(), {}, _nodeCEMap, _coefficients); - } - - static CEType getAdjustedCE(CEType baseCE, const PhysProps& physProps) { - CEType result = baseCE; - - // First: correct for un-enforced limit. - if (hasProperty(physProps)) { - const auto limit = getPropertyConst(physProps).getAbsoluteLimit(); - if (result > limit) { - result = limit; - } - } - - // Second: correct for enforced limit. - if (hasProperty(physProps)) { - const auto limit = getPropertyConst(physProps).getEstimate(); - if (result > limit) { - result = limit; - } - } - - // Third: correct for repetition. - if (hasProperty(physProps)) { - result *= getPropertyConst(physProps).getEstimate(); - } - - return result; - } - - // We don't own this. - const Metadata& _metadata; - const Memo& _memo; - const PhysProps& _physProps; - const CEType _cardinalityEstimate; - const ChildPropsType& _childProps; - const NodeCEMap& _nodeCEMap; - const CostModelCoefficients& _coefficients; -}; -} // namespace - -CostAndCE CostEstimator::deriveCost(const Metadata& metadata, - const Memo& memo, - const PhysProps& physProps, - const ABT::reference_type physNodeRef, - const ChildPropsType& childProps, - const NodeCEMap& nodeCEMap) const { - const CostAndCEInternal result = CostDerivation::derive( - metadata, memo, physProps, physNodeRef, childProps, nodeCEMap, _coefficients); - return {CostType::fromDouble(result._cost), result._ce}; -} - -} // namespace mongo::cost_model diff --git a/src/mongo/db/query/cost_model/cost_estimator.h b/src/mongo/db/query/cost_model/cost_estimator.h deleted file mode 100644 index 763351324b4..00000000000 --- a/src/mongo/db/query/cost_model/cost_estimator.h +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#pragma once - -#include "mongo/db/query/cost_model/cost_model_gen.h" -#include "mongo/db/query/optimizer/cascades/interfaces.h" -#include "mongo/db/query/optimizer/cascades/memo.h" - -namespace mongo::cost_model { -/** - * Default costing for physical nodes with logical delegator (not-yet-optimized) inputs. - */ -class CostEstimator : public optimizer::cascades::CostingInterface { -public: - CostEstimator(CostModelCoefficients coefficicients) - : _coefficients{std::move(coefficicients)} {} - - optimizer::CostAndCE deriveCost(const optimizer::Metadata& metadata, - const optimizer::cascades::Memo& memo, - const optimizer::properties::PhysProps& physProps, - optimizer::ABT::reference_type physNodeRef, - const optimizer::ChildPropsType& childProps, - const optimizer::NodeCEMap& nodeCEMap) const override final; - -private: - const CostModelCoefficients _coefficients; -}; - -} // namespace mongo::cost_model diff --git a/src/mongo/db/query/cost_model/cost_estimator_impl.cpp b/src/mongo/db/query/cost_model/cost_estimator_impl.cpp new file mode 100644 index 00000000000..55d2bf6ab5c --- /dev/null +++ b/src/mongo/db/query/cost_model/cost_estimator_impl.cpp @@ -0,0 +1,418 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/cost_model/cost_estimator_impl.h" + +#include "mongo/db/query/optimizer/defs.h" + +namespace mongo::cost_model { + +using namespace optimizer; +using namespace optimizer::properties; +using optimizer::cascades::Memo; + +namespace { +struct CostAndCEInternal { + CostAndCEInternal(double cost, CEType ce) : _cost(cost), _ce(ce) { + uassert(7034000, "Invalid cost.", !std::isnan(cost) && cost >= 0.0); + uassert(7034001, "Invalid cardinality", std::isfinite(ce) && ce >= 0.0); + } + double _cost; + CEType _ce; +}; + +class CostDerivation { +public: + CostAndCEInternal operator()(const ABT& /*n*/, const PhysicalScanNode& /*node*/) { + // Default estimate for scan. 
+ const double collectionScanCost = _coefficients.getScanStartupCost() + + _coefficients.getScanIncrementalCost() * _cardinalityEstimate; + return {collectionScanCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const CoScanNode& /*node*/) { + // Assumed to be free. + return {_coefficients.getDefaultStartupCost(), _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const IndexScanNode& node) { + const double indexScanCost = _coefficients.getIndexScanStartupCost() + + _coefficients.getIndexScanIncrementalCost() * _cardinalityEstimate; + return {indexScanCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const SeekNode& /*node*/) { + // SeekNode should deliver one result via cardinality estimate override. + // TODO: consider using node.getProjectionMap()._fieldProjections.size() to make the cost + // dependent on the size of the projection + const double seekCost = + _coefficients.getSeekStartupCost() + _coefficients.getSeekCost() * _cardinalityEstimate; + return {seekCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const MemoLogicalDelegatorNode& node) { + const LogicalProps& childLogicalProps = _memo.getLogicalProps(node.getGroupId()); + // Notice that unlike all physical nodes, this logical node takes it cardinality directly + // from the memo group logical property, ignoring _cardinalityEstimate. + CEType baseCE = getPropertyConst(childLogicalProps).getEstimate(); + + if (hasProperty(_physProps)) { + const auto& indexingReq = getPropertyConst(_physProps); + if (indexingReq.getIndexReqTarget() == IndexReqTarget::Seek) { + // If we are performing a seek, normalize against the scan group cardinality. + const GroupIdType scanGroupId = + getPropertyConst(childLogicalProps).getScanGroupId(); + if (scanGroupId == node.getGroupId()) { + baseCE = 1.0; + } else { + const CEType scanGroupCE = + getPropertyConst(_memo.getLogicalProps(scanGroupId)) + .getEstimate(); + if (scanGroupCE > 0.0) { + baseCE /= scanGroupCE; + } + } + } + } + + return {0.0, getAdjustedCE(baseCE, _physProps)}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const MemoPhysicalDelegatorNode& /*node*/) { + uasserted(7034002, "Should not be costing physical delegator nodes."); + } + + CostAndCEInternal operator()(const ABT& /*n*/, const FilterNode& node) { + CostAndCEInternal childResult = deriveChild(node.getChild(), 0); + double filterCost = childResult._cost; + if (!isTrivialExpr(node.getFilter())) { + // Non-trivial filter. + filterCost += _coefficients.getFilterStartupCost() + + _coefficients.getFilterIncrementalCost() * childResult._ce; + } + return {filterCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const EvaluationNode& node) { + CostAndCEInternal childResult = deriveChild(node.getChild(), 0); + double evalCost = childResult._cost; + if (!isTrivialExpr(node.getProjection())) { + // Non-trivial projection. 
+ evalCost += _coefficients.getEvalStartupCost() + + _coefficients.getEvalIncrementalCost() * _cardinalityEstimate; + } + return {evalCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const BinaryJoinNode& node) { + CostAndCEInternal leftChildResult = deriveChild(node.getLeftChild(), 0); + CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1); + const double joinCost = _coefficients.getBinaryJoinStartupCost() + + _coefficients.getBinaryJoinIncrementalCost() * + (leftChildResult._ce + rightChildResult._ce) + + leftChildResult._cost + rightChildResult._cost; + return {joinCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const HashJoinNode& node) { + CostAndCEInternal leftChildResult = deriveChild(node.getLeftChild(), 0); + CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1); + + // TODO: distinguish build side and probe side. + const double hashJoinCost = _coefficients.getHashJoinStartupCost() + + _coefficients.getHashJoinIncrementalCost() * + (leftChildResult._ce + rightChildResult._ce) + + leftChildResult._cost + rightChildResult._cost; + return {hashJoinCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const MergeJoinNode& node) { + CostAndCEInternal leftChildResult = deriveChild(node.getLeftChild(), 0); + CostAndCEInternal rightChildResult = deriveChild(node.getRightChild(), 1); + + const double mergeJoinCost = _coefficients.getMergeJoinStartupCost() + + _coefficients.getMergeJoinIncrementalCost() * + (leftChildResult._ce + rightChildResult._ce) + + leftChildResult._cost + rightChildResult._cost; + + return {mergeJoinCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const UnionNode& node) { + const ABTVector& children = node.nodes(); + // UnionNode with one child is optimized away before lowering, therefore + // its cost is the cost of its child. + if (children.size() == 1) { + CostAndCEInternal childResult = deriveChild(children[0], 0); + return {childResult._cost, _cardinalityEstimate}; + } + + double totalCost = _coefficients.getUnionStartupCost(); + // The cost is the sum of the costs of its children and the cost to union each child. + for (size_t childIdx = 0; childIdx < children.size(); childIdx++) { + CostAndCEInternal childResult = deriveChild(children[childIdx], childIdx); + const double childCost = childResult._cost + + (childIdx > 0 ? _coefficients.getUnionIncrementalCost() * childResult._ce : 0); + totalCost += childCost; + } + return {totalCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const GroupByNode& node) { + CostAndCEInternal childResult = deriveChild(node.getChild(), 0); + double groupByCost = _coefficients.getGroupByStartupCost(); + + // TODO: for now pretend global group by is free. + if (node.getType() == GroupNodeType::Global) { + groupByCost += childResult._cost; + } else { + // TODO: consider RepetitionEstimate since this is a stateful operation. + groupByCost += + _coefficients.getGroupByIncrementalCost() * childResult._ce + childResult._cost; + } + return {groupByCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const UnwindNode& node) { + CostAndCEInternal childResult = deriveChild(node.getChild(), 0); + // Unwind probably depends mostly on its output size. 
+ const double unwindCost = + _coefficients.getUnwindIncrementalCost() * _cardinalityEstimate + childResult._cost; + return {unwindCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const UniqueNode& node) { + CostAndCEInternal childResult = deriveChild(node.getChild(), 0); + const double uniqueCost = _coefficients.getUniqueStartupCost() + + _coefficients.getUniqueIncrementalCost() * childResult._ce + childResult._cost; + return {uniqueCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const CollationNode& node) { + CostAndCEInternal childResult = deriveChild(node.getChild(), 0); + // TODO: consider RepetitionEstimate since this is a stateful operation. + + double logFactor = childResult._ce; + double incrConst = _coefficients.getCollationIncrementalCost(); + if (hasProperty(_physProps)) { + if (auto limit = getPropertyConst(_physProps).getAbsoluteLimit(); + limit < logFactor) { + logFactor = limit; + incrConst = _coefficients.getCollationWithLimitIncrementalCost(); + } + } + + // Notice that log2(x) < 0 for any x < 1, and log2(1) = 0. Generally it makes sense that + // there is no cost to sort 1 document, so the only cost left is the startup cost. + const double sortCost = _coefficients.getCollationStartupCost() + childResult._cost + + ((logFactor <= 1.0) + ? 0.0 + // TODO: The cost formula below is based on 1 field, mix of int and str. Instead we + // have to take into account the number and size of sorted fields. + : incrConst * childResult._ce * std::log2(logFactor)); + return {sortCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const LimitSkipNode& node) { + // Assumed to be free. + CostAndCEInternal childResult = deriveChild(node.getChild(), 0); + const double limitCost = _coefficients.getLimitSkipStartupCost() + childResult._cost + + _cardinalityEstimate * _coefficients.getLimitSkipIncrementalCost(); + return {limitCost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const ExchangeNode& node) { + CostAndCEInternal childResult = deriveChild(node.getChild(), 0); + double localCost = _coefficients.getExchangeStartupCost() + + _coefficients.getExchangeIncrementalCost() * _cardinalityEstimate; + + switch (node.getProperty().getDistributionAndProjections()._type) { + case DistributionType::Replicated: + localCost *= 2.0; + break; + + case DistributionType::HashPartitioning: + case DistributionType::RangePartitioning: + localCost *= 1.1; + break; + + default: + break; + } + + return {localCost + childResult._cost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const RootNode& node) { + return deriveChild(node.getChild(), 0); + } + + /** + * Other ABT types. + */ + template + CostAndCEInternal operator()(const ABT& /*n*/, const T& /*node*/, Ts&&...) 
{ + static_assert(!canBePhysicalNode(), "Physical node must implement its cost derivation."); + return {0.0, 0.0}; + } + + static CostAndCEInternal derive(const Metadata& metadata, + const Memo& memo, + const PhysProps& physProps, + const ABT::reference_type physNodeRef, + const ChildPropsType& childProps, + const NodeCEMap& nodeCEMap, + const CostModelCoefficients& coefficients) { + CostAndCEInternal result = deriveInternal( + metadata, memo, physProps, physNodeRef, childProps, nodeCEMap, coefficients); + + switch (getPropertyConst(physProps) + .getDistributionAndProjections() + ._type) { + case DistributionType::Centralized: + case DistributionType::Replicated: + break; + + case DistributionType::RoundRobin: + case DistributionType::HashPartitioning: + case DistributionType::RangePartitioning: + case DistributionType::UnknownPartitioning: + result._cost /= metadata._numberOfPartitions; + break; + + default: + MONGO_UNREACHABLE; + } + + return result; + } + +private: + CostDerivation(const Metadata& metadata, + const Memo& memo, + const CEType ce, + const PhysProps& physProps, + const ChildPropsType& childProps, + const NodeCEMap& nodeCEMap, + const CostModelCoefficients& coefficients) + : _metadata(metadata), + _memo(memo), + _physProps(physProps), + _cardinalityEstimate(getAdjustedCE(ce, _physProps)), + _childProps(childProps), + _nodeCEMap(nodeCEMap), + _coefficients(coefficients) {} + + template + static bool isTrivialExpr(const ABT& n) { + if (n.is() || n.is()) { + return true; + } + if (const auto* ptr = n.cast(); ptr != nullptr && + ptr->getPath().template is() && isTrivialExpr(ptr->getInput())) { + return true; + } + return false; + } + + static CostAndCEInternal deriveInternal(const Metadata& metadata, + const Memo& memo, + const PhysProps& physProps, + const ABT::reference_type physNodeRef, + const ChildPropsType& childProps, + const NodeCEMap& nodeCEMap, + const CostModelCoefficients& coefficients) { + auto it = nodeCEMap.find(physNodeRef.cast()); + bool found = (it != nodeCEMap.cend()); + uassert(7034003, + "Only MemoLogicalDelegatorNode can be missing from nodeCEMap.", + found || physNodeRef.is()); + const CEType ce = (found ? it->second : 0.0); + + CostDerivation instance(metadata, memo, ce, physProps, childProps, nodeCEMap, coefficients); + CostAndCEInternal costCEestimates = physNodeRef.visit(instance); + return costCEestimates; + } + + CostAndCEInternal deriveChild(const ABT& child, const size_t childIndex) { + PhysProps physProps = _childProps.empty() ? _physProps : _childProps.at(childIndex).second; + return deriveInternal( + _metadata, _memo, physProps, child.ref(), {}, _nodeCEMap, _coefficients); + } + + static CEType getAdjustedCE(CEType baseCE, const PhysProps& physProps) { + CEType result = baseCE; + + // First: correct for un-enforced limit. + if (hasProperty(physProps)) { + const auto limit = getPropertyConst(physProps).getAbsoluteLimit(); + if (result > limit) { + result = limit; + } + } + + // Second: correct for enforced limit. + if (hasProperty(physProps)) { + const auto limit = getPropertyConst(physProps).getEstimate(); + if (result > limit) { + result = limit; + } + } + + // Third: correct for repetition. + if (hasProperty(physProps)) { + result *= getPropertyConst(physProps).getEstimate(); + } + + return result; + } + + // We don't own this. 
+ const Metadata& _metadata; + const Memo& _memo; + const PhysProps& _physProps; + const CEType _cardinalityEstimate; + const ChildPropsType& _childProps; + const NodeCEMap& _nodeCEMap; + const CostModelCoefficients& _coefficients; +}; +} // namespace + +CostAndCE CostEstimatorImpl::deriveCost(const Metadata& metadata, + const Memo& memo, + const PhysProps& physProps, + const ABT::reference_type physNodeRef, + const ChildPropsType& childProps, + const NodeCEMap& nodeCEMap) const { + const CostAndCEInternal result = CostDerivation::derive( + metadata, memo, physProps, physNodeRef, childProps, nodeCEMap, _coefficients); + return {CostType::fromDouble(result._cost), result._ce}; +} + +} // namespace mongo::cost_model diff --git a/src/mongo/db/query/cost_model/cost_estimator_impl.h b/src/mongo/db/query/cost_model/cost_estimator_impl.h new file mode 100644 index 00000000000..0ed094c02a9 --- /dev/null +++ b/src/mongo/db/query/cost_model/cost_estimator_impl.h @@ -0,0 +1,56 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/query/cost_model/cost_model_gen.h" +#include "mongo/db/query/optimizer/cascades/interfaces.h" +#include "mongo/db/query/optimizer/cascades/memo.h" + +namespace mongo::cost_model { +/** + * Default costing for physical nodes with logical delegator (not-yet-optimized) inputs. 
+ */
+class CostEstimatorImpl : public optimizer::cascades::CostEstimator {
+public:
+    CostEstimatorImpl(CostModelCoefficients coefficients)
+        : _coefficients{std::move(coefficients)} {}
+
+    optimizer::CostAndCE deriveCost(const optimizer::Metadata& metadata,
+                                    const optimizer::cascades::Memo& memo,
+                                    const optimizer::properties::PhysProps& physProps,
+                                    optimizer::ABT::reference_type physNodeRef,
+                                    const optimizer::ChildPropsType& childProps,
+                                    const optimizer::NodeCEMap& nodeCEMap) const override final;
+
+private:
+    const CostModelCoefficients _coefficients;
+};
+
+}  // namespace mongo::cost_model
diff --git a/src/mongo/db/query/cost_model/cost_estimator_test.cpp b/src/mongo/db/query/cost_model/cost_estimator_test.cpp
index 420da954b06..5b68139a72e 100644
--- a/src/mongo/db/query/cost_model/cost_estimator_test.cpp
+++ b/src/mongo/db/query/cost_model/cost_estimator_test.cpp
@@ -27,7 +27,7 @@
  * it in the license file.
  */
 
-#include "mongo/db/query/cost_model/cost_estimator.h"
+#include "mongo/db/query/cost_model/cost_estimator_impl.h"
 #include "mongo/db/query/cost_model/cost_model_gen.h"
 #include "mongo/db/query/cost_model/cost_model_utils.h"
 #include "mongo/db/query/optimizer/cascades/memo.h"
@@ -48,7 +48,7 @@ TEST(CostEstimatorTest, PhysicalScanCost) {
     costModel.setScanStartupCost(startupCost);
     costModel.setScanIncrementalCost(scanCost);
 
-    CostEstimator costEstimator{costModel};
+    CostEstimatorImpl costEstimator{costModel};
 
     optimizer::Metadata metadata{{}};
     optimizer::cascades::Memo memo{};
@@ -94,7 +94,7 @@ TEST(CostEstimatorTest, PhysicalScanCostWithAdjustedCE) {
     costModel.setScanStartupCost(startupCost);
     costModel.setScanIncrementalCost(scanCost);
 
-    CostEstimator costEstimator{costModel};
+    CostEstimatorImpl costEstimator{costModel};
 
     optimizer::Metadata metadata{{}};
     optimizer::cascades::Memo memo{};
@@ -130,7 +130,7 @@ TEST(CostEstimatorTest, IndexScanCost) {
     costModel.setIndexScanStartupCost(startupCost);
     costModel.setIndexScanIncrementalCost(indexScanCost);
 
-    CostEstimator costEstimator{costModel};
+    CostEstimatorImpl costEstimator{costModel};
 
     optimizer::Metadata metadata{{}};
     optimizer::cascades::Memo memo{};
@@ -173,7 +173,7 @@ TEST(CostEstimatorTest, FilterAndEvaluationCost) {
     costModel.setEvalIncrementalCost(evalCost);
     costModel.setEvalStartupCost(startupCost);
 
-    CostEstimator costEstimator{costModel};
+    CostEstimatorImpl costEstimator{costModel};
 
     optimizer::Metadata metadata{{}};
     optimizer::cascades::Memo memo{};
@@ -273,7 +273,7 @@ TEST(CostEstimatorTest, MergeJoinCost) {
 
     nodeCEMap[evalNodeRight.cast<optimizer::Node>()] = ce;
 
-    CostEstimator costEstimator{costModel};
+    CostEstimatorImpl costEstimator{costModel};
 
     optimizer::Metadata metadata{{}};
     optimizer::cascades::Memo memo{};
diff --git a/src/mongo/db/query/cqf_get_executor.cpp b/src/mongo/db/query/cqf_get_executor.cpp
index 642a8ad6322..be762fc0b9e 100644
--- a/src/mongo/db/query/cqf_get_executor.cpp
+++ b/src/mongo/db/query/cqf_get_executor.cpp
@@ -35,12 +35,11 @@
 #include "mongo/db/pipeline/abt/document_source_visitor.h"
 #include "mongo/db/pipeline/abt/match_expression_visitor.h"
 #include "mongo/db/pipeline/abt/utils.h"
-#include "mongo/db/query/ce/ce_heuristic.h"
-#include "mongo/db/query/ce/ce_histogram.h"
-#include "mongo/db/query/ce/ce_sampling.h"
-#include "mongo/db/query/ce/collection_statistics_impl.h"
+#include "mongo/db/query/ce/heuristic_estimator.h"
+#include "mongo/db/query/ce/histogram_estimator.h"
+#include "mongo/db/query/ce/sampling_estimator.h"
 #include "mongo/db/query/ce_mode_parameter.h"
-#include
"mongo/db/query/cost_model/cost_estimator.h" +#include "mongo/db/query/cost_model/cost_estimator_impl.h" #include "mongo/db/query/cost_model/cost_model_gen.h" #include "mongo/db/query/cost_model/cost_model_manager.h" #include "mongo/db/query/cost_model/on_coefficients_change_updater_impl.h" @@ -54,6 +53,7 @@ #include "mongo/db/query/query_knobs_gen.h" #include "mongo/db/query/query_planner_params.h" #include "mongo/db/query/sbe_stage_builder.h" +#include "mongo/db/query/stats/collection_statistics_impl.h" #include "mongo/db/query/yield_policy_callbacks_impl.h" #include "mongo/logv2/log.h" #include "mongo/logv2/log_attr.h" @@ -65,7 +65,10 @@ MONGO_FAIL_POINT_DEFINE(failConstructingBonsaiExecutor); namespace mongo { using namespace optimizer; -using cost_model::CostEstimator; +using ce::HeuristicEstimator; +using ce::HistogramEstimator; +using ce::SamplingEstimator; +using cost_model::CostEstimatorImpl; using cost_model::CostModelManager; static opt::unordered_map buildIndexSpecsOptimizer( @@ -582,9 +585,9 @@ static OptPhaseManager createPhaseManager(const CEMode mode, prefixId, false /*requireRID*/, std::move(metadataForSampling), - std::make_unique(), - std::make_unique(), - std::make_unique(costModel), + std::make_unique(), + std::make_unique(), + std::make_unique(costModel), defaultConvertPathToInterval, constFold, DebugInfo::kDefaultForProd, @@ -593,12 +596,12 @@ static OptPhaseManager createPhaseManager(const CEMode mode, prefixId, requireRID, std::move(metadata), - std::make_unique(opCtx, - std::move(phaseManagerForSampling), - collectionSize, - std::make_unique()), - std::make_unique(), - std::make_unique(costModel), + std::make_unique(opCtx, + std::move(phaseManagerForSampling), + collectionSize, + std::make_unique()), + std::make_unique(), + std::make_unique(costModel), defaultConvertPathToInterval, constFold, DebugInfo::kDefaultForProd, @@ -610,11 +613,11 @@ static OptPhaseManager createPhaseManager(const CEMode mode, prefixId, requireRID, std::move(metadata), - std::make_unique( - std::make_shared(collectionSize, nss), - std::make_unique()), - std::make_unique(), - std::make_unique(costModel), + std::make_unique( + std::make_shared(collectionSize, nss), + std::make_unique()), + std::make_unique(), + std::make_unique(costModel), defaultConvertPathToInterval, constFold, DebugInfo::kDefaultForProd, @@ -625,9 +628,9 @@ static OptPhaseManager createPhaseManager(const CEMode mode, prefixId, requireRID, std::move(metadata), - std::make_unique(), - std::make_unique(), - std::make_unique(costModel), + std::make_unique(), + std::make_unique(), + std::make_unique(costModel), defaultConvertPathToInterval, constFold, DebugInfo::kDefaultForProd, diff --git a/src/mongo/db/query/optimizer/cascades/interfaces.h b/src/mongo/db/query/optimizer/cascades/interfaces.h index 2eb2f801b0b..63d20fe9845 100644 --- a/src/mongo/db/query/optimizer/cascades/interfaces.h +++ b/src/mongo/db/query/optimizer/cascades/interfaces.h @@ -56,9 +56,9 @@ public: /** * Interface for deriving CE for a newly added logical node in a new memo group. */ -class CEInterface { +class CardinalityEstimator { public: - virtual ~CEInterface() = default; + virtual ~CardinalityEstimator() = default; virtual CEType deriveCE(const Metadata& metadata, const Memo& memo, @@ -69,9 +69,9 @@ public: /** * Interface for deriving costs and adjusted CE (based on physical props) for a physical node. 
*/ -class CostingInterface { +class CostEstimator { public: - virtual ~CostingInterface() = default; + virtual ~CostEstimator() = default; virtual CostAndCE deriveCost(const Metadata& metadata, const Memo& memo, const properties::PhysProps& physProps, diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp index 7f527d394cf..f1192426a1d 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp +++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp @@ -88,7 +88,7 @@ LogicalRewriter::LogicalRewriter(const Metadata& metadata, const PathToIntervalFn& pathToInterval, const ConstFoldFn& constFold, const LogicalPropsInterface& logicalPropsDerivation, - const CEInterface& ceDerivation) + const CardinalityEstimator& cardinalityEstimator) : _activeRewriteSet(std::move(rewriteSet)), _groupsPending(), _metadata(metadata), @@ -99,7 +99,7 @@ LogicalRewriter::LogicalRewriter(const Metadata& metadata, _pathToInterval(pathToInterval), _constFold(constFold), _logicalPropsDerivation(logicalPropsDerivation), - _ceDerivation(ceDerivation) { + _cardinalityEstimator(cardinalityEstimator) { initializeRewrites(); if (_activeRewriteSet.count(LogicalRewriteType::SargableSplit) > 0) { @@ -132,7 +132,7 @@ std::pair LogicalRewriter::addNode(const ABT& node, } const GroupIdType resultGroupId = _memo.integrate( - Memo::Context{&_metadata, &_debugInfo, &_logicalPropsDerivation, &_ceDerivation}, + Memo::Context{&_metadata, &_debugInfo, &_logicalPropsDerivation, &_cardinalityEstimator}, node, std::move(targetGroupMap), insertNodeIds, diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.h b/src/mongo/db/query/optimizer/cascades/logical_rewriter.h index 0af43abae90..5ed180a222b 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.h +++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.h @@ -61,7 +61,7 @@ public: const PathToIntervalFn& pathToInterval, const ConstFoldFn& constFold, const LogicalPropsInterface& logicalPropsDerivation, - const CEInterface& ceDerivation); + const CardinalityEstimator& cardinalityEstimator); // This is a transient structure. We do not allow copying or moving. 
LogicalRewriter() = delete; @@ -130,7 +130,7 @@ private: const PathToIntervalFn& _pathToInterval; const ConstFoldFn& _constFold; const LogicalPropsInterface& _logicalPropsDerivation; - const CEInterface& _ceDerivation; + const CardinalityEstimator& _cardinalityEstimator; RewriteFnMap _rewriteMap; diff --git a/src/mongo/db/query/optimizer/cascades/memo.cpp b/src/mongo/db/query/optimizer/cascades/memo.cpp index 12f43bb2d9d..f847b1df58b 100644 --- a/src/mongo/db/query/optimizer/cascades/memo.cpp +++ b/src/mongo/db/query/optimizer/cascades/memo.cpp @@ -535,15 +535,15 @@ private: Memo::Context::Context(const Metadata* metadata, const DebugInfo* debugInfo, const LogicalPropsInterface* logicalPropsDerivation, - const CEInterface* ceDerivation) + const CardinalityEstimator* cardinalityEstimator) : _metadata(metadata), _debugInfo(debugInfo), _logicalPropsDerivation(logicalPropsDerivation), - _ceDerivation(ceDerivation) { + _cardinalityEstimator(cardinalityEstimator) { invariant(_metadata != nullptr); invariant(_debugInfo != nullptr); invariant(_logicalPropsDerivation != nullptr); - invariant(_ceDerivation != nullptr); + invariant(_cardinalityEstimator != nullptr); } size_t Memo::GroupIdVectorHash::operator()(const Memo::GroupIdVector& v) const { @@ -617,7 +617,8 @@ void Memo::estimateCE(const Context& ctx, const GroupIdType groupId) { ctx._logicalPropsDerivation->deriveProps(*ctx._metadata, nodeRef, nullptr, this, groupId); props.merge(logicalProps); - const CEType estimate = ctx._ceDerivation->deriveCE(*ctx._metadata, *this, props, nodeRef); + const CEType estimate = + ctx._cardinalityEstimator->deriveCE(*ctx._metadata, *this, props, nodeRef); auto ceProp = properties::CardinalityEstimate(estimate); if (auto sargablePtr = nodeRef.cast(); sargablePtr != nullptr) { @@ -630,8 +631,8 @@ void Memo::estimateCE(const Context& ctx, const GroupIdType groupId) { ScanParams{}, sargablePtr->getTarget(), sargablePtr->getChild()); - const CEType singularEst = - ctx._ceDerivation->deriveCE(*ctx._metadata, *this, props, singularReq.ref()); + const CEType singularEst = ctx._cardinalityEstimator->deriveCE( + *ctx._metadata, *this, props, singularReq.ref()); partialSchemaKeyCE.emplace_back(key, singularEst); } } diff --git a/src/mongo/db/query/optimizer/cascades/memo.h b/src/mongo/db/query/optimizer/cascades/memo.h index c2c2b6a2d93..93321bce7e1 100644 --- a/src/mongo/db/query/optimizer/cascades/memo.h +++ b/src/mongo/db/query/optimizer/cascades/memo.h @@ -127,13 +127,13 @@ public: Context(const Metadata* metadata, const DebugInfo* debugInfo, const LogicalPropsInterface* logicalPropsDerivation, - const CEInterface* ceDerivation); + const CardinalityEstimator* cardinalityEstimator); // None of those should be null. 
const Metadata* _metadata; const DebugInfo* _debugInfo; const LogicalPropsInterface* _logicalPropsDerivation; - const CEInterface* _ceDerivation; + const CardinalityEstimator* _cardinalityEstimator; }; struct Stats { diff --git a/src/mongo/db/query/optimizer/cascades/physical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/physical_rewriter.cpp index c0b3423ab0c..bdbe168e146 100644 --- a/src/mongo/db/query/optimizer/cascades/physical_rewriter.cpp +++ b/src/mongo/db/query/optimizer/cascades/physical_rewriter.cpp @@ -110,13 +110,13 @@ PhysicalRewriter::PhysicalRewriter(const Metadata& metadata, const DebugInfo& debugInfo, const QueryHints& hints, const RIDProjectionsMap& ridProjections, - const CostingInterface& costDerivation, + const CostEstimator& costEstimator, const PathToIntervalFn& pathToInterval, std::unique_ptr& logicalRewriter) : _metadata(metadata), _memo(memo), _rootGroupId(rootGroupId), - _costDerivation(costDerivation), + _costEstimator(costEstimator), _debugInfo(debugInfo), _hints(hints), _ridProjections(ridProjections), @@ -149,7 +149,7 @@ void PhysicalRewriter::costAndRetainBestNode(std::unique_ptr node, const GroupIdType groupId, PrefixId& prefixId, PhysOptimizationResult& bestResult) { - const CostAndCE nodeCostAndCE = _costDerivation.deriveCost( + const CostAndCE nodeCostAndCE = _costEstimator.deriveCost( _metadata, _memo, bestResult._physProps, node->ref(), childProps, nodeCEMap); const CostType nodeCost = nodeCostAndCE._cost; uassert(6624056, "Must get non-infinity cost for physical node.", !nodeCost.isInfinite()); diff --git a/src/mongo/db/query/optimizer/cascades/physical_rewriter.h b/src/mongo/db/query/optimizer/cascades/physical_rewriter.h index a2fb936442e..50bc831dc46 100644 --- a/src/mongo/db/query/optimizer/cascades/physical_rewriter.h +++ b/src/mongo/db/query/optimizer/cascades/physical_rewriter.h @@ -58,7 +58,7 @@ public: const DebugInfo& debugInfo, const QueryHints& hints, const RIDProjectionsMap& ridProjections, - const CostingInterface& costDerivation, + const CostEstimator& costEstimator, const PathToIntervalFn& pathToInterval, std::unique_ptr& logicalRewriter); @@ -96,7 +96,7 @@ private: const Metadata& _metadata; Memo& _memo; const GroupIdType _rootGroupId; - const CostingInterface& _costDerivation; + const CostEstimator& _costEstimator; const DebugInfo& _debugInfo; const QueryHints& _hints; const RIDProjectionsMap& _ridProjections; diff --git a/src/mongo/db/query/optimizer/opt_phase_manager.cpp b/src/mongo/db/query/optimizer/opt_phase_manager.cpp index 79e6b34201a..2b710a1b2ca 100644 --- a/src/mongo/db/query/optimizer/opt_phase_manager.cpp +++ b/src/mongo/db/query/optimizer/opt_phase_manager.cpp @@ -49,9 +49,9 @@ OptPhaseManager::OptPhaseManager(OptPhaseManager::PhaseSet phaseSet, PrefixId& prefixId, const bool requireRID, Metadata metadata, - std::unique_ptr explorationCE, - std::unique_ptr substitutionCE, - std::unique_ptr costDerivation, + std::unique_ptr explorationCE, + std::unique_ptr substitutionCE, + std::unique_ptr costEstimator, PathToIntervalFn pathToInterval, ConstFoldFn constFold, DebugInfo debugInfo, @@ -64,14 +64,14 @@ OptPhaseManager::OptPhaseManager(OptPhaseManager::PhaseSet phaseSet, _logicalPropsDerivation(std::make_unique()), _explorationCE(std::move(explorationCE)), _substitutionCE(std::move(substitutionCE)), - _costDerivation(std::move(costDerivation)), + _costEstimator(std::move(costEstimator)), _pathToInterval(std::move(pathToInterval)), _constFold(std::move(constFold)), _physicalNodeId(), _requireRID(requireRID), 
      _ridProjections(),
      _prefixId(prefixId) {
-    uassert(6624093, "Cost derivation is null", _costDerivation);
+    uassert(6624093, "Cost derivation is null", _costEstimator);
     uassert(7088900, "Exploration CE is null", _explorationCE);
     uassert(7088901, "Substitution CE is null", _substitutionCE);
@@ -224,7 +224,7 @@ void OptPhaseManager::runMemoPhysicalRewrite(const OptPhase phase,
                                        _debugInfo,
                                        _hints,
                                        _ridProjections,
-                                       *_costDerivation,
+                                       *_costEstimator,
                                        _pathToInterval,
                                        logicalRewriter);
diff --git a/src/mongo/db/query/optimizer/opt_phase_manager.h b/src/mongo/db/query/optimizer/opt_phase_manager.h
index 4c6e8c431ae..c1219361f55 100644
--- a/src/mongo/db/query/optimizer/opt_phase_manager.h
+++ b/src/mongo/db/query/optimizer/opt_phase_manager.h
@@ -79,9 +79,9 @@ public:
                     PrefixId& prefixId,
                     bool requireRID,
                     Metadata metadata,
-                    std::unique_ptr<cascades::CEInterface> explorationCE,
-                    std::unique_ptr<cascades::CEInterface> substitutionCE,
-                    std::unique_ptr<cascades::CostingInterface> costDerivation,
+                    std::unique_ptr<cascades::CardinalityEstimator> explorationCE,
+                    std::unique_ptr<cascades::CardinalityEstimator> substitutionCE,
+                    std::unique_ptr<cascades::CostEstimator> costEstimator,
                     PathToIntervalFn pathToInterval,
                     ConstFoldFn constFold,
                     DebugInfo debugInfo,
@@ -172,7 +172,7 @@ private:
     /**
      * Cardinality estimation implementation to be used during the exploration phase.
      */
-    std::unique_ptr<cascades::CEInterface> _explorationCE;
+    std::unique_ptr<cascades::CardinalityEstimator> _explorationCE;
 
     /**
      * Cardinality estimation implementation to be used during the substitution phase.
@@ -181,12 +181,12 @@
      * alternatives. Since some CE implementations are expensive (sampling), we let the caller pass
      * a different one for this phase.
      */
-    std::unique_ptr<cascades::CEInterface> _substitutionCE;
+    std::unique_ptr<cascades::CardinalityEstimator> _substitutionCE;
 
     /**
      * Cost derivation implementation.
      */
-    std::unique_ptr<cascades::CostingInterface> _costDerivation;
+    std::unique_ptr<cascades::CostEstimator> _costEstimator;
 
     /**
      * Path ABT node to index bounds converter implementation.
diff --git a/src/mongo/db/query/optimizer/utils/ce_math.cpp b/src/mongo/db/query/optimizer/utils/ce_math.cpp
index 936686354ac..4eaa9372c4e 100644
--- a/src/mongo/db/query/optimizer/utils/ce_math.cpp
+++ b/src/mongo/db/query/optimizer/utils/ce_math.cpp
@@ -34,8 +34,7 @@
 #include "mongo/db/query/optimizer/utils/ce_math.h"
 #include "mongo/util/assert_util.h"
 
-namespace mongo::ce {
-
+namespace mongo::optimizer::ce {
 bool validSelectivity(SelectivityType sel) {
     return (sel >= 0.0 && sel <= 1.0);
 }
@@ -82,4 +81,4 @@ SelectivityType disjExponentialBackoff(std::vector<SelectivityType> disjSelectiv
     }
     return 1.0 - sel;
 }
-}  // namespace mongo::ce
+}  // namespace mongo::optimizer::ce
diff --git a/src/mongo/db/query/optimizer/utils/ce_math.h b/src/mongo/db/query/optimizer/utils/ce_math.h
index a29a7d1f34b..44b87b14904 100644
--- a/src/mongo/db/query/optimizer/utils/ce_math.h
+++ b/src/mongo/db/query/optimizer/utils/ce_math.h
@@ -34,9 +34,7 @@
 #include "mongo/db/query/optimizer/defs.h"
 
-namespace mongo::ce {
-
-using namespace mongo::optimizer;
+namespace mongo::optimizer::ce {
 
 // Default cardinality when actual collection cardinality is unknown.
 // Mostly used by unit tests.
@@ -68,4 +66,4 @@ SelectivityType conjExponentialBackoff(std::vector<SelectivityType> conjSelectiv
 * exponential backoff.
*/ SelectivityType disjExponentialBackoff(std::vector disjSelectivities); -} // namespace mongo::ce +} // namespace mongo::optimizer::ce diff --git a/src/mongo/db/query/optimizer/utils/unit_test_utils.cpp b/src/mongo/db/query/optimizer/utils/unit_test_utils.cpp index f4b3ba349e5..e261c5ac537 100644 --- a/src/mongo/db/query/optimizer/utils/unit_test_utils.cpp +++ b/src/mongo/db/query/optimizer/utils/unit_test_utils.cpp @@ -32,9 +32,9 @@ #include #include "mongo/db/pipeline/abt/utils.h" -#include "mongo/db/query/ce/ce_heuristic.h" -#include "mongo/db/query/ce/ce_hinted.h" -#include "mongo/db/query/cost_model/cost_estimator.h" +#include "mongo/db/query/ce/heuristic_estimator.h" +#include "mongo/db/query/ce/hinted_estimator.h" +#include "mongo/db/query/cost_model/cost_estimator_impl.h" #include "mongo/db/query/cost_model/cost_model_manager.h" #include "mongo/db/query/optimizer/explain.h" #include "mongo/db/query/optimizer/metadata.h" @@ -246,16 +246,16 @@ IndexDefinition makeCompositeIndexDefinition(std::vector indexFi return IndexDefinition{std::move(idxCollSpec), isMultiKey}; } -std::unique_ptr makeHeuristicCE() { - return std::make_unique(); +std::unique_ptr makeHeuristicCE() { + return std::make_unique(); } -std::unique_ptr makeHintedCE(ce::PartialSchemaSelHints hints) { - return std::make_unique(std::move(hints)); +std::unique_ptr makeHintedCE(ce::PartialSchemaSelHints hints) { + return std::make_unique(std::move(hints)); } -std::unique_ptr makeCosting() { - return std::make_unique( +std::unique_ptr makeCostEstimator() { + return std::make_unique( cost_model::CostModelManager::getDefaultCoefficients()); } @@ -270,7 +270,7 @@ OptPhaseManager makePhaseManager(OptPhaseManager::PhaseSet phaseSet, std::move(metadata), makeHeuristicCE(), // primary CE makeHeuristicCE(), // substitution phase CE, same as primary - makeCosting(), + makeCostEstimator(), defaultConvertPathToInterval, ConstEval::constFold, std::move(debugInfo), @@ -280,16 +280,16 @@ OptPhaseManager makePhaseManager(OptPhaseManager::PhaseSet phaseSet, OptPhaseManager makePhaseManager(OptPhaseManager::PhaseSet phaseSet, PrefixId& prefixId, Metadata metadata, - std::unique_ptr ceDerivation, + std::unique_ptr ce, DebugInfo debugInfo, QueryHints queryHints) { return OptPhaseManager{std::move(phaseSet), prefixId, false /*requireRID*/, std::move(metadata), - std::move(ceDerivation), // primary CE - makeHeuristicCE(), // substitution phase CE - makeCosting(), + std::move(ce), // primary CE + makeHeuristicCE(), // substitution phase CE + makeCostEstimator(), defaultConvertPathToInterval, ConstEval::constFold, std::move(debugInfo), @@ -308,7 +308,7 @@ OptPhaseManager makePhaseManager(OptPhaseManager::PhaseSet phaseSet, std::move(metadata), makeHeuristicCE(), // primary CE makeHeuristicCE(), // substitution phase CE, same as primary - std::make_unique(coefs), + std::make_unique(coefs), defaultConvertPathToInterval, ConstEval::constFold, std::move(debugInfo), @@ -326,7 +326,7 @@ OptPhaseManager makePhaseManagerRequireRID(OptPhaseManager::PhaseSet phaseSet, std::move(metadata), makeHeuristicCE(), // primary CE makeHeuristicCE(), // substitution phase CE, same as primary - makeCosting(), + makeCostEstimator(), defaultConvertPathToInterval, ConstEval::constFold, std::move(debugInfo), diff --git a/src/mongo/db/query/optimizer/utils/unit_test_utils.h b/src/mongo/db/query/optimizer/utils/unit_test_utils.h index 953b8a07781..a1e6549e52f 100644 --- a/src/mongo/db/query/optimizer/utils/unit_test_utils.h +++ 
b/src/mongo/db/query/optimizer/utils/unit_test_utils.h @@ -30,7 +30,7 @@ #pragma once #include "mongo/db/bson/dotted_path_support.h" -#include "mongo/db/query/ce/ce_hinted.h" +#include "mongo/db/query/ce/hinted_estimator.h" #include "mongo/db/query/cost_model/cost_model_gen.h" #include "mongo/db/query/optimizer/defs.h" #include "mongo/db/query/optimizer/explain.h" @@ -168,17 +168,17 @@ IndexDefinition makeCompositeIndexDefinition(std::vector indexFi /** * A factory function to create a heuristic-based cardinality estimator. */ -std::unique_ptr makeHeuristicCE(); +std::unique_ptr makeHeuristicCE(); /** * A factory function to create a hint-based cardinality estimator. */ -std::unique_ptr makeHintedCE(ce::PartialSchemaSelHints hints); +std::unique_ptr makeHintedCE(ce::PartialSchemaSelHints hints); /** * A convenience factory function to create costing. */ -std::unique_ptr makeCosting(); +std::unique_ptr makeCostEstimator(); /** * A convenience factory function to create OptPhaseManager for unit tests. @@ -195,7 +195,7 @@ OptPhaseManager makePhaseManager(OptPhaseManager::PhaseSet phaseSet, OptPhaseManager makePhaseManager(OptPhaseManager::PhaseSet phaseSet, PrefixId& prefixId, Metadata metadata, - std::unique_ptr ceDerivation, + std::unique_ptr ce, DebugInfo debugInfo, QueryHints queryHints = {}); diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl index 6b10ec2e9fd..af84c505323 100644 --- a/src/mongo/db/query/query_knobs.idl +++ b/src/mongo/db/query/query_knobs.idl @@ -742,7 +742,7 @@ server_parameters: cpp_vartype: std::string default: sampling validator: - callback: ce::validateCEMode + callback: optimizer::ce::validateCEMode internalCascadesOptimizerDisableScan: description: "Disable full collection scans in the Cascades optimizer." 
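
Taken together, the renames above change how a unit test assembles a phase manager: makeCosting() becomes makeCostEstimator(), and the CE factories now return the renamed cascades::CardinalityEstimator interface. The following is a minimal sketch (not part of this patch) of the resulting wiring; it assumes the pre-existing helpers OptPhaseManager::getAllRewritesSet() and DebugInfo::kDefaultForTests, neither of which appears in this diff, and only the names touched by this commit are taken from the changes above.

    // Sketch only: building a phase manager with a hint-based cardinality
    // estimator via the renamed factory helpers from unit_test_utils.h.
    using namespace mongo::optimizer;

    PrefixId prefixId;
    ce::PartialSchemaSelHints hints;  // selectivity hints, keyed by partial schema key

    OptPhaseManager phaseManager = makePhaseManager(
        OptPhaseManager::getAllRewritesSet(),  // assumed helper for the full rewrite set
        prefixId,
        Metadata{{}},                          // empty metadata, as in the tests above
        makeHintedCE(std::move(hints)),        // primary CE; substitution CE defaults to heuristic
        DebugInfo::kDefaultForTests);          // assumed debug-info constant
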
diff --git a/src/mongo/db/query/stats/SConscript b/src/mongo/db/query/stats/SConscript new file mode 100644 index 00000000000..1ef2b61f00e --- /dev/null +++ b/src/mongo/db/query/stats/SConscript @@ -0,0 +1,123 @@ +# -*- mode: python -*- + +Import("env") + +env = env.Clone() + +env.Library( + target="query_stats", + source=[ + 'collection_statistics_impl.cpp', + 'stats_catalog.cpp', + 'stats_cache.cpp', + 'stats_cache_loader_impl.cpp', + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/dbdirectclient', + '$BUILD_DIR/mongo/util/caching', + '$BUILD_DIR/mongo/util/concurrency/thread_pool', + 'stats_histograms', + ], +) + +env.Library( + target="stats_histograms", + source=[ + 'array_histogram.cpp', + 'scalar_histogram.cpp', + 'stats.idl', + 'value_utils.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/db/exec/sbe/query_sbe_values', + ], +) + +env.Library( + target="stats_gen", + source=[ + 'max_diff.cpp', + ], + LIBDEPS=[ + 'stats_histograms', + ], +) + +env.CppUnitTest( + target='stats_cache_loader_test', + source=[ + 'stats_cache_loader_test.cpp', + 'stats_cache_loader_test_fixture.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/db/auth/authmocks', + '$BUILD_DIR/mongo/db/catalog/collection_crud', + '$BUILD_DIR/mongo/db/commands/test_commands_enabled', + '$BUILD_DIR/mongo/db/index_builds_coordinator_mongod', + '$BUILD_DIR/mongo/db/multitenancy', + '$BUILD_DIR/mongo/db/op_observer/op_observer', + '$BUILD_DIR/mongo/db/op_observer/op_observer_impl', + '$BUILD_DIR/mongo/db/query/datetime/date_time_support', + '$BUILD_DIR/mongo/db/query/query_test_service_context', + '$BUILD_DIR/mongo/db/query_expressions', + '$BUILD_DIR/mongo/db/repl/drop_pending_collection_reaper', + '$BUILD_DIR/mongo/db/repl/oplog', + '$BUILD_DIR/mongo/db/repl/optime', + '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface', + '$BUILD_DIR/mongo/db/repl/replmocks', + '$BUILD_DIR/mongo/db/repl/storage_interface_impl', + '$BUILD_DIR/mongo/db/server_base', + '$BUILD_DIR/mongo/db/service_context', + '$BUILD_DIR/mongo/db/service_context_d_test_fixture', + '$BUILD_DIR/mongo/db/service_context_test_fixture', + '$BUILD_DIR/mongo/db/shard_role', + '$BUILD_DIR/mongo/db/storage/wiredtiger/storage_wiredtiger', + '$BUILD_DIR/mongo/db/timeseries/timeseries_options', + '$BUILD_DIR/mongo/unittest/unittest', + '$BUILD_DIR/mongo/util/clock_source_mock', + '$BUILD_DIR/mongo/util/fail_point', + '$BUILD_DIR/mongo/util/pcre_wrapper', + ], +) + +env.CppUnitTest( + target="stats_cache_test", + source=[ + "stats_cache_test.cpp", + "stats_cache_loader_mock.cpp", + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/service_context', + 'stats_test_utils', + ], +) + +env.CppUnitTest( + target="stats_path_test", + source=[ + "stats_path_test.cpp", + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/service_context', + 'stats_test_utils', + ], +) + +env.Library( + target="stats_test_utils", + source=[ + 'collection_statistics_mock.cpp', + 'rand_utils.cpp', + 'rand_utils_new.cpp', + 'maxdiff_test_utils.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/exec/sbe/sbe_abt_test_util', + "$BUILD_DIR/mongo/unittest/unittest", + 'stats_gen', + 'stats_histograms', + ], +) diff --git a/src/mongo/db/query/stats/array_histogram.cpp b/src/mongo/db/query/stats/array_histogram.cpp new file mode 100644 index 00000000000..ccf11bf02d2 --- /dev/null +++ b/src/mongo/db/query/stats/array_histogram.cpp @@ -0,0 +1,209 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/stats/array_histogram.h" +#include "mongo/db/query/stats/value_utils.h" + +namespace mongo::stats { +namespace { +TypeCounts mapStatsTypeCountToTypeCounts(std::vector tc) { + TypeCounts out; + for (const auto& t : tc) { + out.emplace(deserialize(t.getTypeName().toString()), t.getCount()); + } + return out; +} +} // namespace + +ArrayHistogram::ArrayHistogram() : ArrayHistogram(ScalarHistogram(), {}) {} + +ArrayHistogram::ArrayHistogram(Statistics stats) + : ArrayHistogram(stats.getScalarHistogram(), + mapStatsTypeCountToTypeCounts(stats.getTypeCount()), + stats.getTrueCount(), + stats.getFalseCount()) { + // TODO SERVER-71513: initialize non-scalar histogram fields. 
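+    // (Editorial note, added for clarity.) This constructor delegates to the
+    // scalar-field constructor below. mapStatsTypeCountToTypeCounts() above turns
+    // the IDL's [{typeName, count}, ...] array back into a TypeCounts map by
+    // re-deserializing each type name, e.g. "NumberInt64" -> TypeTags::NumberInt64.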
+} + +ArrayHistogram::ArrayHistogram(ScalarHistogram scalar, + TypeCounts typeCounts, + ScalarHistogram arrayUnique, + ScalarHistogram arrayMin, + ScalarHistogram arrayMax, + TypeCounts arrayTypeCounts, + double emptyArrayCount, + double trueCount, + double falseCount) + : _scalar(std::move(scalar)), + _typeCounts(std::move(typeCounts)), + _emptyArrayCount(emptyArrayCount), + _trueCount(trueCount), + _falseCount(falseCount), + _arrayUnique(std::move(arrayUnique)), + _arrayMin(std::move(arrayMin)), + _arrayMax(std::move(arrayMax)), + _arrayTypeCounts(std::move(arrayTypeCounts)) { + invariant(isArray()); +} + +ArrayHistogram::ArrayHistogram(ScalarHistogram scalar, + TypeCounts typeCounts, + double trueCount, + double falseCount) + : _scalar(std::move(scalar)), + _typeCounts(std::move(typeCounts)), + _emptyArrayCount(0.0), + _trueCount(trueCount), + _falseCount(falseCount), + _arrayUnique(boost::none), + _arrayMin(boost::none), + _arrayMax(boost::none), + _arrayTypeCounts(boost::none) { + invariant(!isArray()); +} + +bool ArrayHistogram::isArray() const { + return _arrayUnique && _arrayMin && _arrayMax && _arrayTypeCounts; +} + +std::string typeCountsToString(const TypeCounts& typeCounts) { + std::ostringstream os; + os << "{"; + bool first = true; + for (auto [tag, count] : typeCounts) { + if (!first) + os << ", "; + os << tag << ": " << count; + first = false; + } + os << "}"; + return os.str(); +} + +std::string ArrayHistogram::toString() const { + std::ostringstream os; + os << "{\n"; + os << " scalar: " << _scalar.toString(); + os << ",\n typeCounts: " << typeCountsToString(_typeCounts); + if (isArray()) { + os << ",\n arrayUnique: " << _arrayUnique->toString(); + os << ",\n arrayMin: " << _arrayMin->toString(); + os << ",\n arrayMax: " << _arrayMax->toString(); + os << ",\n arrayTypeCounts: " << typeCountsToString(*_arrayTypeCounts); + } + os << "\n}\n"; + return os.str(); +} + +const ScalarHistogram& ArrayHistogram::getScalar() const { + return _scalar; +} + +const ScalarHistogram& ArrayHistogram::getArrayUnique() const { + invariant(isArray()); + return *_arrayUnique; +} + +const ScalarHistogram& ArrayHistogram::getArrayMin() const { + invariant(isArray()); + return *_arrayMin; +} + +const ScalarHistogram& ArrayHistogram::getArrayMax() const { + invariant(isArray()); + return *_arrayMax; +} + +const TypeCounts& ArrayHistogram::getTypeCounts() const { + return _typeCounts; +} + +const TypeCounts& ArrayHistogram::getArrayTypeCounts() const { + invariant(isArray()); + return *_arrayTypeCounts; +} + +double ArrayHistogram::getArrayCount() const { + if (isArray()) { + auto findArray = _typeCounts.find(sbe::value::TypeTags::Array); + uassert(6979504, + "Histogram with array data must have a total array count.", + findArray != _typeCounts.end()); + double arrayCount = findArray->second; + uassert(6979503, "Histogram with array data must have at least one array.", arrayCount > 0); + return arrayCount; + } + return 0; +} + +BSONObj ArrayHistogram::serialize() const { + BSONObjBuilder histogramBuilder; + + // Serialize boolean type counters. + histogramBuilder.append("trueCount", getTrueCount()); + histogramBuilder.append("falseCount", getFalseCount()); + + // Serialize empty array counts. + histogramBuilder.appendNumber("emptyArrayCount", getEmptyArrayCount()); + + // Serialize type counts. 
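+    // (Editorial note.) With the counters appended above, the type-count array
+    // built below, and the scalar histogram appended after it, the serialized
+    // document has roughly this shape (values illustrative):
+    //   {trueCount: 0.0, falseCount: 0.0, emptyArrayCount: 0.0,
+    //    typeCount: [{typeName: "NumberInt64", count: 42}, ...],
+    //    scalarHistogram: {...}}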
+ BSONArrayBuilder typeCountBuilder(histogramBuilder.subarrayStart("typeCount")); + const auto& typeCounts = getTypeCounts(); + for (const auto& [sbeType, count] : typeCounts) { + auto typeCount = BSON("typeName" << stats::serialize(sbeType) << "count" << count); + typeCountBuilder.append(typeCount); + } + typeCountBuilder.doneFast(); + + // Serialize scalar histogram. + histogramBuilder.append("scalarHistogram", getScalar().serialize()); + + // TODO SERVER-71513: serialize array histograms. + + histogramBuilder.doneFast(); + return histogramBuilder.obj(); +} + +BSONObj makeStatistics(double documents, const ArrayHistogram& arrayHistogram) { + BSONObjBuilder builder; + builder.appendNumber("documents", documents); + builder.appendElements(arrayHistogram.serialize()); + builder.doneFast(); + return builder.obj(); +} + +BSONObj makeStatsPath(StringData path, double documents, const ArrayHistogram& arrayHistogram) { + BSONObjBuilder builder; + builder.append("_id", path); + builder.append("statistics", makeStatistics(documents, arrayHistogram)); + builder.doneFast(); + return builder.obj(); +} + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/array_histogram.h b/src/mongo/db/query/stats/array_histogram.h new file mode 100644 index 00000000000..9a80feae423 --- /dev/null +++ b/src/mongo/db/query/stats/array_histogram.h @@ -0,0 +1,142 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include + +#include "mongo/db/exec/sbe/values/value.h" +#include "mongo/db/query/stats/scalar_histogram.h" +#include "mongo/db/query/stats/stats_gen.h" + +namespace mongo::stats { +using TypeCounts = std::map; + +class ArrayHistogram { +public: + // Constructs an empty scalar histogram. + ArrayHistogram(); + + // Constructor using StatsPath IDL as input. + ArrayHistogram(Statistics stats); + + // Constructor for scalar field histograms. + ArrayHistogram(ScalarHistogram scalar, + TypeCounts typeCounts, + double trueCount = 0.0, + double falseCount = 0.0); + + // Constructor for array field histograms. We have to initialize all array fields in this case. 
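+    // (Editorial note.) createArrayEstimator() in max_diff.cpp is the typical
+    // caller: in one pass over the data it builds the scalar histogram, the
+    // unique/min/max array histograms, both type-count maps, and the
+    // empty-array/boolean counters, then constructs an ArrayHistogram from them.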
+ ArrayHistogram(ScalarHistogram scalar, + TypeCounts typeCounts, + ScalarHistogram arrayUnique, + ScalarHistogram arrayMin, + ScalarHistogram arrayMax, + TypeCounts arrayTypeCounts, + double emptyArrayCount = 0.0, + double trueCount = 0.0, + double falseCount = 0.0); + + // ArrayHistogram is neither copy-constructible nor copy-assignable. + ArrayHistogram(const ArrayHistogram&) = delete; + ArrayHistogram& operator=(const ArrayHistogram&) = delete; + + // However, it is move-constructible and move-assignable. + ArrayHistogram(ArrayHistogram&&) = default; + ArrayHistogram& operator=(ArrayHistogram&&) = default; + ~ArrayHistogram() = default; + + std::string toString() const; + + // Serialize to BSON for storage in stats collection. + BSONObj serialize() const; + + const ScalarHistogram& getScalar() const; + const ScalarHistogram& getArrayUnique() const; + const ScalarHistogram& getArrayMin() const; + const ScalarHistogram& getArrayMax() const; + const TypeCounts& getTypeCounts() const; + const TypeCounts& getArrayTypeCounts() const; + + // Returns whether or not this histogram includes array data points. + bool isArray() const; + + // Get the total number of arrays in the histogram's path including empty arrays. + double getArrayCount() const; + + // Get the total number of empty arrays ( [] ) in the histogram's path. + double getEmptyArrayCount() const { + return _emptyArrayCount; + } + + // Get the count of true booleans. + double getTrueCount() const { + return _trueCount; + } + + // Get the count of false booleans. + double getFalseCount() const { + return _falseCount; + } + +private: + /* Fields for all paths. */ + + // Contains values which appeared originally as scalars on the path. + ScalarHistogram _scalar; + // The number of values of each type. + TypeCounts _typeCounts; + // The number of empty arrays - they are not accounted for in the histograms. + double _emptyArrayCount; + // The counts of true & false booleans. + double _trueCount; + double _falseCount; + + /* Fields for array paths (only initialized if arrays are present). */ + + // Contains unique scalar values originating from arrays. + boost::optional _arrayUnique; + // Contains minimum values originating from arrays **per class**. + boost::optional _arrayMin; + // Contains maximum values originating from arrays **per class**. + boost::optional _arrayMax; + // The number of values of each type inside all arrays. + boost::optional _arrayTypeCounts; +}; + +/** + * Returns an owned BSON Object representing data matching mongo::Statistics IDL. + */ +BSONObj makeStatistics(double documents, const ArrayHistogram& arrayHistogram); + +/** + * Returns an owned BSON Object representing data matching mongo::StatsPath IDL. + */ +BSONObj makeStatsPath(StringData path, double documents, const ArrayHistogram& arrayHistogram); +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/collection_statistics.h b/src/mongo/db/query/stats/collection_statistics.h new file mode 100644 index 00000000000..22e48663a61 --- /dev/null +++ b/src/mongo/db/query/stats/collection_statistics.h @@ -0,0 +1,60 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/namespace_string.h" +#include "mongo/db/query/stats/array_histogram.h" + +namespace mongo::stats { + +using Histograms = std::map>; + +class CollectionStatistics { +public: + /** + * Returns the cardinality of the given collection. + */ + virtual double getCardinality() const = 0; + + /** + * Returns the histogram for the given field path, or nullptr if none exists. + */ + virtual const ArrayHistogram* getHistogram(const std::string& path) const = 0; + + /** + * Adds a histogram along the given path. + */ + virtual void addHistogram(const std::string& path, + std::shared_ptr histogram) const = 0; + + virtual ~CollectionStatistics() = default; +}; + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/collection_statistics_impl.cpp b/src/mongo/db/query/stats/collection_statistics_impl.cpp new file mode 100644 index 00000000000..b03829b3f1d --- /dev/null +++ b/src/mongo/db/query/stats/collection_statistics_impl.cpp @@ -0,0 +1,72 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/db/query/stats/collection_statistics_impl.h" + +#include "mongo/db/client.h" +#include "mongo/db/query/stats/stats_catalog.h" + +namespace mongo::stats { + +CollectionStatisticsImpl::CollectionStatisticsImpl(double cardinality, const NamespaceString& nss) + : _cardinality{cardinality}, _histograms{}, _nss{nss} {}; + +double CollectionStatisticsImpl::getCardinality() const { + return _cardinality; +} + +void CollectionStatisticsImpl::addHistogram(const std::string& path, + std::shared_ptr histogram) const { + _histograms[path] = histogram; +} + +const ArrayHistogram* CollectionStatisticsImpl::getHistogram(const std::string& path) const { + if (auto mapIt = _histograms.find(path); mapIt != _histograms.end()) { + return mapIt->second.get(); + } else { + uassert(8423368, "no current client", Client::getCurrent()); + auto opCtx = Client::getCurrent()->getOperationContext(); + uassert(8423367, "no operation context", opCtx); + StatsCatalog& statsCatalog = StatsCatalog::get(opCtx); + const auto swHistogram = statsCatalog.getHistogram(opCtx, _nss, path); + if (!swHistogram.isOK()) { + if (swHistogram != ErrorCodes::NamespaceNotFound) { + uasserted(swHistogram.getStatus().code(), + str::stream() << "Error getting histograms for path " << _nss << " : " + << path << swHistogram.getStatus().reason()); + } + return nullptr; + } + const auto histogram = std::move(swHistogram.getValue()); + addHistogram(path, histogram); + return histogram.get(); + } +} + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/collection_statistics_impl.h b/src/mongo/db/query/stats/collection_statistics_impl.h new file mode 100644 index 00000000000..19c9612382f --- /dev/null +++ b/src/mongo/db/query/stats/collection_statistics_impl.h @@ -0,0 +1,67 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#pragma once + +#include "mongo/db/query/stats/array_histogram.h" +#include "mongo/db/query/stats/collection_statistics.h" + +namespace mongo::stats { + +using Histograms = std::map>; + +class CollectionStatisticsImpl : public CollectionStatistics { +public: + CollectionStatisticsImpl(double cardinality, const NamespaceString& nss); + + /** + * Returns the cardinality of the given collection. + */ + double getCardinality() const override; + + /** + * Returns the histogram for the given field path, or nullptr if none exists. + */ + const ArrayHistogram* getHistogram(const std::string& path) const override; + + /** + * Adds a histogram along the given path. + */ + void addHistogram(const std::string& path, + std::shared_ptr histogram) const override; + + ~CollectionStatisticsImpl() = default; + +private: + double _cardinality; + mutable Histograms _histograms; + const NamespaceString _nss; +}; + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/collection_statistics_mock.cpp b/src/mongo/db/query/stats/collection_statistics_mock.cpp new file mode 100644 index 00000000000..39b2f65e527 --- /dev/null +++ b/src/mongo/db/query/stats/collection_statistics_mock.cpp @@ -0,0 +1,53 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/db/query/stats/collection_statistics_mock.h" + +namespace mongo::stats { + +CollectionStatisticsMock::CollectionStatisticsMock(double cardinality) + : _cardinality{cardinality}, _histograms{} {}; + +double CollectionStatisticsMock::getCardinality() const { + return _cardinality; +} + +void CollectionStatisticsMock::addHistogram(const std::string& path, + std::shared_ptr histogram) const { + _histograms[path] = histogram; +} + +const ArrayHistogram* CollectionStatisticsMock::getHistogram(const std::string& path) const { + if (auto mapIt = _histograms.find(path); mapIt != _histograms.end()) { + return mapIt->second.get(); + } + return nullptr; +} + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/collection_statistics_mock.h b/src/mongo/db/query/stats/collection_statistics_mock.h new file mode 100644 index 00000000000..04fee5ff69c --- /dev/null +++ b/src/mongo/db/query/stats/collection_statistics_mock.h @@ -0,0 +1,64 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/namespace_string.h" +#include "mongo/db/query/stats/collection_statistics.h" + +namespace mongo::stats { + +class CollectionStatisticsMock : public CollectionStatistics { +public: + CollectionStatisticsMock(double cardinality); + + /** + * Returns the cardinality of the given collection. + */ + double getCardinality() const override; + + /** + * Adds a histogram along the given path. + */ + void addHistogram(const std::string& path, + std::shared_ptr histogram) const override; + + /** + * Returns the histogram for the given field path, or nullptr if none exists. + */ + const ArrayHistogram* getHistogram(const std::string& path) const override; + + ~CollectionStatisticsMock() = default; + +private: + double _cardinality; + mutable Histograms _histograms; +}; + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/max_diff.cpp b/src/mongo/db/query/stats/max_diff.cpp new file mode 100644 index 00000000000..9203b3d8321 --- /dev/null +++ b/src/mongo/db/query/stats/max_diff.cpp @@ -0,0 +1,378 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/stats/max_diff.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mongo/base/string_data.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/bson/bsontypes.h" +#include "mongo/db/exec/sbe/values/bson.h" +#include "mongo/db/exec/sbe/values/value.h" +#include "mongo/db/query/stats/value_utils.h" +#include "mongo/util/assert_util.h" + +namespace mongo::stats { +namespace { +namespace value = sbe::value; + +std::string printDistribution(const DataDistribution& distr, size_t nElems) { + std::ostringstream os; + for (size_t i = 0; i < std::min(nElems, distr._freq.size()); ++i) { + os << "{val: " << distr._bounds[i].get() << ", " << distr._freq[i].toString() << "}\n"; + } + return os.str(); +} + +double valueSpread(value::TypeTags tag1, + value::Value val1, + value::TypeTags tag2, + value::Value val2) { + double doubleVal1 = valueToDouble(tag1, val1); + double doubleVal2 = valueToDouble(tag2, val2); + uassert(6660502, + "Data distribution values must be monotonically increasing.", + doubleVal2 >= doubleVal1); + return doubleVal2 - doubleVal1; +} + +// TODO: This doesn't seem right -- it looks like we're sorting on the frequency, +// not the difference between buckets +std::vector generateTopKBuckets(const DataDistribution& dataDistrib, size_t numBuckets) { + struct AreaComparator { + bool operator()(const ValFreq& a, const ValFreq& b) const { + return a._normArea > b._normArea; + } + }; + std::priority_queue, AreaComparator> pq; + + for (const auto& valFreq : dataDistrib._freq) { + if (pq.size() < numBuckets) { + pq.emplace(valFreq); + } else if (AreaComparator()(valFreq, pq.top())) { + pq.pop(); + pq.emplace(valFreq); + } + } + + std::vector result; + while (!pq.empty()) { + result.push_back(pq.top()); + pq.pop(); + } + + std::sort(result.begin(), result.end(), [](const ValFreq& a, const ValFreq& b) { + return a._idx < b._idx; + }); + + return result; +} + +/** + * Helper for getting the input for constructing an array histogram for an array estimator using the + * values in an array. For each value in `arrayElements`, update the min, max, and unique value + * vectors. 
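(Editorial example: for a sorted input [1, 5, "a", "b"], the per-type-class
+ * minima {1, "a"} feed the min vector, the maxima {5, "b"} feed the max vector,
+ * and every distinct value feeds the unique vector.)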
These will be used to generate the corresponding histograms for array values. + */ +void updateMinMaxUniqArrayVals(std::vector& arrayElements, + std::vector& arrayMinData, + std::vector& arrayMaxData, + std::vector& arrayUniqueData) { + + if (arrayElements.size() == 0) { + return; + } + + sortValueVector(arrayElements); + + // Emit values for arrayMin and arrayMax histograms. + { + boost::optional prev; + for (const auto& element : arrayElements) { + if (!prev) { + arrayMinData.push_back(element); + } else if (!sameTypeClass(prev->getTag(), element.getTag())) { + arrayMaxData.push_back(*prev); + arrayMinData.push_back(element); + } + prev = element; + } + if (prev) { + arrayMaxData.push_back(*prev); + } + } + + // Emit values for arrayUnique histogram. + { + boost::optional prev; + for (const auto& element : arrayElements) { + if (!prev || + compareValues( + prev->getTag(), prev->getValue(), element.getTag(), element.getValue()) < 0) { + arrayUniqueData.push_back(element); + prev = element; + } + } + } +} +} // namespace + +DataDistribution getDataDistribution(const std::vector& sortedInput) { + if (sortedInput.empty()) { + return {}; + } + + DataDistribution result; + value::TypeTags prevTag; + value::Value prevValue; + bool first = true; + + // Aggregate the values in a sorted dataset into a frequency distribution. + size_t idx = 0; + for (size_t i = 0; i < sortedInput.size(); i++) { + const auto v = sortedInput[i].get(); + const auto comparison = first ? 1 : compareValues(v.first, v.second, prevTag, prevValue); + first = false; + + if (comparison != 0) { + uassert(6660550, "Input is not sorted", comparison > 0); + prevTag = v.first; + prevValue = v.second; + + const auto [tagCopy, valCopy] = copyValue(v.first, v.second); + result._bounds.emplace_back(tagCopy, valCopy); + result._freq.emplace_back(idx, 1); + ++idx; + } else { + ++result._freq.back()._freq; + } + } + + // Calculate the area for all values in the data distribution. + // The current minimum and maximum areas of the values of a type class. + double maxArea = 0.0; + + for (size_t i = 0; i + 1 < result._freq.size(); ++i) { + const auto v1 = result._bounds[i]; + const auto v2 = result._bounds[i + 1]; + const bool newTypeClass = !sameTypeClass(v1.getTag(), v2.getTag()); + + if (newTypeClass) { + const auto res = result.typeClassBounds.emplace(i, maxArea); + uassert(6660551, "There can't be duplicate type class bounds.", res.second); + maxArea = 0.0; + } else if (i == 0) { + const double spread = + valueSpread(v1.getTag(), v1.getValue(), v2.getTag(), v2.getValue()); + maxArea = result._freq[i]._freq * spread; + } + + if (i == 0 || newTypeClass) { + // Make sure we insert bucket boundaries between different types, and also make sure + // first value is picked for a boundary. + result._freq[i]._area = std::numeric_limits::infinity(); + } else { + const double spread = + valueSpread(v1.getTag(), v1.getValue(), v2.getTag(), v2.getValue()); + result._freq[i]._area = result._freq[i]._freq * spread; + maxArea = std::max(maxArea, result._freq[i]._area); + } + } + + // Make sure last value is picked as a histogram bucket boundary. + result._freq.back()._area = std::numeric_limits::infinity(); + const auto res = result.typeClassBounds.emplace(result._freq.size(), maxArea); + uassert(6660503, "There can't be duplicate type class bounds.", res.second); + + // Compute normalized areas. If the spread is 0, the area may also be 0. 
This could happen, + // for instance, if there is only a single value of a given type, + size_t beginIdx = 0; + for (const auto [endIdx, area] : result.typeClassBounds) { + for (size_t i = beginIdx; i < endIdx; ++i) { + result._freq[i]._normArea = area > 0.0 ? (result._freq[i]._area / area) : 0.0; + } + beginIdx = endIdx; + } + + // std::cout << "Distribution sorted by value:\n" + // << printDistribution(result, result._freq.size()) << "\n" + // << std::flush; + + return result; +} + +ScalarHistogram genMaxDiffHistogram(const DataDistribution& dataDistrib, size_t numBuckets) { + if (dataDistrib._freq.empty()) { + return {}; + } + + std::vector topKBuckets = generateTopKBuckets(dataDistrib, numBuckets); + uassert(6660504, + "Must have bucket boundary on first value", + topKBuckets[0]._idx == dataDistrib._freq[0]._idx); + uassert(6660505, + "Must have bucket boundary on last value", + topKBuckets.back()._idx == dataDistrib._freq.back()._idx); + + std::vector buckets; + value::Array bounds; + + // Create histogram buckets out of the top-K bucket values. + size_t startBucketIdx = 0; + double cumulativeFreq = 0.0; + double cumulativeNDV = 0.0; + for (size_t i = 0; i < std::min(dataDistrib._freq.size(), numBuckets); i++) { + const size_t bucketBoundIdx = topKBuckets[i]._idx; + const double freq = dataDistrib._freq.at(bucketBoundIdx)._freq; + + // Compute per-bucket statistics. + double rangeFreq = 0.0; + double ndv = 0.0; + while (startBucketIdx < bucketBoundIdx) { + rangeFreq += dataDistrib._freq[startBucketIdx++]._freq; + ++ndv; + } + cumulativeFreq += rangeFreq + freq; + cumulativeNDV += ndv + 1.0; + + // Add a histogram bucket. + const auto v = dataDistrib._bounds[startBucketIdx]; + const auto [copyTag, copyVal] = value::copyValue(v.getTag(), v.getValue()); + bounds.push_back(copyTag, copyVal); + buckets.emplace_back(freq, rangeFreq, cumulativeFreq, ndv, cumulativeNDV); + startBucketIdx++; + } + + return {std::move(bounds), std::move(buckets)}; +} + +ArrayHistogram createArrayEstimator(const std::vector& arrayData, size_t nBuckets) { + // Values that will be used as inputs to histogram generation code. + std::vector scalarData; + std::vector arrayMinData; + std::vector arrayMaxData; + std::vector arrayUniqueData; + + // Type counters. + TypeCounts typeCounts; + TypeCounts arrayTypeCounts; + + // Value counters. + double emptyArrayCount = 0; + double trueCount = 0; + double falseCount = 0; + + for (const auto& v : arrayData) { + const auto val = v.getValue(); + const auto tag = v.getTag(); + + // Increment type counters. + auto tagCount = typeCounts.insert({tag, 1}); + if (!tagCount.second) { + ++tagCount.first->second; + } + + if (tag == value::TypeTags::Array) { + // If we have an array, we can construct min, max, and unique histograms from its + // elements, provided that they are histogrammable. + std::vector arrayElements; + + value::Array* arr = value::getArrayView(val); + size_t arrSize = arr->size(); + if (arrSize == 0) { + ++emptyArrayCount; + continue; + } + + for (size_t i = 0; i < arrSize; i++) { + const auto [tag, val] = arr->getAt(i); + + // Increment array type tag counts. 
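+                // (Editorial note.) std::map::insert() returns an {iterator,
+                // inserted} pair; when the tag is already present, 'inserted' is
+                // false and the existing counter is incremented instead, matching
+                // the scalar type counters above.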
+ auto arrTagCount = arrayTypeCounts.insert({tag, 1}); + if (!arrTagCount.second) { + ++arrTagCount.first->second; + } + + if (!canEstimateTypeViaHistogram(tag)) { + // If the elements of this array are not histogrammable, then we can only update + // the array type counters + continue; + } + + const auto [tagCopy, valCopy] = value::copyValue(tag, val); + arrayElements.emplace_back(tagCopy, valCopy); + } + updateMinMaxUniqArrayVals(arrayElements, arrayMinData, arrayMaxData, arrayUniqueData); + + } else if (tag == value::TypeTags::Boolean) { + // If we have a boolean, we also have counters for true and false values we should + // increment here. + if (value::bitcastTo(val)) { + trueCount++; + } else { + falseCount++; + } + continue; + + } else if (!canEstimateTypeViaHistogram(tag)) { + // If we have a non-histogrammable type, we can only increment the type counters for it; + // we cannot build a scalar histogram on it. + continue; + + } else { + // Assume non-arrays are scalars. Emit values for the scalar histogram. + scalarData.push_back(v); + } + } + + // Lambda helper to construct histogram from an unsorted value vector. + const auto makeHistogram = [&nBuckets](std::vector& values) { + sortValueVector(values); + return genMaxDiffHistogram(getDataDistribution(values), nBuckets); + }; + + return {makeHistogram(scalarData), + std::move(typeCounts), + makeHistogram(arrayUniqueData), + makeHistogram(arrayMinData), + makeHistogram(arrayMaxData), + std::move(arrayTypeCounts), + emptyArrayCount, + trueCount, + falseCount}; +} + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/max_diff.h b/src/mongo/db/query/stats/max_diff.h new file mode 100644 index 00000000000..147cb35af8d --- /dev/null +++ b/src/mongo/db/query/stats/max_diff.h @@ -0,0 +1,82 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#pragma once + +#include +#include + +#include "mongo/db/exec/sbe/values/value.h" +#include "mongo/db/query/stats/array_histogram.h" +#include "mongo/db/query/stats/scalar_histogram.h" +#include "mongo/db/query/stats/value_utils.h" + +namespace mongo::stats { + +struct ValFreq { + ValFreq(size_t idx, size_t freq) : _idx(idx), _freq(freq), _area(-1.0), _normArea(-1) {} + + std::string toString() const { + std::ostringstream os; + os << "idx: " << _idx << ", freq: " << _freq << ", area: " << _area + << ", normArea: " << _normArea; + return os.str(); + } + + size_t _idx; // Original index according to value order. + size_t _freq; // Frequency of the value. + double _area; // Derived as: spread * frequency + double _normArea; // Area normalized to the maximum in a type class. +}; + +struct DataDistribution { + std::vector _bounds; + std::vector _freq; + // The min/max areas of each type class. The key is the index of the last boundary of the class. + std::map typeClassBounds; +}; + +/** + Given a set of values sorted in BSON order, generate a data distribution consisting of + counts for each value with the values in sorted order +*/ +DataDistribution getDataDistribution(const std::vector& sortedInput); + +/** + Given a data distribution, generate a scalar histogram with the supplied number of buckets +*/ +ScalarHistogram genMaxDiffHistogram(const DataDistribution& dataDistrib, size_t numBuckets); + +/** + Given a vector containing SBEValues, generate a set of statistics to summarize the supplied + data. Histograms will use the supplied number of buckets. +*/ +ArrayHistogram createArrayEstimator(const std::vector& arrayData, size_t nBuckets); + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/maxdiff_test_utils.cpp b/src/mongo/db/query/stats/maxdiff_test_utils.cpp new file mode 100644 index 00000000000..cb0e66dc285 --- /dev/null +++ b/src/mongo/db/query/stats/maxdiff_test_utils.cpp @@ -0,0 +1,120 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/db/query/stats/maxdiff_test_utils.h" + +#include "mongo/db/query/stats/array_histogram.h" +#include "mongo/db/query/stats/max_diff.h" + +namespace mongo::stats { + +static std::vector convertToJSON(const std::vector& input) { + std::vector result; + + for (size_t i = 0; i < input.size(); i++) { + const auto [objTag, objVal] = sbe::value::makeNewObject(); + sbe::value::ValueGuard vg(objTag, objVal); + + const auto [tag, val] = input[i].get(); + // Copy the value because objVal owns its value, and the ValueGuard releases not only + // objVal, but also its Value (in the case below - copyVal). + const auto [copyTag, copyVal] = sbe::value::copyValue(tag, val); + sbe::value::getObjectView(objVal)->push_back("a", copyTag, copyVal); + + std::ostringstream os; + os << std::make_pair(objTag, objVal); + result.push_back(os.str()); + } + + return result; +} + +size_t getActualCard(OperationContext* opCtx, + const std::vector& input, + const std::string& query) { + return mongo::optimizer::runPipeline(opCtx, query, convertToJSON(input)).size(); +} + +std::string makeMatchExpr(const SBEValue& val, optimizer::ce::EstimationType cmpOp) { + std::stringstream matchExpr; + std::string cmpOpName = optimizer::ce::estimationTypeName.at(cmpOp); + matchExpr << "[{$match: {a: {$" << cmpOpName << ": " << val.get() << "}}}]"; + return matchExpr.str(); +} + +ScalarHistogram makeHistogram(std::vector& randData, size_t nBuckets) { + sortValueVector(randData); + const DataDistribution& dataDistrib = getDataDistribution(randData); + return genMaxDiffHistogram(dataDistrib, nBuckets); +} + +std::string printValueArray(const std::vector& values) { + std::stringstream strStream; + for (size_t i = 0; i < values.size(); ++i) { + strStream << " " << values[i].get(); + } + return strStream.str(); +} + +std::string plotArrayEstimator(const ArrayHistogram& estimator, const std::string& header) { + std::ostringstream os; + os << header << "\n"; + if (!estimator.getScalar().empty()) { + os << "Scalar histogram:\n" << estimator.getScalar().plot(); + } + if (!estimator.getArrayUnique().empty()) { + os << "Array unique histogram:\n" << estimator.getArrayUnique().plot(); + } + if (!estimator.getArrayMin().empty()) { + os << "Array min histogram:\n" << estimator.getArrayMin().plot(); + } + if (!estimator.getArrayMax().empty()) { + os << "Array max histogram:\n" << estimator.getArrayMax().plot(); + } + if (!estimator.getTypeCounts().empty()) { + os << "Per scalar data type value counts: "; + for (auto tagCount : estimator.getTypeCounts()) { + os << tagCount.first << "=" << tagCount.second << " "; + } + } + if (!estimator.getArrayTypeCounts().empty()) { + os << "\nPer array data type value counts: "; + for (auto tagCount : estimator.getArrayTypeCounts()) { + os << tagCount.first << "=" << tagCount.second << " "; + } + } + if (estimator.isArray()) { + os << "\nEmpty array count: " << estimator.getEmptyArrayCount(); + } + os << "\n"; + + return os.str(); +} + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/maxdiff_test_utils.h b/src/mongo/db/query/stats/maxdiff_test_utils.h new file mode 100644 index 00000000000..a34f7dd41ee --- /dev/null +++ b/src/mongo/db/query/stats/maxdiff_test_utils.h @@ -0,0 +1,74 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include +#include + +#include "mongo/db/exec/sbe/abt/sbe_abt_test_util.h" +#include "mongo/db/exec/sbe/values/value.h" +#include "mongo/db/query/ce/histogram_predicate_estimation.h" +#include "mongo/db/query/stats/array_histogram.h" +#include "mongo/db/query/stats/scalar_histogram.h" +#include "mongo/db/query/stats/value_utils.h" + +namespace mongo::stats { + +/** + Given a list of SBE values and a query, create a collection containing the data, + and count the results from the supplied query. + */ +size_t getActualCard(OperationContext* opCtx, + const std::vector& input, + const std::string& query); + +/** + Given a value and a comparison operator, generate a match expression reflecting + x cmpOp val. +*/ +std::string makeMatchExpr(const SBEValue& val, optimizer::ce::EstimationType cmpOp); + +/** + Given a vector of values, create a histogram reflection the distribution of the vector + with the supplied number of buckets. +*/ +ScalarHistogram makeHistogram(std::vector& randData, size_t nBuckets); + +/** + Serialize a vector of values. +*/ +std::string printValueArray(const std::vector& values); + +/** + Plot a set of statistics as stored in ArrayHistogram. +*/ +std::string plotArrayEstimator(const ArrayHistogram& estimator, const std::string& header); + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/rand_utils.cpp b/src/mongo/db/query/stats/rand_utils.cpp new file mode 100644 index 00000000000..ff66272a681 --- /dev/null +++ b/src/mongo/db/query/stats/rand_utils.cpp @@ -0,0 +1,392 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/stats/rand_utils.h" + +#include +#include +#include +#include + +#include "mongo/db/exec/sbe/values/value.h" +#include "mongo/util/assert_util.h" + +namespace mongo::stats { +namespace value = sbe::value; + +const std::string DatasetDescriptor::_alphabet = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + +DatasetDescriptor::DatasetDescriptor(const DataTypeDistribution& dataTypeDistribution, + size_t intNDV, + int minInt, + int maxInt, + size_t strNDV, + size_t minStrLen, + size_t maxStrLen, + std::shared_ptr nestedDataDescriptor, + double reuseScalarsRatio, + size_t arrNDV, + size_t minArrLen, + size_t maxArrLen) + : _gen{42}, + _reuseScalarsRatio(reuseScalarsRatio), + _intNDV(std::min(intNDV, static_cast(std::abs(maxInt - minInt)))), + _uniformIntDist{minInt, maxInt}, + _arrNDV(arrNDV), + _uniformArrSizeDist{minArrLen, maxArrLen}, + _nestedDataDescriptor(nestedDataDescriptor) { + uassert(6660520, "Maximum integer number must be >= the minimum one.", (maxInt >= minInt)); + uassert(6660521, "Maximum string size must be >= the minimum one.", (maxStrLen >= minStrLen)); + uassert(6660522, + "Array specs must be 0 if there is no array data descriptor.", + _nestedDataDescriptor || (arrNDV == 0 && minArrLen == 0 && maxArrLen == 0)); + uassert(6660523, + "Nested arrays requires sensible array lengths", + !_nestedDataDescriptor || maxArrLen >= minArrLen); + uassert(6660524, "Recursive descriptors are not allowed.", nestedDataDescriptor.get() != this); + uassert(6660525, + "reuseScalarsRatio is a probability, must be in [0, 1].", + reuseScalarsRatio >= 0 && reuseScalarsRatio <= 1.0); + + // Compute absolute ranges given relative weights of each value type. + double sumWeights = 0; + for (const auto& weightedType : dataTypeDistribution) { + sumWeights += weightedType.second; + } + double sumRelativeWeights = 0; + auto lastKey = dataTypeDistribution.crbegin()->first; + for (auto it = dataTypeDistribution.cbegin(); it != dataTypeDistribution.cend(); ++it) { + const auto weightedType = *it; + if (weightedType.first != lastKey) { + sumRelativeWeights += weightedType.second / sumWeights; + uassert(6660526, "The sum of weights can't be >= 1", sumRelativeWeights < 1); + } else { + // Due to rounding errors the last relative weight may not be exactly 1.0. Set it + // to 1.0. + sumRelativeWeights = 1.0; + } + _dataTypeDistribution.emplace(sumRelativeWeights, weightedType.first); + } + + // Generate a set of random integers. + mongo::stdx::unordered_set tmpIntSet; + tmpIntSet.reserve(_intNDV); + if (_intNDV == intNDV) { + for (int i = minInt; i <= maxInt; ++i) { + tmpIntSet.insert(i); // This is a dense set of all ints the range. 
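+            // (Editorial note.) This branch runs when the requested NDV was not
+            // reduced by the [minInt, maxInt] clamp above. Otherwise the
+            // else-branch draws random integers until _intNDV distinct values
+            // are collected, giving up after 10 * _intNDV attempts; the uassert
+            // that follows requires the set to reach more than 99% of _intNDV.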
+ } + } else { + size_t randCount = 0; + while (tmpIntSet.size() < _intNDV && randCount < 10 * _intNDV) { + int randInt = _uniformIntDist(_gen); + ++randCount; + tmpIntSet.insert(randInt); + } + } + uassert( + 6660527, "Too few integers generated.", (double)tmpIntSet.size() / (double)_intNDV > 0.99); + _intSet.reserve(tmpIntSet.size()); + _intSet.insert(_intSet.end(), tmpIntSet.begin(), tmpIntSet.end()); + _uniformIntIdxDist.param( + std::uniform_int_distribution::param_type(0, _intSet.size() - 1)); + + // Generate a set of random strings with random sizes so that each string can be chosen + // multiple times in the test data set. + _stringSet.reserve(strNDV); + std::uniform_int_distribution uniformStrSizeDistr{minStrLen, maxStrLen}; + for (size_t i = 0; i < strNDV; ++i) { + size_t len = uniformStrSizeDistr(_gen); + const auto randStr = genRandomString(len); + _stringSet.push_back(randStr); + } + _uniformStrIdxDist.param( + std::uniform_int_distribution::param_type(0, _stringSet.size() - 1)); + + // Generate a set of random arrays that are chosen from when generating array data. + fillRandomArraySet(); +} + +std::vector DatasetDescriptor::genRandomDataset(size_t nElems, + DatasetDescriptor* parentDesc) { + std::vector randValues; + randValues.reserve(nElems); + DatasetDescriptor* curDesc = this; + + if (parentDesc) { + double reuseProb = _uniformRandProbability(_gen); + if (reuseProb < parentDesc->_reuseScalarsRatio) { + curDesc = parentDesc; + } + } + + for (size_t i = 0; i < nElems; ++i) { + // Get the data type of the current value to be generated. + value::TypeTags genTag = this->getRandDataType(); + // Generate a random value of the corresponding type. + switch (genTag) { + case value::TypeTags::NumberInt64: { + size_t idx = curDesc->_uniformIntIdxDist(_gen); + auto randInt = curDesc->_intSet.at(idx); + const auto [tag, val] = makeInt64Value(randInt); + randValues.emplace_back(tag, val); + break; + } + case value::TypeTags::StringBig: + case value::TypeTags::StringSmall: { + size_t idx = curDesc->_uniformStrIdxDist(_gen); + const auto randStr = curDesc->_stringSet.at(idx); + const auto [tag, val] = value::makeNewString(randStr); + const auto [copyTag, copyVal] = value::copyValue(tag, val); + randValues.emplace_back(copyTag, copyVal); + break; + } + case value::TypeTags::Array: { + if (_nestedDataDescriptor) { + const auto randArray = genRandomArray(); + auto [arrayTag, arrayVal] = value::makeNewArray(); + value::Array* arr = value::getArrayView(arrayVal); + for (const auto& elem : randArray) { + const auto [copyTag, copyVal] = + value::copyValue(elem.getTag(), elem.getValue()); + arr->push_back(copyTag, copyVal); + } + randValues.emplace_back(arrayTag, arrayVal); + } + break; + } + default: + uasserted(6660528, "Unsupported data type"); + } + } + + return randValues; +} + +std::string DatasetDescriptor::genRandomString(size_t len) { + std::string randStr; + randStr.reserve(len); + for (size_t i = 0; i < len; ++i) { + size_t idx = _uniformCharIdxDist(_gen); + const char ch = _alphabet[idx]; + randStr += ch; + } + + return randStr; +} + +std::vector DatasetDescriptor::genRandomArray() { + uassert(6660529, + "There must be a nested data descriptor for random array generation.", + _nestedDataDescriptor); + if (_arrNDV == 0) { + size_t randArraySize = _uniformArrSizeDist(_gen); + return _nestedDataDescriptor->genRandomDataset(randArraySize, this); + } else { + size_t idx = _uniformArrIdxDist(_gen); + return _arraySet.at(idx); + } +} + +void DatasetDescriptor::fillRandomArraySet() { + for 
(size_t i = 0; i < _arrNDV; ++i) { + size_t randArraySize = _uniformArrSizeDist(_gen); + const auto randArray = _nestedDataDescriptor->genRandomDataset(randArraySize, this); + _arraySet.push_back(randArray); + } + + if (_arrNDV > 0) { + _uniformArrIdxDist.param( + std::uniform_int_distribution::param_type(0, _arraySet.size() - 1)); + } +} + +/** + Generate a random string. It is possible (even expected) that the same parameters + will generate different strings on successive calls +*/ +std::string genRandomString(size_t len, std::mt19937_64& gen, size_t seed) { + std::string randStr; + randStr.reserve(len); + const constexpr char* kAlphabet = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + std::uniform_int_distribution uniformDist{0, std::strlen(kAlphabet) - 1}; + + for (size_t i = 0; i < len; ++i) { + size_t idx = uniformDist(gen); + const char ch = kAlphabet[idx]; + randStr += ch; + } + + return randStr; +} + +/** + Generate a string. This string will be deterministic in that the same + parameters will always generate the same string, even on different platforms. +*/ +std::string genString(size_t len, size_t seed) { + std::string str; + str.reserve(len); + + const constexpr char* kAlphabet = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + const int kAlphabetLength = strlen(kAlphabet); + + unsigned long long rand = seed; + for (size_t i = 0; i < len; ++i) { + // Library implementations of rand vary by compiler, naturally, Since we still + // want the appearance of randomness, but consistency across compilers, we use a linear + // congruential generator to choose characters for the string. The parameters chosen + // are from Numerical Recipes. We use the upper 32 bits when calculating the character + // index, as the lower 32 are essentially nonrandom -- a weakness of LCGs in general. + rand = 3935559000370003845ULL * rand + 269134368944950781ULL; + + int idx = (rand >> 32) % kAlphabetLength; + str += kAlphabet[idx]; + } + + return str; +} + +/** + Generate an array of values with the required ratio of int to string. This array will be + deterministic in that the same parameters will always generate the same array, even on + different platforms. +*/ +std::vector genFixedValueArray(size_t nElems, double intRatio, double strRatio) { + + std::vector values; + + const int intNDV = static_cast(nElems) / 4; + for (size_t i = 0; i < std::round(nElems * intRatio); ++i) { + const auto [tag, val] = makeInt64Value((i % intNDV) + 1); + values.emplace_back(tag, val); + } + + if (strRatio == 0.0) { + return values; + } + + // Generate a set of strings so that each string can be chosen multiple times in the test + // data set. 
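+    // (Editorial note.) genString(len, seed) is deterministic across platforms,
+    // so a call such as genFixedValueArray(100, 0.5, 0.5) always yields the same
+    // dataset: 50 integers cycling through 1..25 (intNDV = 100 / 4 = 25) and 50
+    // strings drawn from the 20 fixed 8-character strings generated below.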
+    // Generate a set of strings so that each string can be chosen multiple times in the test
+    // data set.
+    const size_t strNDV = nElems / 5;
+    std::vector<std::string> stringSet;
+    stringSet.reserve(strNDV);
+    for (size_t i = 0; i < strNDV; ++i) {
+        const auto randStr = genString(8, i);
+        stringSet.push_back(randStr);
+    }
+
+    for (size_t i = 0; i < std::round(nElems * strRatio); ++i) {
+        size_t idx = i % stringSet.size();
+        const auto randStr = stringSet[idx];
+        const auto [tag, val] = value::makeNewString(randStr);
+        values.emplace_back(tag, val);
+    }
+
+    return values;
+}
+
+std::vector<SBEValue> genRandomValueArray(size_t nElems,
+                                          double intRatio,
+                                          double strRatio,
+                                          size_t seed) {
+    std::vector<SBEValue> randValues;
+    const int intNDV = static_cast<int>(nElems) / 4;
+    const size_t strNDV = nElems / 5;
+    std::vector<std::string> stringSet;
+    stringSet.reserve(strNDV);
+
+    std::mt19937_64 gen{seed};
+    std::uniform_int_distribution<int> uniformDist{1, intNDV};
+
+    for (size_t i = 0; i < std::round(nElems * intRatio); ++i) {
+        const auto [tag, val] = makeInt64Value(uniformDist(gen));
+        randValues.emplace_back(tag, val);
+    }
+
+    // Generate a set of strings so that each string can be chosen multiple times in the test
+    // data set.
+    for (size_t i = 0; i < strNDV; ++i) {
+        const auto randStr = genRandomString(8, gen, seed);
+        stringSet.push_back(randStr);
+    }
+
+    std::uniform_int_distribution<size_t> idxDistr{0, stringSet.size() - 1};
+    for (size_t i = 0; i < std::round(nElems * strRatio); ++i) {
+        size_t idx = idxDistr(gen);
+        const auto randStr = stringSet[idx];
+        const auto [tag, val] = value::makeNewString(randStr);
+        randValues.emplace_back(tag, val);
+    }
+
+    return randValues;
+}
+
+std::vector<SBEValue> nestArrays(const std::vector<SBEValue>& input, size_t emptyArrayCount) {
+    std::vector<SBEValue> result;
+    auto [arrayTag, arrayVal] = value::makeNewArray();
+
+    for (size_t i = 0; i < input.size(); i++) {
+        const auto v = input[i].get();
+        const auto [tagCopy, valCopy] = value::copyValue(v.first, v.second);
+
+        if (i % 10 < 5) {
+            // 50% of values remain scalar.
+            result.emplace_back(tagCopy, valCopy);
+        } else {
+            // 50% of the values are grouped into arrays of size 10.
+            value::Array* arr = value::getArrayView(arrayVal);
+            arr->push_back(tagCopy, valCopy);
+            if (arr->size() == 10) {
+                result.emplace_back(arrayTag, arrayVal);
+                std::tie(arrayTag, arrayVal) = value::makeNewArray();
+            }
+        }
+    }
+
+    for (size_t i = 0; i < emptyArrayCount; ++i) {
+        auto [emptyArrayTag, emptyArrayVal] = value::makeNewArray();
+        result.emplace_back(emptyArrayTag, emptyArrayVal);
+    }
+
+    // It's possible that the array still contains something. If it's empty,
+    // we can safely release it. If not, append it to the result.
+    value::Array* arr = value::getArrayView(arrayVal);
+    if (arr->size() > 0) {
+        result.emplace_back(arrayTag, arrayVal);
+    } else {
+        value::releaseValue(arrayTag, arrayVal);
+    }
+
+    return result;
+}
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/rand_utils.h b/src/mongo/db/query/stats/rand_utils.h
new file mode 100644
index 00000000000..89e4741fd2a
--- /dev/null
+++ b/src/mongo/db/query/stats/rand_utils.h
@@ -0,0 +1,188 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * .
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include
+#include
+
+#include "mongo/db/query/stats/value_utils.h"
+
+namespace mongo::stats {
+// A simple histogram describing the distribution of values of each data type.
+using DataTypeDistribution = std::map<double, sbe::value::TypeTags>;
+
+/**
+    Describes the distribution of a dataset according to type and weight. The other constructor
+    parameters describe the various data types that can be emitted and correspond to the
+    similarly named fields.
+ */
+class DatasetDescriptor {
+public:
+    DatasetDescriptor(const DataTypeDistribution& dataTypeDistribution,
+                      size_t intNDV,
+                      int minInt,
+                      int maxInt,
+                      size_t strNDV,
+                      size_t minStrLen,
+                      size_t maxStrLen,
+                      std::shared_ptr<DatasetDescriptor> nestedDataDescriptor = nullptr,
+                      double reuseScalarsRatio = 0,
+                      size_t arrNDV = 0,
+                      size_t minArrLen = 0,
+                      size_t maxArrLen = 0);
+
+    // Generate a random dataset of 'nElems' according to the data distribution characteristics in
+    // this object.
+    std::vector<SBEValue> genRandomDataset(size_t nElems, DatasetDescriptor* parentDesc = nullptr);
+
+private:
+    // Select a random value data type.
+    sbe::value::TypeTags getRandDataType() {
+        double key = _uniformRandProbability(_gen);
+        return (*_dataTypeDistribution.upper_bound(key)).second;
+    }
+
+    // Generate a random string with size 'len'.
+    std::string genRandomString(size_t len);
+
+    // Generate a random array with length determined uniformly between minArrLen and maxArrLen.
+    std::vector<SBEValue> genRandomArray();
+
+    // Generate a set of random arrays that are chosen from when generating array data.
+    void fillRandomArraySet();
+
+private:
+    using InternalDataTypeDistribution = std::map<double, sbe::value::TypeTags>;
+    /*
+     * General distribution characteristics.
+     */
+
+    // Pseudo-random generator.
+    std::mt19937_64 _gen;
+    // Random probabilities. Used to:
+    // - Select Value data types as random indexes in '_dataTypeDistribution'.
+    // - Select the source of values - either existing scalars or new.
+    std::uniform_real_distribution<double> _uniformRandProbability{0.0, 1.0};
+    // Distribution of different SBE data types. Each type receives its specified share of the
+    // generated values.
+    InternalDataTypeDistribution _dataTypeDistribution;
+    double _reuseScalarsRatio;
+
+    /*
+     * Integer data parameters.
+     */
+
+    // Number of distinct integer values.
+    const size_t _intNDV;
+    // A set of integers to choose from while generating random integers.
+    std::vector<int> _intSet;
+    // Generator of random integers with uniform distribution.
+    std::uniform_int_distribution<int> _uniformIntDist;
+    // Generator of random indexes into the set of integers '_intSet'.
+    std::uniform_int_distribution<size_t> _uniformIntIdxDist;
+
+    /*
+     * String data parameters.
+     */
+
+    // All strings draw characters from this alphabet.
+    static const std::string _alphabet;
+    // A set of random strings to choose from. In theory there can be duplicates, but this is very
+    // unlikely. We don't care much if there are a few duplicates anyway.
+    std::vector<std::string> _stringSet;
+    // Generator of random indexes into the set of characters '_alphabet'.
+    std::uniform_int_distribution<size_t> _uniformCharIdxDist{0, _alphabet.size() - 1};
+    // Generator of random indexes into the set of strings '_stringSet'.
+    std::uniform_int_distribution<size_t> _uniformStrIdxDist;
+
+    /*
+     * Array data parameters.
+     */
+
+    // Number of distinct arrays.
+    // TODO: currently not used. The idea is to use it in the same way as arrays - pre-generate
+    // '_arrNDV' arrays, then select randomly from this initial set.
+    size_t _arrNDV;
+    // Set of arrays to pick from when generating random data.
+    std::vector<std::vector<SBEValue>> _arraySet;
+    // Generator of random array sizes.
+    std::uniform_int_distribution<size_t> _uniformArrSizeDist;
+    // Descriptor of the dataset within each array.
+    std::shared_ptr<DatasetDescriptor> _nestedDataDescriptor;
+    // Generator of random indexes into the set of arrays '_arraySet'.
+    std::uniform_int_distribution<size_t> _uniformArrIdxDist;
+};
+
+/**
+    Generate a pseudorandom string of length 'len'.
+    * The alphabet is fixed as [0-9][a-z][A-Z]
+    * Characters are chosen uniformly from the alphabet
+    * Randomness is implemented such that it is independent of the platform,
+      i.e. given the same length and seed on any platform, we will produce the
+      same string.
+*/
+std::string genString(size_t len, size_t seed);
+
+/**
+    Generate a set of elements consisting of strings and ints in the
+    requested ratio. The generated array will contain the same values given the same
+    inputs on all platforms.
+ */
+std::vector<SBEValue> genFixedValueArray(size_t nElems, double intRatio, double strRatio);
+
+/**
+    Generate a random string of length 'len'.
+    * The alphabet is fixed as [0-9][a-z][A-Z].
+    * Characters are chosen uniformly from the alphabet.
+    * Generated strings are likely to differ by platform, so derived values depending on them
+      are also likely to change.
+ */
+std::string genRandomString(size_t len, std::mt19937_64& gen, size_t seed);
+
+
+/**
+    Generate a uniformly random set of elements consisting of strings and ints in the
+    requested ratio. The resulting array is very likely to differ between platforms, even
+    with the same seed. Thus, derived values are also likely to change.
+
+    Prefer genFixedValueArray when comparing derived values against constants.
+ */
+std::vector<SBEValue> genRandomValueArray(size_t nElems,
+                                          double intRatio,
+                                          double strRatio,
+                                          size_t seed);
+
+/**
+    Generate a set of values consisting of half scalars, and half arrays of length 10.
+
+    Values contained in the result will be drawn from the input vector.
+ */
+std::vector<SBEValue> nestArrays(const std::vector<SBEValue>& input, size_t emptyArrayCount);
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/rand_utils_new.cpp b/src/mongo/db/query/stats/rand_utils_new.cpp
new file mode 100644
index 00000000000..a8e8fab3bb8
--- /dev/null
+++ b/src/mongo/db/query/stats/rand_utils_new.cpp
@@ -0,0 +1,250 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/stats/rand_utils_new.h" + +#include +#include +#include +#include +#include + +#include "mongo/db/exec/sbe/values/value.h" +#include "mongo/util/assert_util.h" + +namespace mongo::stats { +namespace value = sbe::value; + +const std::string StrDistribution::_alphabet = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + +void DataTypeDistrNew::generate(std::vector& randValues, std::mt19937_64& gen) { + if (_nullsRatio > 0 && _nullSelector(gen) < _nullsRatio) { + auto [tag, val] = makeNullValue(); + randValues.emplace_back(tag, val); + } else { + size_t idx = (*_idxDist)(gen); + const auto val = _valSet.at(idx); + auto [copyTag, copyVal] = copyValue(val.getTag(), val.getValue()); + randValues.emplace_back(copyTag, copyVal); + } +} + +void DataTypeDistrNew::generate(value::Array* randValueArray, std::mt19937_64& gen) { + if (_nullsRatio > 0 && _nullSelector(gen) < _nullsRatio) { + auto [tag, val] = makeNullValue(); + randValueArray->push_back(tag, val); + } else { + size_t idx = (*_idxDist)(gen); + const auto val = _valSet.at(idx); + auto [copyTag, copyVal] = copyValue(val.getTag(), val.getValue()); + randValueArray->push_back(copyTag, copyVal); + } +} + +IntDistribution::IntDistribution(MixedDistributionDescriptor distrDescriptor, + double weight, + size_t ndv, + int minInt, + int maxInt, + double nullsRatio) + : DataTypeDistrNew(distrDescriptor, + value::TypeTags::NumberInt64, + weight, + std::min(ndv, static_cast(std::abs(maxInt - minInt))), + nullsRatio), + _minInt(minInt), + _maxInt(maxInt) { + uassert(6660507, "Maximum integer number must be >= the minimum one.", (maxInt >= minInt)); +} + +void IntDistribution::init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) { + std::set tmpIntSet; + std::uniform_int_distribution uniformIntDist{_minInt, _maxInt}; + + if (_ndv == static_cast(std::abs(_maxInt - _minInt))) { + // This is a dense set of all ints in the range. 
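+        // For illustration: the constructor above clamps _ndv to the width of the range, so
+        // with minInt = 0, maxInt = 100 and a requested ndv of 500, _ndv becomes 100, this
+        // branch is taken, and every integer in [minInt, maxInt] is enumerated below instead
+        // of being sampled: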
+ for (int i = _minInt; i <= _maxInt; ++i) { + tmpIntSet.insert(i); + } + } else { + size_t randCount = 0; + while (tmpIntSet.size() < _ndv && randCount < 10 * _ndv) { + int randInt = uniformIntDist(gen); + ++randCount; + tmpIntSet.insert(randInt); + } + } + uassert(6660508, "Too few integers generated.", (double)tmpIntSet.size() / (double)_ndv > 0.99); + _valSet.reserve(tmpIntSet.size()); + for (const auto randInt : tmpIntSet) { + const auto [tag, val] = makeInt64Value(randInt); + _valSet.emplace_back(tag, val); + } + + _idxDist = MixedDistribution::make(_mixedDistrDescriptor, 0, _valSet.size() - 1); +} + +StrDistribution::StrDistribution(MixedDistributionDescriptor distrDescriptor, + double weight, + size_t ndv, + size_t minStrLen, + size_t maxStrLen, + double nullsRatio) + : DataTypeDistrNew(distrDescriptor, value::TypeTags::StringBig, weight, ndv, nullsRatio), + _minStrLen(minStrLen), + _maxStrLen(maxStrLen) { + uassert(6660509, "Maximum string size must be >= the minimum one.", (maxStrLen >= minStrLen)); +} + +void StrDistribution::init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) { + // Generate a set of random strings with random sizes between _minStrLen and _maxStrLen. + _valSet.reserve(_ndv); + std::uniform_int_distribution uniformStrSizeDistr{_minStrLen, _maxStrLen}; + for (size_t i = 0; i < _ndv; ++i) { + size_t len = uniformStrSizeDistr(gen); + const auto randStr = genRandomString(len, gen); + const auto [tag, val] = value::makeNewString(randStr); + _valSet.emplace_back(tag, val); + } + + _idxDist = MixedDistribution::make(_mixedDistrDescriptor, 0, _valSet.size() - 1); +} + +std::string StrDistribution::genRandomString(size_t len, std::mt19937_64& gen) { + std::string randStr; + randStr.reserve(len); + for (size_t i = 0; i < len; ++i) { + size_t idx = _uniformCharIdxDist(gen); + const char ch = _alphabet[idx]; + randStr += ch; + } + + return randStr; +} + +ArrDistribution::ArrDistribution(MixedDistributionDescriptor distrDescriptor, + double weight, + size_t ndv, + size_t minArrLen, + size_t maxArrLen, + std::unique_ptr arrayDataDescriptor, + double reuseScalarsRatio, + double nullsRatio) + : DataTypeDistrNew(distrDescriptor, value::TypeTags::Array, weight, ndv, nullsRatio), + _uniformArrSizeDist{minArrLen, maxArrLen}, + _arrayDataDescriptor(std::move(arrayDataDescriptor)), + _reuseScalarsRatio(reuseScalarsRatio) { + uassert(6660510, + "Array specs must be 0 if there is no array data descriptor.", + _arrayDataDescriptor || (ndv == 0 && minArrLen == 0 && maxArrLen == 0)); + uassert(6660511, + "Nested arrays requires sensible array lengths.", + !_arrayDataDescriptor || maxArrLen >= minArrLen); + uassert(6660512, + "reuseScalarsRatio must be in [0, 1].", + reuseScalarsRatio >= 0 && reuseScalarsRatio <= 1.0); +} + +void ArrDistribution::init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) { + uassert(6660513, "There must always be a parent data descriptor.", parentDesc); + + // Extract the per-type probabilities from the parent descriptor, but set the array probability + // to 0 to avoid self-recursion. + std::vector parentProbabilities; + for (const auto& dtd : parentDesc->_dataTypeDistributions) { + double prob = (dtd->tag() == value::TypeTags::Array) ? 0 : dtd->weight(); + parentProbabilities.push_back(prob); + } + std::discrete_distribution parentDataTypeSelector; + parentDataTypeSelector.param(std::discrete_distribution::param_type( + parentProbabilities.begin(), parentProbabilities.end())); + + // Generate _ndv distinct arrays, and store them in _valSet. 
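+    // For illustration: with reuseScalarsRatio = 0.5, roughly half of the array elements are
+    // drawn from the parent's scalar distributions via 'parentDataTypeSelector' (whose array
+    // weight was zeroed above, so arrays never recurse into themselves through this path),
+    // and the other half from '_arrayDataDescriptor'. The loop below then materializes the
+    // _ndv candidate arrays: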
+ for (size_t i = 0; i < _ndv; ++i) { + auto [arrayTag, arrayVal] = value::makeNewArray(); + value::Array* arr = value::getArrayView(arrayVal); + size_t randArraySize = _uniformArrSizeDist(gen); + arr->reserve(randArraySize); + // Generate the data for one random array. + for (size_t j = 0; j < randArraySize; ++j) { + DataTypeDistrNew* dtd = nullptr; + size_t idx; + double reuseParentProb = _uniformRandProbability(gen); + if (reuseParentProb < _reuseScalarsRatio) { + // Pick a random data type descriptor from the parent. + idx = parentDataTypeSelector(gen); + dtd = parentDesc->_dataTypeDistributions.at(idx).get(); + } else { + idx = _arrayDataDescriptor->_dataTypeSelector(gen); + dtd = _arrayDataDescriptor->_dataTypeDistributions.at(idx).get(); + } + dtd->generate(arr, gen); + } + _valSet.emplace_back(arrayTag, arrayVal); + } + + _idxDist = MixedDistribution::make(_mixedDistrDescriptor, 0, _valSet.size() - 1); +} + +DatasetDescriptorNew::DatasetDescriptorNew(TypeDistrVector dataTypeDistributions, + std::mt19937_64& gen) + : _dataTypeDistributions(std::move(dataTypeDistributions)), _gen{gen} { + + // The probability of each type to be chosen. Extracted into a vector in order to setup a + // discrete_distribution. + std::vector probabilities; + probabilities.reserve(_dataTypeDistributions.size()); + for (auto& dtd : _dataTypeDistributions) { + dtd->init(this, gen); + probabilities.push_back(dtd->weight()); + } + _dataTypeSelector.param( + std::discrete_distribution::param_type(probabilities.begin(), probabilities.end())); +} + +DataTypeDistrNew* DatasetDescriptorNew::getRandDataTypeDist() { + size_t idx = _dataTypeSelector(_gen); + return _dataTypeDistributions[idx].get(); +} + +std::vector DatasetDescriptorNew::genRandomDataset(size_t nElems) { + std::vector randValues; + randValues.reserve(nElems); + + for (size_t i = 0; i < nElems; ++i) { + DataTypeDistrNew* dtd = getRandDataTypeDist(); + dtd->generate(randValues, _gen); + } + + return randValues; +} + + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/rand_utils_new.h b/src/mongo/db/query/stats/rand_utils_new.h new file mode 100644 index 00000000000..be77578fc28 --- /dev/null +++ b/src/mongo/db/query/stats/rand_utils_new.h @@ -0,0 +1,353 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include
+#include
+
+#include "mongo/db/query/stats/value_utils.h"
+
+namespace mongo::stats {
+
+class DatasetDescriptorNew;
+
+/**
+ * A base class for wrappers of STL random distributions that produce size_t values within a range.
+ * This class enables polymorphic usage of random distributions, for instance to implement a mix of
+ * distributions.
+ */
+class RandomDistribution {
+public:
+    RandomDistribution() = default;
+    RandomDistribution(const RandomDistribution&) = default;
+    RandomDistribution(RandomDistribution&&) = default;
+    RandomDistribution& operator=(const RandomDistribution&) = default;
+    RandomDistribution& operator=(RandomDistribution&&) = default;
+    virtual ~RandomDistribution() = default;
+
+    virtual size_t operator()(std::mt19937_64& gen) = 0;
+};
+
+/**
+    A uniform random distribution of size_t within a range
+ */
+class UniformDistr : public RandomDistribution {
+public:
+    UniformDistr(size_t min, size_t max) : _distr{min, max}, _min(min), _max(max) {}
+
+    size_t operator()(std::mt19937_64& gen) override {
+        size_t result = _distr(gen);
+        uassert(6660540, "Random index out of range", result >= _min && result <= _max);
+        return result;
+    }
+
+private:
+    std::uniform_int_distribution<size_t> _distr;
+    size_t _min;
+    size_t _max;
+};
+
+/**
+ * Wrapper of normal distribution that is guaranteed to produce size_t values within a certain
+ * range. The STL class normal_distribution takes a mean and standard deviation. This class
+ * computes a suitable mean and standard deviation from the required [min,max] boundaries.
+ */
+class NormalDistr : public RandomDistribution {
+public:
+    NormalDistr(size_t min, size_t max)
+        : _distr{(double)(min + max) / 2.0, (double)(max - min) / 4.0},
+          _backup{min, max},
+          _min((double)min),
+          _max((double)max) {}
+
+    size_t operator()(std::mt19937_64& gen) override {
+        size_t result = std::round(_distr(gen));
+        size_t trials = 0;
+        // If the result is outside the range (an event with low probability), try 10 more times to
+        // get a number in the range.
+        while (!(result >= _min && result <= _max) && trials < 10) {
+            double randNum = _distr(gen);
+            if (randNum < _min) {
+                result = std::ceil(randNum);
+            } else if (randNum > _max) {
+                result = std::floor(randNum);
+            } else {
+                result = std::round(randNum);
+            }
+            ++trials;
+        }
+        if (result < _min || result > _max) {
+            // We couldn't generate a number in [min,max] within 10 attempts. Generate a uniform
+            // number.
+            result = _backup(gen);
+        }
+        uassert(6660541, "Random index out of range", result >= _min && result <= _max);
+        return result;
+    }
+
+private:
+    std::normal_distribution<double> _distr;
+    std::uniform_int_distribution<size_t> _backup;
+    double _min;
+    double _max;
+};
+
+enum class DistrType { kUniform, kNormal };
+
+using MixedDistributionDescriptor = std::vector<std::pair<DistrType, double>>;
+
+/**
+ * Generator for a mixed distribution, where the mix is over types of distributions, with the
+ * probabilities specified in 'distrProbabilities'.
+ */
+class MixedDistribution {
+public:
+    MixedDistribution(std::vector<std::unique_ptr<RandomDistribution>> distrMix,
+                      std::vector<double>& distrProbabilities)
+        : _distrMix(std::move(distrMix)) {
+        _distDist.param(std::discrete_distribution<size_t>::param_type(distrProbabilities.begin(),
+                                                                       distrProbabilities.end()));
+    }
+
+    static std::unique_ptr<MixedDistribution> make(MixedDistributionDescriptor& descriptor,
+                                                   size_t min,
+                                                   size_t max) {
+        std::vector<double> distrProbabilities;
+        std::vector<std::unique_ptr<RandomDistribution>> distrMix;
+
+        for (const auto& [distrType, weight] : descriptor) {
+            distrProbabilities.push_back(weight);
+            switch (distrType) {
+                case DistrType::kUniform:
+                    distrMix.emplace_back(std::make_unique<UniformDistr>(min, max));
+                    break;
+                case DistrType::kNormal:
+                    distrMix.emplace_back(std::make_unique<NormalDistr>(min, max));
+                    break;
+                default:
+                    MONGO_UNREACHABLE;
+            }
+        }
+
+        return std::make_unique<MixedDistribution>(std::move(distrMix), distrProbabilities);
+    }
+
+    size_t operator()(std::mt19937_64& gen) {
+        size_t distIdx = _distDist(gen);
+        size_t result = (*_distrMix.at(distIdx))(gen);
+        return result;
+    }
+
+private:
+    // Mix of different distributions. There can be instances of the same type of distribution,
+    // because they can still be defined differently.
+    std::vector<std::unique_ptr<RandomDistribution>> _distrMix;
+    // Distribution of distributions - select the current distribution with a certain probability.
+    std::discrete_distribution<size_t> _distDist;
+};
+
+/**
+ * Descriptor of a typed data distribution
+ */
+class DataTypeDistrNew {
+public:
+    DataTypeDistrNew(MixedDistributionDescriptor distrDescriptor,
+                     sbe::value::TypeTags tag,
+                     double weight,
+                     size_t ndv,
+                     double nullsRatio = 0.0)
+        : _mixedDistrDescriptor(distrDescriptor),
+          _tag(tag),
+          _weight(weight),
+          _ndv(ndv),
+          _nullsRatio(nullsRatio) {
+        uassert(6660542, "NDV must be > 0.", ndv > 0);
+        uassert(6660543, "nullsRatio must be in [0, 1].", nullsRatio >= 0 && nullsRatio <= 1);
+    }
+
+    virtual ~DataTypeDistrNew() = default;
+
+    /**
+     * Generate all unique values that generation chooses from, and store them in '_valSet'.
+     * Different data types provide different implementations.
+     * @todo: The 'parentDesc' parameter is used only by array generation. Consider a different way
+     * of passing it only to that type.
+     */
+    virtual void init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) = 0;
+
+    /**
+     * Generate a single random value, and store it in the 'randValues' vector.
+     */
+    void generate(std::vector<SBEValue>& randValues, std::mt19937_64& gen);
+
+    /**
+     * Generate a single random value, and store it in the 'randValueArray' array.
+     */
+    void generate(sbe::value::Array* randValueArray, std::mt19937_64& gen);
+
+    /**
+     * Custom equality comparison for storage in sets. There can be only one datatype in a set.
+     */
+    bool operator==(const DataTypeDistrNew& d) const {
+        return this->_tag == d._tag;
+    }
+
+    sbe::value::TypeTags tag() const {
+        return _tag;
+    }
+
+    double weight() const {
+        return _weight;
+    }
+
+protected:
+    MixedDistributionDescriptor _mixedDistrDescriptor;
+    sbe::value::TypeTags _tag;
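+    // A minimal usage sketch for the mixed index distribution stored in '_idxDist' below
+    // (weights and bounds illustrative): an 80/20 uniform/normal mix over indexes [0, 99]
+    // can be built and sampled as
+    //     MixedDistributionDescriptor desc{{DistrType::kUniform, 0.8},
+    //                                      {DistrType::kNormal, 0.2}};
+    //     auto idxDist = MixedDistribution::make(desc, 0, 99);
+    //     std::mt19937_64 gen{42};
+    //     size_t idx = (*idxDist)(gen);  // ~80% uniform draws, ~20% normal-shaped draws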
+    // Weight that determines the probability of a value of this type.
+    const double _weight;
+    const size_t _ndv;
+    // A set of (randomly generated) values to choose from when generating random datasets.
+    std::vector<SBEValue> _valSet;
+    // Generator of random indexes into a set of values.
+    // std::uniform_int_distribution<size_t> _idxDist;
+    std::unique_ptr<RandomDistribution> _idxDist;
+    // Percent of null values in the dataset.
+    double _nullsRatio;
+    std::uniform_real_distribution<double> _nullSelector{0, 1};
+
+    friend class DatasetDescriptorNew;
+};
+
+using TypeDistrVector = std::vector<std::unique_ptr<DataTypeDistrNew>>;
+
+/**
+ * Integer data distribution.
+ */
+class IntDistribution : public DataTypeDistrNew {
+public:
+    IntDistribution(MixedDistributionDescriptor distrDescriptor,
+                    double weight,
+                    size_t ndv,
+                    int minInt,
+                    int maxInt,
+                    double nullsRatio = 0);
+
+    /*
+     * Generate a set of random integers, and store them in _valSet.
+     */
+    void init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) override;
+
+protected:
+    int _minInt;
+    int _maxInt;
+};
+
+/**
+ * String data distribution.
+ */
+class StrDistribution : public DataTypeDistrNew {
+public:
+    StrDistribution(MixedDistributionDescriptor distrDescriptor,
+                    double weight,
+                    size_t ndv,
+                    size_t minStrLen,
+                    size_t maxStrLen,
+                    double nullsRatio = 0);
+
+    /*
+     * Generate a set of random strings, and store them in _valSet.
+     */
+    void init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) override;
+
+protected:
+    std::string genRandomString(size_t len, std::mt19937_64& gen);
+
+    size_t _minStrLen;
+    size_t _maxStrLen;
+    // All strings draw characters from this alphabet.
+    static const std::string _alphabet;
+    // Generator of random indexes into the set of characters '_alphabet'.
+    std::uniform_int_distribution<size_t> _uniformCharIdxDist{0, _alphabet.size() - 1};
+};
+
+/**
+ * SBE array data distribution.
+ */
+class ArrDistribution : public DataTypeDistrNew {
+public:
+    ArrDistribution(MixedDistributionDescriptor distrDescriptor,
+                    double weight,
+                    size_t ndv,
+                    size_t minArrLen,
+                    size_t maxArrLen,
+                    std::unique_ptr<DatasetDescriptorNew> arrayDataDescriptor,
+                    double reuseScalarsRatio = 0,
+                    double nullsRatio = 0);
+
+private:
+    void init(DatasetDescriptorNew* parentDesc, std::mt19937_64& gen) override;
+
+    // Generator of random array sizes.
+    std::uniform_int_distribution<size_t> _uniformArrSizeDist;
+    // Descriptor of the dataset within each array.
+    std::unique_ptr<DatasetDescriptorNew> _arrayDataDescriptor;
+    // Randomly select a parent or a child distribution when generating random array data.
+    std::uniform_real_distribution<double> _uniformRandProbability{0.0, 1.0};
+    double _reuseScalarsRatio;
+};
+
+/**
+    Given a list of typed data distributions, this class is used to generate a vector of values
+    according to the distribution weights.
+*/
+class DatasetDescriptorNew {
+public:
+    DatasetDescriptorNew(TypeDistrVector dataTypeDistributions, std::mt19937_64& gen);
+
+    // Generate a random dataset of 'nElems' according to the data distribution characteristics in
+    // this object.
+    std::vector<SBEValue> genRandomDataset(size_t nElems);
+
+private:
+    // Select a random value data type.
+    DataTypeDistrNew* getRandDataTypeDist();
+
+    // Distribution of different SBE data types. Each type receives its specified share of the
+    // generated values.
+    // TODO: is it a better idea to store shared_ptr or raw pointers to enable reuse?
+    TypeDistrVector _dataTypeDistributions;
+    // Pseudo-random generator.
+    std::mt19937_64& _gen;
+    // Select a random data type distribution.
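+    // A minimal end-to-end sketch of driving this class (weights and parameters
+    // illustrative): build per-type distributions, wrap them in a descriptor, and request
+    // N values:
+    //     std::mt19937_64 gen{42};
+    //     MixedDistributionDescriptor uniform{{DistrType::kUniform, 1.0}};
+    //     TypeDistrVector types;
+    //     types.emplace_back(std::make_unique<IntDistribution>(uniform, 0.7, 100, 0, 1000));
+    //     types.emplace_back(std::make_unique<StrDistribution>(uniform, 0.3, 100, 5, 20));
+    //     DatasetDescriptorNew desc{std::move(types), gen};
+    //     auto data = desc.genRandomDataset(1000);  // ~70% int64 values, ~30% strings
+    // '_dataTypeSelector' below performs the weighted type choice for each generated value.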
+ std::discrete_distribution _dataTypeSelector; + + friend class ArrDistribution; +}; + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/scalar_histogram.cpp b/src/mongo/db/query/stats/scalar_histogram.cpp new file mode 100644 index 00000000000..87ab175fe83 --- /dev/null +++ b/src/mongo/db/query/stats/scalar_histogram.cpp @@ -0,0 +1,192 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/stats/scalar_histogram.h" + +#include "mongo/db/exec/sbe/values/bson.h" +#include "mongo/db/exec/sbe/values/value.h" + +namespace mongo::stats { +Bucket::Bucket( + double equalFreq, double rangeFreq, double cumulativeFreq, double ndv, double cumulativeNDV) + : _equalFreq(equalFreq), + _rangeFreq(rangeFreq), + _cumulativeFreq(cumulativeFreq), + _ndv(ndv), + _cumulativeNDV(cumulativeNDV) { + uassert(6695702, "Invalid equalFreq", _equalFreq >= 0.0); + uassert(6695703, "Invalid rangeFreq", _rangeFreq >= 0.0); + uassert(6695704, "Invalid ndv", _ndv <= _rangeFreq); + uassert(6695705, "Invalid cumulative frequency", _cumulativeFreq >= _equalFreq + _rangeFreq); + uassert(6695706, "Invalid cumulative ndv", _cumulativeNDV >= _ndv + 1.0); +} + +std::string Bucket::toString() const { + std::ostringstream os; + os << "equalFreq: " << _equalFreq << ", rangeFreq: " << _rangeFreq + << ", cumulativeFreq: " << _cumulativeFreq << ", ndv: " << _ndv + << ", cumulativeNDV: " << _cumulativeNDV; + return os.str(); +} + +std::string Bucket::dump() const { + std::ostringstream os; + os << _equalFreq << ", " << _rangeFreq << ", " << _ndv; + return os.str(); +} + +BSONObj Bucket::serialize() const { + BSONObjBuilder bob; + bob.appendNumber("boundaryCount", _equalFreq); + bob.appendNumber("rangeCount", _rangeFreq); + bob.appendNumber("rangeDistincts", _ndv); + bob.appendNumber("cumulativeCount", _cumulativeFreq); + bob.appendNumber("cumulativeDistincts", _cumulativeNDV); + bob.doneFast(); + return bob.obj(); +} + +ScalarHistogram::ScalarHistogram() : ScalarHistogram({}, {}) {} + +ScalarHistogram::ScalarHistogram(const StatsHistogram& histogram) { + for (const auto& bucket : histogram.getBuckets()) { + Bucket b(bucket.getBoundaryCount(), + 
bucket.getRangeCount(),
+                 bucket.getCumulativeCount(),
+                 bucket.getRangeDistincts(),
+                 bucket.getCumulativeDistincts());
+        _buckets.push_back(std::move(b));
+    }
+    for (const auto& bound : histogram.getBounds()) {
+        // We cannot insert a view here, because the lifetime of the bound is shorter than
+        // that of the histogram. In the case of a larger type, e.g. BigString/bsonString, we need
+        // to copy over the entire string as well, not just a pointer to memory which may be
+        // deallocated before we need it.
+        auto value = sbe::bson::convertFrom<false>(bound.getElement());
+        _bounds.push_back(value.first, value.second);
+    }
+}
+
+ScalarHistogram::ScalarHistogram(sbe::value::Array bounds, std::vector<Bucket> buckets)
+    : _bounds(std::move(bounds)), _buckets(std::move(buckets)) {
+    uassert(6695707, "Invalid sizes", _bounds.size() == _buckets.size());
+}
+
+std::string ScalarHistogram::toString() const {
+    std::ostringstream os;
+    os << "[";
+    for (size_t i = 0; i < _buckets.size(); i++) {
+        os << "{val: " << _bounds.getAt(i) << ", " << _buckets.at(i).toString() << "}";
+        if (_buckets.size() - i > 1)
+            os << ",";
+    }
+    os << "]";
+    return os.str();
+}
+
+std::string ScalarHistogram::plot() const {
+    std::ostringstream os;
+    double maxFreq = 0;
+    const double maxBucketSize = 100;
+
+    for (const auto& bucket : _buckets) {
+        double maxBucketFreq = std::max(bucket._equalFreq, bucket._rangeFreq);
+        maxFreq = std::max(maxFreq, maxBucketFreq);
+    }
+
+    std::vector<std::pair<double, std::string>> headers;
+    size_t maxHeaderSize = 0;
+    for (size_t i = 0; i < _buckets.size(); ++i) {
+        std::ostringstream rngHeader;
+        std::ostringstream eqlHeader;
+        double scaledRngF = maxBucketSize * _buckets[i]._rangeFreq / maxFreq;
+        double scaledEqlF = maxBucketSize * _buckets[i]._equalFreq / maxFreq;
+        rngHeader << _bounds.getAt(i) << ": " << _buckets[i]._rangeFreq;
+        eqlHeader << _bounds.getAt(i) << ": " << _buckets[i]._equalFreq;
+        auto rngStr = rngHeader.str();
+        maxHeaderSize = std::max(maxHeaderSize, rngStr.size());
+        headers.emplace_back(scaledRngF, rngStr);
+        auto eqlStr = eqlHeader.str();
+        maxHeaderSize = std::max(maxHeaderSize, eqlStr.size());
+        headers.emplace_back(scaledEqlF, eqlStr);
+    }
+
+    const std::string maxLine(maxBucketSize + maxHeaderSize + 3, '-');
+    os << maxLine << "\n";
+    for (size_t j = 0; j < headers.size(); ++j) {
+        auto header = headers.at(j);
+        header.second.resize(maxHeaderSize, ' ');
+        const std::string bar(std::round(header.first), '*');
+        os << header.second << " | " << bar << "\n";
+    }
+    os << maxLine << "\n";
+
+    return os.str();
+}
+
+std::string ScalarHistogram::dump() const {
+    std::ostringstream os;
+    os << "Histogram:\n{";
+    for (size_t i = 0; i < _buckets.size(); i++) {
+        os << "{" << _bounds.getAt(i) << ", " << _buckets.at(i).dump() << "},\n";
+    }
+    os << "}";
+    return os.str();
+}
+
+const sbe::value::Array& ScalarHistogram::getBounds() const {
+    return _bounds;
+}
+
+const std::vector<Bucket>& ScalarHistogram::getBuckets() const {
+    return _buckets;
+}
+
+BSONObj ScalarHistogram::serialize() const {
+    BSONObjBuilder histogramBuilder;
+
+    // Construct bucket BSON.
+    auto buckets = getBuckets();
+    BSONArrayBuilder bucketsBuilder(histogramBuilder.subarrayStart("buckets"));
+    for (const auto& bucket : buckets) {
+        bucketsBuilder.append(bucket.serialize());
+    }
+    bucketsBuilder.doneFast();
+
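+    // For illustration, the finished document has the shape (field names as emitted by
+    // Bucket::serialize(); values hypothetical):
+    //     {buckets: [{boundaryCount: 3, rangeCount: 4, rangeDistincts: 2,
+    //                 cumulativeCount: 7, cumulativeDistincts: 3}, ...],
+    //      bounds: [5, 10, ...]}
+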
+    // Construct bucket bounds BSON.
+    auto bounds = getBounds();
+    BSONArrayBuilder boundsBuilder(histogramBuilder.subarrayStart("bounds"));
+    sbe::bson::convertToBsonObj(boundsBuilder, &bounds);
+    boundsBuilder.doneFast();
+
+    histogramBuilder.doneFast();
+    return histogramBuilder.obj();
+}
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/scalar_histogram.h b/src/mongo/db/query/stats/scalar_histogram.h
new file mode 100644
index 00000000000..0473f369af1
--- /dev/null
+++ b/src/mongo/db/query/stats/scalar_histogram.h
@@ -0,0 +1,120 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * .
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+#include "mongo/db/exec/sbe/values/value.h"
+#include "mongo/db/query/stats/stats_gen.h"
+
+namespace mongo::stats {
+
+/**
+ * Statistics related to a single ScalarHistogram bucket. The boundary value is kept in a separate
+ * array, so that each bucket has a corresponding boundary value. The reason for this is to manage
+ * the memory of the values.
+ */
+struct Bucket {
+    Bucket(double equalFreq,
+           double rangeFreq,
+           double cumulativeFreq,
+           double ndv,
+           double cumulativeNDV);
+
+    std::string toString() const;
+    // Helper function to dump the bucket content as needed by histogram creation in the unit
+    // tests.
+    std::string dump() const;
+
+    // Frequency of the bound value itself.
+    double _equalFreq;
+
+    // Frequency of other values.
+    double _rangeFreq;
+
+    // Sum of frequencies of preceding buckets to avoid recomputing. Includes both _equalFreq and
+    // _rangeFreq.
+    double _cumulativeFreq;
+
+    // Number of distinct values in this bucket, excludes the bound.
+    double _ndv;
+
+    // Sum of distinct values in preceding buckets including this bucket.
+    double _cumulativeNDV;
+
+    // Serialize to BSON for storage in stats collection.
+    BSONObj serialize() const;
+};
+
+/**
+ * A ScalarHistogram over a set of values. The ScalarHistogram consists of two parallel vectors -
+ * one with the individual value statistics, and another one with the actual boundary values.
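+ *
+ * For example (illustrative values): with bounds [5, 10] and buckets
+ * [{_equalFreq: 3, _rangeFreq: 0, _ndv: 0}, {_equalFreq: 1, _rangeFreq: 4, _ndv: 2}],
+ * the value 5 occurs 3 times, the value 10 occurs once, and 4 more occurrences spanning
+ * 2 distinct values fall strictly between 5 and 10.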
+ */
+class ScalarHistogram {
+public:
+    ScalarHistogram();
+    ScalarHistogram(const StatsHistogram& histogram);
+    ScalarHistogram(sbe::value::Array bounds, std::vector<Bucket> buckets);
+
+    // Print a human-readable representation of a histogram.
+    std::string toString() const;
+    std::string plot() const;
+    // Helper function to dump the content of the histogram as needed by the manual histogram
+    // creation in the unit tests (without cumulative frequency and NDV).
+    std::string dump() const;
+
+    const sbe::value::Array& getBounds() const;
+    const std::vector<Bucket>& getBuckets() const;
+    // Return the total number of histogrammed values.
+    size_t getCardinality() const {
+        if (_buckets.empty()) {
+            return 0;
+        }
+        return _buckets.back()._cumulativeFreq;
+    }
+
+    bool empty() const {
+        return _buckets.empty();
+    }
+
+    // Serialize to BSON for storage in stats collection.
+    BSONObj serialize() const;
+
+    static constexpr size_t kMaxBuckets = 100;
+
+private:
+    // Bucket bounds representing the **highest** value in each bucket.
+    sbe::value::Array _bounds;
+
+    std::vector<Bucket> _buckets;
+};
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/stats.idl b/src/mongo/db/query/stats/stats.idl
new file mode 100644
index 00000000000..eb6220d45b9
--- /dev/null
+++ b/src/mongo/db/query/stats/stats.idl
@@ -0,0 +1,102 @@
+# Copyright (C) 2022-present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# .
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+#
+global:
+    cpp_namespace: "mongo"
+
+imports:
+    - "mongo/db/basic_types.idl"
+
+structs:
+    StatsBucket:
+        description: "Histogram bucket"
+        fields:
+            boundaryCount:
+                type: double
+            rangeCount:
+                type: double
+            rangeDistincts:
+                type: double
+            cumulativeCount:
+                type: double
+            cumulativeDistincts:
+                type: double
+
+    StatsHistogram:
+        description: "MaxDiff Histogram"
+        fields:
+            buckets:
+                type: array<StatsBucket>
+            bounds:
+                type: array
+
+    TypeTag:
+        description: "SBE types and their corresponding frequencies in the histogram"
+        fields:
+            typeName:
+                type: string
+            count:
+                type: double
+
+    StatsArrayHistogram:
+        description: "Array Histogram"
+        fields:
+            minHistogram:
+                type: StatsHistogram
+            maxHistogram:
+                type: StatsHistogram
+            uniqueHistogram:
+                type: StatsHistogram
+            typeCount:
+                type: array<TypeTag>
+
+    Statistics:
+        description: "Serialized representation of data statistics for a key path"
+        fields:
+            documents:
+                type: double
+            trueCount:
+                type: double
+            falseCount:
+                type: double
+            emptyArrayCount:
+                type: double
+            typeCount:
+                type: array<TypeTag>
+            scalarHistogram:
+                type: StatsHistogram
+            arrayStatistics:
+                type: StatsArrayHistogram
+                optional: true
+
+    StatsPath:
+        description: "Key path to statistics"
+        fields:
+            _id:
+                type: string
+            statistics:
+                type: Statistics
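+
+# For illustration, a serialized StatsPath document for a key path might look like the
+# following (values hypothetical):
+#     {_id: "a.b",
+#      statistics: {documents: 100.0, trueCount: 0.0, falseCount: 0.0,
+#                   emptyArrayCount: 0.0,
+#                   typeCount: [{typeName: "NumberInt64", count: 100.0}],
+#                   scalarHistogram: {buckets: [...], bounds: [...]}}}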
diff --git a/src/mongo/db/query/stats/stats_cache.cpp b/src/mongo/db/query/stats/stats_cache.cpp
new file mode 100644
index 00000000000..dfe5a43890e
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_cache.cpp
@@ -0,0 +1,74 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * .
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/stats/stats_cache.h"
+
+#include "mongo/db/query/stats/collection_statistics.h"
+#include "mongo/util/read_through_cache.h"
+
+#include "mongo/logv2/log.h"
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
+
+namespace mongo::stats {
+namespace {
+const auto statsCacheDecoration = ServiceContext::declareDecoration<std::unique_ptr<StatsCache>>();
+} // namespace
+
+StatsCache::StatsCache(ServiceContext* service,
+                       std::unique_ptr<StatsCacheLoader> cacheLoader,
+                       ThreadPoolInterface& threadPool,
+                       int size)
+    : ReadThroughCache(
+          _mutex,
+          service,
+          threadPool,
+          [this](OperationContext* opCtx,
+                 const StatsPathString& statsPath,
+                 const ValueHandle& stats) { return _lookupStats(opCtx, statsPath, stats); },
+          size),
+      _statsCacheLoader(std::move(cacheLoader)) {}
+
+StatsCache::LookupResult StatsCache::_lookupStats(OperationContext* opCtx,
+                                                  const StatsPathString& statsPath,
+                                                  const StatsCacheValueHandle& stats) {
+
+    try {
+        invariant(_statsCacheLoader);
+        auto newStats = _statsCacheLoader->getStats(opCtx, statsPath).get();
+        return LookupResult(std::move(newStats));
+    } catch (const DBException& ex) {
+        if (ex.code() == ErrorCodes::NamespaceNotFound) {
+            return StatsCache::LookupResult(boost::none);
+        }
+        throw;
+    }
+}
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/stats_cache.h b/src/mongo/db/query/stats/stats_cache.h
new file mode 100644
index 00000000000..37d3d238a4d
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_cache.h
@@ -0,0 +1,81 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * .
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/base/string_data.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/query/stats/collection_statistics.h"
+#include "mongo/db/query/stats/stats_cache_loader.h"
+#include "mongo/util/concurrency/thread_pool.h"
+#include "mongo/util/read_through_cache.h"
+
+namespace mongo::stats {
+using StatsCacheType = ReadThroughCache<StatsPathString, StatsCacheVal>;
+using StatsCacheValueHandle = StatsCacheType::ValueHandle;
+
+/**
+ * Collection statistics read-through cache. It reads from the persistent storage but never writes
+ * to it.
+ */
+class StatsCache : public StatsCacheType {
+public:
+    /**
+     * The constructor provides the Service context under which this cache has been instantiated,
+     * and a Thread pool to be used for invoking the blocking 'lookup' calls. The size is the
+     * number of entries the underlying LRU cache will hold.
+     */
+    StatsCache(ServiceContext* service,
+               std::unique_ptr<StatsCacheLoader> cacheLoader,
+               ThreadPoolInterface& threadPool,
+               int size);
+
+    /**
+     * Returns the stats cache loader. Currently used for testing only.
+     */
+    StatsCacheLoader* getStatsCacheLoader() {
+        invariant(_statsCacheLoader);
+
+        return _statsCacheLoader.get();
+    }
+
+private:
+    /**
+     * Reads collection stats from the underlying storage if they are not found in the in-memory
+     * cache.
+     */
+    LookupResult _lookupStats(OperationContext* opCtx,
+                              const StatsPathString& statsPath,
+                              const ValueHandle& stats);
+
+    Mutex _mutex = MONGO_MAKE_LATCH("StatsCache::_mutex");
+
+    std::unique_ptr<StatsCacheLoader> _statsCacheLoader;
+};
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/stats_cache_loader.h b/src/mongo/db/query/stats/stats_cache_loader.h
new file mode 100644
index 00000000000..7bad4b64304
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_cache_loader.h
@@ -0,0 +1,58 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * .
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/query/stats/array_histogram.h"
+#include "mongo/stdx/thread.h"
+
+namespace mongo::stats {
+using StatsPathString = std::pair<NamespaceString, std::string>;
+using StatsCacheVal = std::shared_ptr<ArrayHistogram>;
+
+class StatsCacheLoader {
+public:
+    /**
+     * Non-blocking call, which returns CollectionStatistics from the persistent metadata store.
+     *
+     * If for some reason the asynchronous fetch operation cannot be dispatched (for example on
+     * shutdown), throws a DBException.
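+     *
+     * A typical call site blocks on the returned future (names illustrative):
+     *     StatsCacheVal hist = loader->getStats(opCtx, {nss, "a.b"}).get();
+     * where 'nss' is the NamespaceString of the collection and "a.b" is the key path.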
+ */ + virtual SemiFuture getStats(OperationContext* opCtx, + const StatsPathString& statsPath) = 0; + + virtual void setStatsReturnValueForTest(StatusWith swStats){}; + + virtual ~StatsCacheLoader() {} + + static constexpr StringData kStatsPrefix = "system.statistics"_sd; +}; + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/stats_cache_loader_impl.cpp b/src/mongo/db/query/stats/stats_cache_loader_impl.cpp new file mode 100644 index 00000000000..e41912eafc0 --- /dev/null +++ b/src/mongo/db/query/stats/stats_cache_loader_impl.cpp @@ -0,0 +1,82 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/stats/stats_cache_loader_impl.h" + +#include "mongo/db/dbdirectclient.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/query/stats/stats_gen.h" +#include "mongo/logv2/log.h" +#include "mongo/stdx/thread.h" + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery + +namespace mongo::stats { +SemiFuture StatsCacheLoaderImpl::getStats(OperationContext* opCtx, + const StatsPathString& statsPath) { + + std::string statsColl(kStatsPrefix + "." 
+ statsPath.first.coll());
+
+    NamespaceString statsNss(statsPath.first.db(), statsColl);
+    DBDirectClient client(opCtx);
+
+    FindCommandRequest findRequest{statsNss};
+    BSONObj filter = BSON("_id" << statsPath.second);
+    LOGV2_DEBUG(7085600, 1, "findRequest filter", "filter"_attr = filter.toString());
+    findRequest.setFilter(filter.getOwned());
+
+    try {
+        auto cursor = client.find(std::move(findRequest));
+
+        if (!cursor) {
+            uasserted(ErrorCodes::OperationFailed,
+                      str::stream()
+                          << "Failed to establish a cursor for reading " << statsPath.first.ns()
+                          << ", path " << statsPath.second << " from local storage");
+        }
+
+        if (cursor->more()) {
+            IDLParserContext ctx("StatsPath");
+            BSONObj document = cursor->nextSafe().getOwned();
+            auto parsedStats = StatsPath::parse(ctx, document);
+            StatsCacheVal statsPtr(new ArrayHistogram(parsedStats.getStatistics()));
+            return makeReadyFutureWith([this, statsPtr] { return statsPtr; }).semi();
+        }
+
+        uasserted(ErrorCodes::NamespaceNotFound,
+                  str::stream() << "Stats do not exist for " << statsNss.ns() << ", path "
+                                << statsPath.second);
+    } catch (const DBException& ex) {
+        uassertStatusOK(ex.toStatus());
+    }
+    MONGO_UNREACHABLE;
+}
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/stats_cache_loader_impl.h b/src/mongo/db/query/stats/stats_cache_loader_impl.h
new file mode 100644
index 00000000000..979a1009acb
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_cache_loader_impl.h
@@ -0,0 +1,45 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * .
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
diff --git a/src/mongo/db/query/stats/stats_cache_loader_impl.h b/src/mongo/db/query/stats/stats_cache_loader_impl.h
new file mode 100644
index 00000000000..979a1009acb
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_cache_loader_impl.h
@@ -0,0 +1,45 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/query/stats/collection_statistics.h"
+#include "mongo/db/query/stats/stats_cache_loader.h"
+#include "mongo/stdx/thread.h"
+
+namespace mongo::stats {
+
+class StatsCacheLoaderImpl : public StatsCacheLoader {
+public:
+    SemiFuture<StatsCacheVal> getStats(OperationContext* opCtx,
+                                       const StatsPathString& statsPath) override;
+};
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/stats_cache_loader_mock.cpp b/src/mongo/db/query/stats/stats_cache_loader_mock.cpp
new file mode 100644
index 00000000000..c190d61c312
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_cache_loader_mock.cpp
@@ -0,0 +1,50 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/stats/stats_cache_loader_mock.h"
+
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/query/stats/collection_statistics.h"
+#include "mongo/stdx/thread.h"
+
+namespace mongo::stats {
+
+const Status StatsCacheLoaderMock::kInternalErrorStatus = {
+    ErrorCodes::InternalError, "Stats cache loader received unexpected request"};
+
+SemiFuture<StatsCacheVal> StatsCacheLoaderMock::getStats(OperationContext* opCtx,
+                                                         const StatsPathString& statsPath) {
+
+    return makeReadyFutureWith([this] { return _swStatsReturnValueForTest; }).semi();
+}
+
+void StatsCacheLoaderMock::setStatsReturnValueForTest(StatusWith<StatsCacheVal> swStats) {
+    _swStatsReturnValueForTest = std::move(swStats);
+}
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/stats_cache_loader_mock.h b/src/mongo/db/query/stats/stats_cache_loader_mock.h
new file mode 100644
index 00000000000..9951bcfd2ca
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_cache_loader_mock.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/query/stats/collection_statistics.h"
+#include "mongo/db/query/stats/stats_cache_loader.h"
+#include "mongo/stdx/thread.h"
+
+namespace mongo::stats {
+
+class StatsCacheLoaderMock : public StatsCacheLoader {
+public:
+    SemiFuture<StatsCacheVal> getStats(OperationContext* opCtx,
+                                       const StatsPathString& statsPath) override;
+
+    void setStatsReturnValueForTest(StatusWith<StatsCacheVal> swStats);
+
+    static const Status kInternalErrorStatus;
+
+private:
+    StatusWith<StatsCacheVal> _swStatsReturnValueForTest{kInternalErrorStatus};
+};
+
+} // namespace mongo::stats
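Since every lookup resolves to whatever was last supplied via setStatsReturnValueForTest(), tests can pin the cache's behavior without touching storage. A minimal sketch, assuming an OperationContext* opCtx and an illustrative path:

    auto loader = std::make_unique<StatsCacheLoaderMock>();
    // Before this call, any getStats() resolves to kInternalErrorStatus.
    loader->setStatsReturnValueForTest(StatsCacheVal(new ArrayHistogram()));
    auto statsPath = std::make_pair(NamespaceString("db", "coll"), std::string("somePath"));
    StatsCacheVal canned = loader->getStats(opCtx, statsPath).get();  // resolves immediately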
diff --git a/src/mongo/db/query/stats/stats_cache_loader_test.cpp b/src/mongo/db/query/stats/stats_cache_loader_test.cpp
new file mode 100644
index 00000000000..a22e6dd9044
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_cache_loader_test.cpp
@@ -0,0 +1,116 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/bson/oid.h"
+#include "mongo/db/catalog/collection_write_path.h"
+#include "mongo/db/db_raii.h"
+#include "mongo/db/query/stats/scalar_histogram.h"
+#include "mongo/db/query/stats/stats_cache_loader_impl.h"
+#include "mongo/db/query/stats/stats_cache_loader_test_fixture.h"
+#include "mongo/db/query/stats/stats_gen.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/assert_util.h"
+#include "mongo/util/fail_point.h"
+
+namespace mongo::stats {
+namespace {
+
+class StatsCacheLoaderTest : public StatsCacheLoaderTestFixture {
+protected:
+    void createStatsCollection(NamespaceString nss);
+    StatsCacheLoaderImpl _statsCacheLoader;
+};
+
+void StatsCacheLoaderTest::createStatsCollection(NamespaceString nss) {
+    auto opCtx = operationContext();
+    AutoGetCollection autoColl(opCtx, nss, MODE_IX);
+    auto db = autoColl.ensureDbExists(opCtx);
+    WriteUnitOfWork wuow(opCtx);
+    ASSERT(db->createCollection(opCtx, nss));
+    wuow.commit();
+}
+
+TEST_F(StatsCacheLoaderTest, VerifyStatsLoad) {
+    // Initialize histogram buckets.
+    constexpr double doubleCount = 15.0;
+    constexpr double trueCount = 12.0;
+    constexpr double falseCount = 16.0;
+    constexpr double numDocs = doubleCount + trueCount + falseCount;
+    std::vector<Bucket> buckets{
+        Bucket{1.0, 0.0, 1.0, 0.0, 1.0},
+        Bucket{2.0, 5.0, 8.0, 1.0, 2.0},
+        Bucket{3.0, 4.0, 15.0, 2.0, 6.0},
+    };
+
+    // Initialize histogram bounds.
+    auto [boundsTag, boundsVal] = sbe::value::makeNewArray();
+    sbe::value::ValueGuard boundsGuard{boundsTag, boundsVal};
+    auto bounds = sbe::value::getArrayView(boundsVal);
+    bounds->push_back(sbe::value::TypeTags::NumberDouble, 1.0);
+    bounds->push_back(sbe::value::TypeTags::NumberDouble, 2.0);
+    bounds->push_back(sbe::value::TypeTags::NumberDouble, 3.0);
+
+    // Create a scalar histogram.
+    TypeCounts tc{
+        {sbe::value::TypeTags::NumberDouble, doubleCount},
+        {sbe::value::TypeTags::Boolean, trueCount + falseCount},
+    };
+    ScalarHistogram sh(*bounds, buckets);
+    ArrayHistogram ah(sh, tc, trueCount, falseCount);
+    auto expectedSerialized = ah.serialize();
+
+    // Serialize histogram into a stats path.
+    std::string path = "somePath";
+    auto serialized = stats::makeStatsPath(path, numDocs, ah);
+
+    // Initialize stats collection.
+    NamespaceString nss("test", "stats");
+    std::string statsColl(StatsCacheLoader::kStatsPrefix + "." + nss.coll());
+    NamespaceString statsNss(nss.db(), statsColl);
+    createStatsCollection(statsNss);
+
+    // Write serialized stats path to collection.
+    AutoGetCollection autoColl(operationContext(), statsNss, MODE_IX);
+    const CollectionPtr& coll = autoColl.getCollection();
+    {
+        WriteUnitOfWork wuow(operationContext());
+        ASSERT_OK(collection_internal::insertDocument(
+            operationContext(), coll, InsertStatement(serialized), nullptr));
+        wuow.commit();
+    }
+
+    // Read stats path & verify values are consistent with what we expect.
+    auto actualAH = _statsCacheLoader.getStats(operationContext(), std::make_pair(nss, path)).get();
+    auto actualSerialized = actualAH->serialize();
+
+    ASSERT_BSONOBJ_EQ(expectedSerialized, actualSerialized);
+}
+
+} // namespace
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/stats_cache_loader_test_fixture.cpp b/src/mongo/db/query/stats/stats_cache_loader_test_fixture.cpp
new file mode 100644
index 00000000000..1e353196b83
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_cache_loader_test_fixture.cpp
@@ -0,0 +1,74 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/stats/stats_cache_loader_test_fixture.h"
+
+#include <memory>
+
+#include "mongo/db/repl/replication_coordinator_mock.h"
+#include "mongo/db/repl/storage_interface_impl.h"
+#include "mongo/db/service_context_d_test_fixture.h"
+
+namespace mongo::stats {
+
+void StatsCacheLoaderTestFixture::setUp() {
+    // Set up mongod.
+    ServiceContextMongoDTest::setUp();
+
+    auto service = getServiceContext();
+    _storage = std::make_unique<repl::StorageInterfaceImpl>();
+    _opCtx = cc().makeOperationContext();
+
+    // Set up ReplicationCoordinator and ensure that we are primary.
+    auto replCoord = std::make_unique<repl::ReplicationCoordinatorMock>(service);
+    ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_PRIMARY));
+    repl::ReplicationCoordinator::set(service, std::move(replCoord));
+
+    // Set up oplog collection. If the WT storage engine is used, the oplog collection is expected
+    // to exist when fetching the next opTime (LocalOplogInfo::getNextOpTimes) to use for a write.
+    repl::createOplog(operationContext());
+}
+
+void StatsCacheLoaderTestFixture::tearDown() {
+    _storage.reset();
+    _opCtx.reset();
+
+    // Tear down mongod.
+    ServiceContextMongoDTest::tearDown();
+}
+
+OperationContext* StatsCacheLoaderTestFixture::operationContext() {
+    return _opCtx.get();
+}
+
+repl::StorageInterface* StatsCacheLoaderTestFixture::storageInterface() {
+    return _storage.get();
+}
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/stats_cache_loader_test_fixture.h b/src/mongo/db/query/stats/stats_cache_loader_test_fixture.h
new file mode 100644
index 00000000000..6c7d502fdf1
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_cache_loader_test_fixture.h
@@ -0,0 +1,60 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/operation_context.h"
+#include "mongo/db/query/stats/stats_cache_loader.h"
+#include "mongo/db/repl/storage_interface_impl.h"
+#include "mongo/db/service_context_d_test_fixture.h"
+
+namespace mongo::stats {
+
+/**
+ * Sets up and provides a repl::StorageInterface and OperationContext.
+ * Database data is cleared between test runs.
+ */
+class StatsCacheLoaderTestFixture : public ServiceContextMongoDTest {
+public:
+    explicit StatsCacheLoaderTestFixture(Options options = {})
+        : ServiceContextMongoDTest(std::move(options)) {}
+
+    OperationContext* operationContext();
+    repl::StorageInterface* storageInterface();
+
+protected:
+    void setUp() override;
+    void tearDown() override;
+
+private:
+    ServiceContext::UniqueOperationContext _opCtx;
+    std::unique_ptr<repl::StorageInterfaceImpl> _storage;
+};
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/stats_cache_test.cpp b/src/mongo/db/query/stats/stats_cache_test.cpp
new file mode 100644
index 00000000000..b95dc2c3bd8
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_cache_test.cpp
@@ -0,0 +1,131 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include <memory>
+
+#include "mongo/db/client.h"
+#include "mongo/db/concurrency/locker_noop_service_context_test_fixture.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/db/query/stats/stats_cache.h"
+#include "mongo/db/query/stats/stats_cache_loader_mock.h"
+#include "mongo/unittest/barrier.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/concurrency/thread_pool.h"
+#include "mongo/util/read_through_cache.h"
+#include "mongo/util/scopeguard.h"
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
+
+namespace mongo::stats {
+namespace {
+
+using unittest::assertGet;
+
+/**
+ * Fixture for tests that do not need to exercise the multi-threading capabilities of the cache
+ * and, as such, do not require control over the creation/destruction of their operation contexts.
+ */
+class StatsCacheTest : public LockerNoopServiceContextTest {
+protected:
+    // Extends StatsCache and automatically provides it with a thread pool, which will be
+    // shut down and joined before the StatsCache is destroyed (which is part of the contract
+    // of ReadThroughCache).
+    class CacheWithThreadPool : public StatsCache {
+    public:
+        CacheWithThreadPool(ServiceContext* service,
+                            std::unique_ptr<StatsCacheLoaderMock> cacheLoaderMock,
+                            size_t size)
+            : StatsCache(service, std::move(cacheLoaderMock), _threadPool, size) {
+            _threadPool.startup();
+        }
+
+    private:
+        ThreadPool _threadPool{[] {
+            ThreadPool::Options options;
+            options.poolName = "StatsCacheTest";
+            options.minThreads = 1;
+            options.maxThreads = 1;
+            return options;
+        }()};
+    };
+
+    const ServiceContext::UniqueOperationContext _opCtxHolder{makeOperationContext()};
+    OperationContext* const _opCtx{_opCtxHolder.get()};
+};
+
+TEST(StatsCacheTest, StandaloneValueHandle) {
+    StatsCacheVal statsPtr(new ArrayHistogram());
+    StatsCache::ValueHandle standaloneHandle(std::move(statsPtr));
+    ASSERT(standaloneHandle.isValid());
+}
+
+TEST_F(StatsCacheTest, KeyDoesNotExist) {
+    Status namespaceNotFoundErrorStatus = {ErrorCodes::NamespaceNotFound,
+                                           "The key does not exist"};
+    auto cacheLoaderMock = std::make_unique<StatsCacheLoaderMock>();
+    auto cache = CacheWithThreadPool(getServiceContext(), std::move(cacheLoaderMock), 1);
+    cache.getStatsCacheLoader()->setStatsReturnValueForTest(
+        std::move(namespaceNotFoundErrorStatus));
+    auto handle = cache.acquire(_opCtx, std::make_pair(NamespaceString("db", "coll"), "somePath"));
+    ASSERT(!handle);
+}
+
+/*
+TEST_F(StatsCacheTest, LoadStats) {
+    auto cacheLoaderMock = std::make_unique<StatsCacheLoaderMock>();
+    auto cache = CacheWithThreadPool(getServiceContext(), std::move(cacheLoaderMock), 1);
+
+    auto stats1 = CollectionStatistics(1);
+    auto stats2 = CollectionStatistics(2);
+
+    cache.getStatsCacheLoader()->setStatsReturnValueForTest(std::move(stats1));
+
+    auto handle = cache.acquire(_opCtx, NamespaceString("db", "coll1"));
+    ASSERT(handle.isValid());
+    ASSERT_EQ(1, handle->getCardinality());
+
+    // Make all requests to StatsCacheLoader throw an exception to ensure that the test returns
+    // the value from the cache.
+    Status internalErrorStatus = {ErrorCodes::InternalError,
+                                  "Stats cache loader received unexpected request"};
+    cache.getStatsCacheLoader()->setStatsReturnValueForTest(std::move(internalErrorStatus));
+
+    handle = cache.acquire(_opCtx, NamespaceString("db", "coll1"));
+    ASSERT(handle.isValid());
+    ASSERT_EQ(1, handle->getCardinality());
+
+    cache.getStatsCacheLoader()->setStatsReturnValueForTest(std::move(stats2));
+    handle = cache.acquire(_opCtx, NamespaceString("db", "coll2"));
+    ASSERT(handle.isValid());
+    ASSERT_EQ(2, handle->getCardinality());
+}
+*/
+
+} // namespace
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/stats_catalog.cpp b/src/mongo/db/query/stats/stats_catalog.cpp
new file mode 100644
index 00000000000..99891f1dc4c
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_catalog.cpp
@@ -0,0 +1,108 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/stats/stats_catalog.h"
+
+#include "mongo/db/query/stats/array_histogram.h"
+#include "mongo/db/query/stats/collection_statistics.h"
+#include "mongo/db/query/stats/stats_cache.h"
+#include "mongo/util/read_through_cache.h"
+
+#include "mongo/logv2/log.h"
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
+
+namespace mongo::stats {
+namespace {
+const auto statsCatalogDecoration =
+    ServiceContext::declareDecoration<std::unique_ptr<StatsCatalog>>();
+} // namespace
+
+StatsCatalog::StatsCatalog(ServiceContext* service,
+                           std::unique_ptr<StatsCacheLoader> statsCacheLoader)
+    : _executor(std::make_shared<ThreadPool>([] {
+          ThreadPool::Options options;
+          options.poolName = "StatsCache";
+          options.minThreads = 0;
+          options.maxThreads = 2;
+          return options;
+      }())),
+      _statsCache(service, std::move(statsCacheLoader), *_executor, 1000) {
+    _executor->startup();
+}
+
+StatsCatalog::~StatsCatalog() {
+    // The executor is used by the StatsCache, so it must be joined before the cache is
+    // destroyed, per the contract of ReadThroughCache.
+    _executor->shutdown();
+    _executor->join();
+}
+
+void StatsCatalog::set(ServiceContext* serviceContext, std::unique_ptr<StatsCatalog> cache) {
+    auto& statsCatalog = statsCatalogDecoration(serviceContext);
+    invariant(!statsCatalog);
+
+    statsCatalog = std::move(cache);
+}
+
+StatsCatalog& StatsCatalog::get(ServiceContext* serviceContext) {
+    auto& statsCatalog = statsCatalogDecoration(serviceContext);
+    invariant(statsCatalog);
+
+    return *statsCatalog;
+}
+
+StatsCatalog& StatsCatalog::get(OperationContext* opCtx) {
+    return get(opCtx->getServiceContext());
+}
+
+StatusWith<std::shared_ptr<ArrayHistogram>> StatsCatalog::getHistogram(OperationContext* opCtx,
+                                                                       const NamespaceString& nss,
+                                                                       const std::string& path) {
+    try {
+        auto handle = _statsCache.acquire(opCtx, std::make_pair(nss, path));
+        uassert(ErrorCodes::NamespaceNotFound,
+                str::stream() << "path " << nss << " : " << path << " not found",
+                handle);
+
+        return *(handle.get());
+    } catch (const DBException& ex) {
+        return ex.toStatus();
+    }
+}
+
+Status StatsCatalog::invalidatePath(const NamespaceString& nss, const std::string& path) {
+    try {
+        _statsCache.invalidateKey(std::make_pair(nss, path));
+        return Status::OK();
+    } catch (const DBException& ex) {
+        return ex.toStatus();
+    }
+}
+} // namespace mongo::stats
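A sketch of how a consumer is expected to reach the catalog; the operation context, namespace, path, and fallback are illustrative assumptions rather than part of this patch:

    // The catalog is a ServiceContext decoration, so any caller holding an
    // OperationContext* opCtx can reach it.
    auto& catalog = StatsCatalog::get(opCtx);
    auto swHistogram = catalog.getHistogram(opCtx, NamespaceString("test", "coll"), "a.b");
    if (!swHistogram.isOK()) {
        // e.g. ErrorCodes::NamespaceNotFound: no histogram exists for this path,
        // so the caller would fall back to non-histogram estimation.
    }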
diff --git a/src/mongo/db/query/stats/stats_catalog.h b/src/mongo/db/query/stats/stats_catalog.h
new file mode 100644
index 00000000000..e86b4562b8e
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_catalog.h
@@ -0,0 +1,77 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/base/string_data.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/query/stats/collection_statistics.h"
+#include "mongo/db/query/stats/stats_cache.h"
+#include "mongo/db/query/stats/stats_cache_loader.h"
+#include "mongo/util/concurrency/thread_pool.h"
+
+namespace mongo::stats {
+/**
+ * This class owns the StatsCache and manages the executor's lifetime.
+ */
+class StatsCatalog {
+public:
+    /**
+     * Stores the catalog on the specified service context. May only be called once for the
+     * lifetime of the service context.
+     */
+    static void set(ServiceContext* serviceContext, std::unique_ptr<StatsCatalog> catalog);
+
+    static StatsCatalog& get(ServiceContext* serviceContext);
+    static StatsCatalog& get(OperationContext* opCtx);
+
+    /**
+     * The constructor takes the service context under which the cache needs to be instantiated
+     * and a thread pool to be used for invoking the blocking 'lookup' calls. The size is the
+     * number of entries the underlying LRU cache will hold.
+     */
+    StatsCatalog(ServiceContext* service, std::unique_ptr<StatsCacheLoader> cacheLoader);
+
+    ~StatsCatalog();
+
+    StatusWith<std::shared_ptr<ArrayHistogram>> getHistogram(OperationContext* opCtx,
+                                                             const NamespaceString& nss,
+                                                             const std::string& path);
+
+    Status invalidatePath(const NamespaceString& nss, const std::string& path);
+
+private:
+    /**
+     * The executor is used by the cache.
+     */
+    std::shared_ptr<ThreadPool> _executor;
+    StatsCache _statsCache;
+};
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/stats_path_test.cpp b/src/mongo/db/query/stats/stats_path_test.cpp
new file mode 100644
index 00000000000..3e3afe50b32
--- /dev/null
+++ b/src/mongo/db/query/stats/stats_path_test.cpp
@@ -0,0 +1,129 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/exec/sbe/values/bson.h"
+#include "mongo/db/exec/sbe/values/value.h"
+#include "mongo/db/query/stats/array_histogram.h"
+#include "mongo/db/query/stats/scalar_histogram.h"
+#include "mongo/db/query/stats/stats_gen.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/assert_util.h"
+
+namespace mongo::stats {
+namespace {
+
+IDLParserContext ctx("StatsPath");
+
+/**
+ * Validate round-trip conversion for a histogram bucket.
+ */
+TEST(StatsPath, BasicValidStatsBucketDouble) {
+    // Create & parse StatsBucket.
+    auto serializedBucket = Bucket{3.0, 4.0, 15.0, 2.0, 6.0}.serialize();
+    auto parsedBucket = StatsBucket::parse(ctx, serializedBucket);
+
+    // Round-trip conversion.
+    auto bucketToBSON = parsedBucket.toBSON();
+    ASSERT_BSONOBJ_EQ(serializedBucket, bucketToBSON);
+}
+
+/**
+ * Validate round-trip conversion for the StatsPath datatype.
+ */
+TEST(StatsPath, BasicValidStatsPath) {
+    // Initialize histogram buckets.
+    constexpr double doubleCount = 15.0;
+    constexpr double trueCount = 12.0;
+    constexpr double falseCount = 16.0;
+    constexpr double numDocs = doubleCount + trueCount + falseCount;
+    std::vector<Bucket> buckets{
+        Bucket{1.0, 0.0, 1.0, 0.0, 1.0},
+        Bucket{2.0, 5.0, 8.0, 1.0, 2.0},
+        Bucket{3.0, 4.0, 15.0, 2.0, 6.0},
+    };
+
+    // Initialize histogram bounds.
+    auto [boundsTag, boundsVal] = sbe::value::makeNewArray();
+    sbe::value::ValueGuard boundsGuard{boundsTag, boundsVal};
+    auto bounds = sbe::value::getArrayView(boundsVal);
+    bounds->push_back(sbe::value::TypeTags::NumberDouble, 1.0);
+    bounds->push_back(sbe::value::TypeTags::NumberDouble, 2.0);
+    bounds->push_back(sbe::value::TypeTags::NumberDouble, 3.0);
+
+    // Create a scalar histogram.
+    TypeCounts tc{
+        {sbe::value::TypeTags::NumberDouble, doubleCount},
+        {sbe::value::TypeTags::Boolean, trueCount + falseCount},
+    };
+    ScalarHistogram sh(*bounds, buckets);
+    ArrayHistogram ah(sh, tc, trueCount, falseCount);
+
+    // Serialize to BSON.
+    auto serializedPath = stats::makeStatsPath("somePath", numDocs, ah);
+
+    // Parse StatsPath via IDL & serialize to BSON.
+    auto parsedPath = StatsPath::parse(ctx, serializedPath);
+    auto parsedPathToBSON = parsedPath.toBSON();
+
+    // We should end up with the same serialized BSON in the end.
+    ASSERT_BSONOBJ_EQ(serializedPath, parsedPathToBSON);
+}
+
+/**
+ * Validate round-trip conversion for an empty StatsPath datatype.
+ */
+TEST(StatsPath, BasicValidEmptyStatsPath) {
+    // Initialize histogram buckets.
+    constexpr double numDocs = 0.0;
+    std::vector<Bucket> buckets;
+
+    // Initialize histogram bounds.
+    auto [boundsTag, boundsVal] = sbe::value::makeNewArray();
+    sbe::value::ValueGuard boundsGuard{boundsTag, boundsVal};
+    auto bounds = sbe::value::getArrayView(boundsVal);
+
+    // Create an empty scalar histogram.
+    TypeCounts tc;
+    ScalarHistogram sh(*bounds, buckets);
+    ArrayHistogram ah(sh, tc);
+
+    // Serialize to BSON.
+    auto serializedPath = stats::makeStatsPath("someEmptyPath", numDocs, ah);
+
+    // Parse StatsPath via IDL & serialize to BSON.
+    auto parsedPath = StatsPath::parse(ctx, serializedPath);
+    auto parsedPathToBSON = parsedPath.toBSON();
+
+    // We should end up with the same serialized BSON in the end.
+    ASSERT_BSONOBJ_EQ(serializedPath, parsedPathToBSON);
+}
+
+} // namespace
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/value_utils.cpp b/src/mongo/db/query/stats/value_utils.cpp
new file mode 100644
index 00000000000..5af0f1c248c
--- /dev/null
+++ b/src/mongo/db/query/stats/value_utils.cpp
@@ -0,0 +1,252 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/stats/value_utils.h"
+
+#include "mongo/db/query/stats/scalar_histogram.h"
+
+namespace mongo::stats {
+namespace value = sbe::value;
+
+SBEValue::SBEValue(value::TypeTags tag, value::Value val) : _tag(tag), _val(val) {}
+
+SBEValue::SBEValue(std::pair<value::TypeTags, value::Value> v) : SBEValue(v.first, v.second) {}
+
+SBEValue::SBEValue(const SBEValue& other) {
+    auto [tag, val] = copyValue(other._tag, other._val);
+    _tag = tag;
+    _val = val;
+}
+
+SBEValue::SBEValue(SBEValue&& other) {
+    _tag = other._tag;
+    _val = other._val;
+
+    other._tag = value::TypeTags::Nothing;
+    other._val = 0;
+}
+
+SBEValue::~SBEValue() {
+    value::releaseValue(_tag, _val);
+}
+
+SBEValue& SBEValue::operator=(const SBEValue& other) {
+    value::releaseValue(_tag, _val);
+
+    auto [tag, val] = copyValue(other._tag, other._val);
+    _tag = tag;
+    _val = val;
+    return *this;
+}
+
+SBEValue& SBEValue::operator=(SBEValue&& other) {
+    value::releaseValue(_tag, _val);
+
+    _tag = other._tag;
+    _val = other._val;
+
+    other._tag = value::TypeTags::Nothing;
+    other._val = 0;
+
+    return *this;
+}
+
+std::pair<value::TypeTags, value::Value> SBEValue::get() const {
+    return std::make_pair(_tag, _val);
+}
+
+value::TypeTags SBEValue::getTag() const {
+    return _tag;
+}
+
+value::Value SBEValue::getValue() const {
+    return _val;
+}
+
+std::pair<value::TypeTags, value::Value> makeInt64Value(int v) {
+    return std::make_pair(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(v));
+};
+
+std::pair<value::TypeTags, value::Value> makeNullValue() {
+    return std::make_pair(value::TypeTags::Null, 0);
+};
+
+bool sameTypeClass(value::TypeTags tag1, value::TypeTags tag2) {
+    if (tag1 == tag2) {
+        return true;
+    }
+
+    static constexpr const char* kTempFieldName = "temp";
+
+    BSONObjBuilder minb1;
+    minb1.appendMinForType(kTempFieldName, value::tagToType(tag1));
+    const BSONObj min1 = minb1.obj();
+
+    BSONObjBuilder minb2;
+    minb2.appendMinForType(kTempFieldName, value::tagToType(tag2));
+    const BSONObj min2 = minb2.obj();
+
+    return min1.woCompare(min2) == 0;
+}
+
+bool sameTypeBracket(value::TypeTags tag1, value::TypeTags tag2) {
+    if (tag1 == tag2) {
+        return true;
+    }
+    return ((value::isNumber(tag1) && value::isNumber(tag2)) ||
+            (value::isString(tag1) && value::isString(tag2)));
+}
+
+int32_t compareValues(value::TypeTags tag1,
+                      value::Value val1,
+                      value::TypeTags tag2,
+                      value::Value val2) {
+    const auto [compareTag, compareVal] = value::compareValue(tag1, val1, tag2, val2);
+    uassert(6660547, "Invalid comparison result", compareTag == value::TypeTags::NumberInt32);
+    return value::bitcastTo<int32_t>(compareVal);
+}
+
+void sortValueVector(std::vector<SBEValue>& sortVector) {
+    const auto cmp = [](const SBEValue& a, const SBEValue& b) {
+        return compareValues(a.getTag(), a.getValue(), b.getTag(), b.getValue()) < 0;
+    };
+    std::sort(sortVector.begin(), sortVector.end(), cmp);
+}
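To make the ordering contract concrete, a small hypothetical illustration using only the helpers defined in this file; the values are arbitrary:

    // BSON order places Null before the integers, so after sorting the vector
    // is [Null, 7, 42]; compareValues() then confirms 7 < 42.
    std::vector<SBEValue> values;
    values.emplace_back(makeNullValue());
    values.emplace_back(makeInt64Value(42));
    values.emplace_back(makeInt64Value(7));
    sortValueVector(values);
    invariant(compareValues(values[1].getTag(), values[1].getValue(),
                            values[2].getTag(), values[2].getValue()) < 0);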
+
+double valueToDouble(value::TypeTags tag, value::Value val) {
+    double result = 0;
+    if (value::isNumber(tag)) {
+        result = value::numericCast<double>(tag, val);
+    } else if (value::isString(tag)) {
+        const StringData sd = value::getStringView(tag, val);
+
+        // Convert a prefix of the string to a double.
+        const size_t maxPrecision = std::min(sd.size(), sizeof(double));
+        for (size_t i = 0; i < maxPrecision; ++i) {
+            const char ch = sd[i];
+            const double charToDbl = ch / std::pow(2, i * 8);
+            result += charToDbl;
+        }
+    } else if (tag == value::TypeTags::Date || tag == value::TypeTags::Timestamp) {
+        int64_t v = value::bitcastTo<int64_t>(val);
+        result = value::numericCast<double>(value::TypeTags::NumberInt64, v);
+
+    } else if (tag == value::TypeTags::ObjectId) {
+        auto objView =
+            ConstDataView(reinterpret_cast<const char*>(sbe::value::getObjectIdView(val)->data()));
+        // Take the first 8 bytes of the ObjectId.
+        // TODO: consider using the entire ObjectId or other parts of it.
+        // auto v = objView.read<BigEndian<uint64_t>>(sizeof(uint32_t));
+        auto v = objView.read<BigEndian<uint64_t>>();
+        result = value::numericCast<double>(value::TypeTags::NumberInt64, v);
+    } else {
+        uassert(6844500, "Unexpected value type", false);
+    }
+
+    return result;
+}
+
+bool canEstimateTypeViaHistogram(value::TypeTags tag) {
+    if (sbe::value::isNumber(tag) || value::isString(tag)) {
+        return true;
+    }
+
+    switch (tag) {
+        // Other types that we can/do build histograms on:
+        // - Date/time types.
+        case value::TypeTags::Date:
+        case value::TypeTags::Timestamp:
+        // - ObjectId.
+        case value::TypeTags::ObjectId:
+            return true;
+
+        // Types that can only be estimated via the type-counters.
+        case value::TypeTags::Object:
+        case value::TypeTags::Array:
+        case value::TypeTags::Null:
+        case value::TypeTags::Nothing:
+        case value::TypeTags::Boolean:
+            return false;
+
+        // Trying to estimate any other types should result in an error.
+        default:
+            uasserted(7051100,
+                      str::stream()
+                          << "Type " << tag << " is not supported by histogram estimation.");
+    }
+
+    MONGO_UNREACHABLE;
+}
+
+std::string serialize(value::TypeTags tag) {
+    std::ostringstream os;
+    os << tag;
+    return os.str();
+}
+
+// TODO: does this belong in SBE value utils?
+value::TypeTags deserialize(const std::string& name) {
+    if ("NumberInt32" == name) {
+        return value::TypeTags::NumberInt32;
+    } else if ("NumberInt64" == name) {
+        return value::TypeTags::NumberInt64;
+    } else if ("NumberDecimal" == name) {
+        return value::TypeTags::NumberDecimal;
+    } else if ("NumberDouble" == name) {
+        return value::TypeTags::NumberDouble;
+    } else if ("StringBig" == name) {
+        return value::TypeTags::StringBig;
+    } else if ("StringSmall" == name) {
+        return value::TypeTags::StringSmall;
+    } else if ("bsonString" == name) {
+        return value::TypeTags::bsonString;
+    } else if ("Date" == name) {
+        return value::TypeTags::Date;
+    } else if ("Timestamp" == name) {
+        return value::TypeTags::Timestamp;
+    } else if ("ObjectId" == name) {
+        return value::TypeTags::ObjectId;
+    } else if ("Object" == name) {
+        return value::TypeTags::Object;
+    } else if ("Boolean" == name) {
+        return value::TypeTags::Boolean;
+    } else if ("Array" == name) {
+        return value::TypeTags::Array;
+    } else if ("Null" == name) {
+        return value::TypeTags::Null;
+    } else if ("Nothing" == name) {
+        return value::TypeTags::Nothing;
+    }
+
+    // Trying to deserialize any other types should result in an error.
+    uasserted(6660600,
+              str::stream() << "String " << name << " is not convertible to SBE type tag.");
+}
+
+} // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/value_utils.h b/src/mongo/db/query/stats/value_utils.h
new file mode 100644
index 00000000000..d79417ea724
--- /dev/null
+++ b/src/mongo/db/query/stats/value_utils.h
@@ -0,0 +1,120 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/exec/sbe/values/value.h"
+
+namespace mongo::stats {
+/**
+    Container object for SBE value/tag pairs. Supplied values are owned by this object
+    and are released on destruction.
+*/
+class SBEValue {
+public:
+    SBEValue(sbe::value::TypeTags tag, sbe::value::Value val);
+    SBEValue(std::pair<sbe::value::TypeTags, sbe::value::Value> v);
+    ~SBEValue();
+
+    SBEValue(const SBEValue& other);
+    SBEValue(SBEValue&& other);
+
+    SBEValue& operator=(const SBEValue& other);
+    SBEValue& operator=(SBEValue&& other);
+
+    std::pair<sbe::value::TypeTags, sbe::value::Value> get() const;
+    sbe::value::TypeTags getTag() const;
+    sbe::value::Value getValue() const;
+
+private:
+    sbe::value::TypeTags _tag;
+    sbe::value::Value _val;
+};
+
+/**
+    Generate an SBE Value pair that represents the supplied int with
+    type Int64.
+*/
+std::pair<sbe::value::TypeTags, sbe::value::Value> makeInt64Value(int v);
+
+/**
+    Generate an SBE Value pair representing a BSON null value.
+*/
+std::pair<sbe::value::TypeTags, sbe::value::Value> makeNullValue();
+
+/**
+    Do the supplied type tags represent the same BSON type?
+*/
+bool sameTypeClass(sbe::value::TypeTags tag1, sbe::value::TypeTags tag2);
+
+/**
+    Do the supplied type tags represent the same BSON type?
+    TODO: This may be the same as sameTypeClass. @timourk?
+*/
+bool sameTypeBracket(sbe::value::TypeTags tag1, sbe::value::TypeTags tag2);
+
+/**
+    Compare a pair of SBE values.
+
+    The return will be
+        <0 if val1 < val2 in BSON order
+        0 if val1 == val2 in BSON order
+        >0 if val1 > val2 in BSON order
+*/
+int32_t compareValues(sbe::value::TypeTags tag1,
+                      sbe::value::Value val1,
+                      sbe::value::TypeTags tag2,
+                      sbe::value::Value val2);
+
+/**
+    Sort a vector of values in place in BSON order.
+*/
+void sortValueVector(std::vector<SBEValue>& sortVector);
+
+/**
+    Convert a value of any supported type into a double according to some metric. This
+    metric will be consistent with ordering in the type.
+*/
+double valueToDouble(sbe::value::TypeTags tag, sbe::value::Value val);
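A small illustration of the ordering guarantee stated above; the values are hypothetical and only helpers declared in this header are used:

    // valueToDouble is order-preserving within a type class, e.g. for int64s:
    auto [tagA, valA] = makeInt64Value(10);
    auto [tagB, valB] = makeInt64Value(20);
    invariant(valueToDouble(tagA, valA) < valueToDouble(tagB, valB));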
+
+/**
+ * Returns true for types that can be estimated via histograms, and false for types that need type
+ * counters. Any other type results in a uassert.
+ *
+ * NOTE: This should be kept in sync with 'valueToDouble' above.
+ */
+bool canEstimateTypeViaHistogram(sbe::value::TypeTags tag);
+
+/**
+ * Serialize/Deserialize a TypeTag to a string for TypeCount storage in the stats collection.
+ */
+std::string serialize(sbe::value::TypeTags tag);
+sbe::value::TypeTags deserialize(const std::string& name);
+
+} // namespace mongo::stats
-- 
cgit v1.2.1