summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMilena Ivanova <milena.ivanova@mongodb.com>2022-12-12 10:54:53 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-12-12 11:28:43 +0000
commit21aba5b624e0371869a453f89ad1b3711ee720b0 (patch)
tree9d9dd03c7ecf453cf51310282287a760e8eccd91
parent20913e4b839e108613a711b60698bc515eb2b2ac (diff)
downloadmongo-21aba5b624e0371869a453f89ad1b3711ee720b0.tar.gz
SERVER-71989 Implement CE accuracy testing framework based on JS testing
-rw-r--r--jstests/cqf/analyze/array_histogram.js2
-rw-r--r--jstests/cqf/analyze/ce_histogram.js2
-rw-r--r--jstests/cqf/analyze/missing_histogram.js2
-rw-r--r--jstests/cqf/analyze/type_counts.js6
-rw-r--r--jstests/libs/ce_stats_utils.js21
-rw-r--r--jstests/libs/optimizer_utils.js15
-rw-r--r--jstests/query_golden/ce_accuracy.js167
-rw-r--r--jstests/query_golden/expected_output/ce_accuracy470
-rw-r--r--jstests/query_golden/libs/ce_data.js35
-rw-r--r--jstests/query_golden/libs/compute_errors.js29
-rw-r--r--jstests/query_golden/libs/generate_queries.js74
11 files changed, 810 insertions, 13 deletions
diff --git a/jstests/cqf/analyze/array_histogram.js b/jstests/cqf/analyze/array_histogram.js
index 23277ab08ba..0ee10af9ebf 100644
--- a/jstests/cqf/analyze/array_histogram.js
+++ b/jstests/cqf/analyze/array_histogram.js
@@ -112,7 +112,7 @@ runHistogramsTest(function verifyArrayHistograms() {
createAndValidateHistogram({coll, expectedHistogram});
// Verify CE.
- forceHistogramCE();
+ forceCE("histogram");
// Equality predicates.
verifyCEForMatch(
diff --git a/jstests/cqf/analyze/ce_histogram.js b/jstests/cqf/analyze/ce_histogram.js
index c9f2691b887..cb27f55b3f9 100644
--- a/jstests/cqf/analyze/ce_histogram.js
+++ b/jstests/cqf/analyze/ce_histogram.js
@@ -187,7 +187,7 @@ function verifyCEForNDV(ndv) {
};
// Verify CE for all distinct values of each field across multiple types.
- forceHistogramCE();
+ forceCE("histogram");
for (const field of fields) {
jsTestLog(`Testing histogram for ndv ${ndv} and field ${field}`);
diff --git a/jstests/cqf/analyze/missing_histogram.js b/jstests/cqf/analyze/missing_histogram.js
index 7349337dd1d..57d16836883 100644
--- a/jstests/cqf/analyze/missing_histogram.js
+++ b/jstests/cqf/analyze/missing_histogram.js
@@ -29,7 +29,7 @@ runHistogramsTest(function testEmptyAndMissingHistograms() {
// Ensure we have no histogram.
assert.eq(db.system.statistics[noHistogramColl.getName()].count(), 0);
- forceHistogramCE();
+ forceCE("histogram");
// We actually use heuristics because we don't generate histograms for empty collections;
// however, our estimate should still be 0, because the collection is empty.
diff --git a/jstests/cqf/analyze/type_counts.js b/jstests/cqf/analyze/type_counts.js
index b43a37dad55..d3a25eb85b9 100644
--- a/jstests/cqf/analyze/type_counts.js
+++ b/jstests/cqf/analyze/type_counts.js
@@ -109,7 +109,7 @@ runHistogramsTest(function testTypeCounts() {
});
// Verify type count CE.
- forceHistogramCE();
+ forceCE("histogram");
let hint = {a: 1};
// TODO SERVER-70936: estimate boolean counts.
@@ -352,7 +352,7 @@ runHistogramsTest(function testTypeCounts() {
});
// Verify type count CE.
- forceHistogramCE();
+ forceCE("histogram");
hint = {"a.b": 1};
// Test CE for histogrammable types.
@@ -551,7 +551,7 @@ runHistogramsTest(function testTypeCounts() {
// Verify type count CE. Note that for non-$elemMatch preidcates, we include both array and
// scalar type-counts, while for $elemMatch predicates, we include only array type counts in
// our estimate.
- forceHistogramCE();
+ forceCE("histogram");
hint = {a: 1};
// TODO SERVER-70936: estimate boolean counts.
diff --git a/jstests/libs/ce_stats_utils.js b/jstests/libs/ce_stats_utils.js
index fdef187007a..48f8ec77f0c 100644
--- a/jstests/libs/ce_stats_utils.js
+++ b/jstests/libs/ce_stats_utils.js
@@ -126,12 +126,19 @@ function runHistogramsTest(test) {
}
/**
- * We need to set the CE query knob to use histograms and force the use of the new optimizer to
- * ensure that we use histograms to estimate CE.
+ * Creates a single-field index for each field in the 'fields' array.
*/
-function forceHistogramCE() {
- assert.commandWorked(
- db.adminCommand({setParameter: 1, internalQueryCardinalityEstimatorMode: "histogram"}));
- assert.commandWorked(
- db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "forceBonsai"}));
+function createIndexes(coll, fields) {
+ for (const field of fields) {
+ assert.commandWorked(coll.createIndex({[field]: 1}));
+ }
+}
+
+/**
+ * Creates statistics for each field in the 'fields' array.
+ */
+function analyzeFields(coll, fields) {
+ for (const field of fields) {
+ assert.commandWorked(db.runCommand({analyze: coll.getName(), key: field}));
+ }
}
diff --git a/jstests/libs/optimizer_utils.js b/jstests/libs/optimizer_utils.js
index bb363a6a54a..4f7b4ca7c3d 100644
--- a/jstests/libs/optimizer_utils.js
+++ b/jstests/libs/optimizer_utils.js
@@ -324,3 +324,18 @@ function runWithParams(keyValPairs, fn) {
}
}
}
+
+function round2(n) {
+ return (Math.round(n * 100) / 100);
+}
+
+/**
+ * Force cardinality estimation mode: "histogram", "heuristic", or "sampling". We need to force the
+ * use of the new optimizer.
+ */
+function forceCE(mode) {
+ assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "forceBonsai"}));
+ assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryCardinalityEstimatorMode: mode}));
+}
diff --git a/jstests/query_golden/ce_accuracy.js b/jstests/query_golden/ce_accuracy.js
new file mode 100644
index 00000000000..db011020a52
--- /dev/null
+++ b/jstests/query_golden/ce_accuracy.js
@@ -0,0 +1,167 @@
+/**
+ * Tests for cardinality estimation accuracy.
+ * @tags: [
+ * requires_cqf,
+ * ]
+ */
+
+(function() {
+
+load("jstests/libs/ce_stats_utils.js");
+load("jstests/libs/optimizer_utils.js");
+load("jstests/query_golden/libs/ce_data.js");
+load("jstests/query_golden/libs/compute_errors.js");
+load("jstests/query_golden/libs/generate_queries.js");
+
+runHistogramsTest(function testCEAccuracy() {
+ // Flag to show more information for debugging purposes:
+ // - query results;
+ // - execution of sampling CE strategy.
+ const ceDebugFlag = false;
+ let ceStrategies = ["heuristic", "histogram"];
+ if (ceDebugFlag) {
+ ceStrategies.push("sampling");
+ }
+
+ const coll = db.ce_accuracy;
+ coll.drop();
+
+ // Collection to store accuracy errors.
+ const errorColl = db.ce_errors;
+ errorColl.drop();
+
+ // Stats collection.
+ const statsColl = db.system.statistics.ce_accuracy;
+
+ jsTestLog("Populating collection");
+ assert.commandWorked(coll.insertMany(getCEDocs()));
+ const collSize = coll.find().itcount();
+ print(`Collection count: ${coll.find().itcount()}\n`);
+ if (ceDebugFlag) {
+ show(coll.find().limit(100).toArray());
+ }
+
+ // Switch to 'tryBonsai' to create statistics.
+ assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "tryBonsai"}));
+ // Create indexes and statistics.
+ const fields = ["a", "b", "c"];
+ createIndexes(coll, fields);
+ analyzeFields(coll, fields);
+ for (const field of fields) {
+ let stats = statsColl.find({"_id": field})[0];
+ assert.eq(stats["statistics"]["documents"], collSize);
+ }
+
+ function runAggregationWithCE(testcase, strategy) {
+ if (testcase["nReturned"] == null) {
+ var explain = coll.explain("executionStats").aggregate(testcase.pipeline);
+ testcase["nReturned"] = explain.executionStats.nReturned;
+ } else {
+ var explain = coll.explain().aggregate(testcase.pipeline);
+ }
+ testcase[strategy] = round2(getRootCE(explain));
+ }
+
+ // Run all queries in the array testCases with all CE strategies.
+ function runQueries(testCases) {
+ ceStrategies.forEach(function(strategy) {
+ forceCE(strategy);
+ testCases.forEach(testCase => runAggregationWithCE(testCase, strategy));
+ });
+ }
+
+ function computeStrategyErrors(testcase, strategy, collSize) {
+ let errorDoc = {};
+ const absError = testcase[strategy] - testcase.nReturned;
+ let relError = 0.0;
+ if (testcase.nReturned > 0) {
+ relError = absError / testcase.nReturned;
+ } else if (testcase[strategy] > 0) {
+ // We cannot compute the relative error by division by zero. Take 10% of the absolute
+ // error instead.
+ relError = 0.1 * testcase[strategy];
+ }
+
+ // Selectivity error wrt collection size.
+ const selError = 100.0 * absError / collSize;
+ errorDoc["absError"] = round2(absError);
+ errorDoc["relError"] = round2(relError);
+ errorDoc["selError"] = round2(selError);
+ return errorDoc;
+ }
+
+ function computeAndPrintErrors(testcase, collSize) {
+ let errorDoc = {_id: testcase._id};
+
+ ceStrategies.forEach(function(strategy) {
+ errors = computeStrategyErrors(testcase, strategy, collSize);
+ errorDoc[strategy] = errors;
+ print(`${strategy}: ${testcase[strategy]} `);
+ print(`AbsError: ${errors["absError"]}, RelError: ${errors["relError"]} , SelError: ${
+ errors["selError"]}%\n`);
+ });
+ return errorDoc;
+ }
+
+ // Queries.
+ // Defined as documents: { _id: 1, pipeline: [{$match: {a: {$gt: 16}}}] }
+ let values = selectQueryValues(coll, "a");
+ let testCasesInt = generateComparisons("a", values);
+ let testCasesStr = generateComparisons("b", ["", genRandomString(10), genRandomString(15)]);
+ let testCasesArr = generateRangePredicates("c", [
+ [2, 4],
+ ]);
+ let testCases = testCasesInt.concat(testCasesStr).concat(testCasesArr);
+
+ let i = 0;
+ for (let query of testCases) {
+ query["_id"] = i++;
+ }
+
+ runQueries(testCases);
+
+ // Compute errors per query.
+ for (const testcase of testCases) {
+ jsTestLog(`Query ${testcase._id}: ${tojsononeline(testcase.pipeline)}`);
+ if (ceDebugFlag)
+ show(coll.aggregate(testcase.pipeline));
+ print(`Actual cardinality: ${testcase.nReturned}\n`);
+ print(`Cardinality estimates:\n`);
+ let errorDoc = computeAndPrintErrors(testcase, collSize);
+ errorColl.insert(errorDoc);
+ }
+
+ if (ceDebugFlag) {
+ show(testCases);
+ show(errorColl.find());
+ }
+
+ // Switch to 'forceClassicEngine' to run the pipelines computing the errors.
+ assert.commandWorked(
+ db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "forceClassicEngine"}));
+
+ // Compute aggregate errors for all CE strategies across all queries.
+ const trialSize = errorColl.find().itcount();
+
+ print(`\nHeuristic mean errors: `);
+ print(`absRMSE: ${computeRMSE(errorColl, "$heuristic.absError", trialSize)}, `);
+ print(`relRMSE: ${computeRMSE(errorColl, "$heuristic.relError", trialSize)}, `);
+ print(
+ `meanAbsSelErr: ${computeMeanAbsSelError(errorColl, "$heuristic.selError", trialSize)}%\n`);
+
+ print(`Histogram mean errors: `);
+ print(`absRMSE: ${computeRMSE(errorColl, "$histogram.absError", trialSize)}, `);
+ print(`relRMSE: ${computeRMSE(errorColl, "$histogram.relError", trialSize)}, `);
+ print(
+ `meanAbsSelErr: ${computeMeanAbsSelError(errorColl, "$histogram.selError", trialSize)}%\n`);
+
+ if (ceDebugFlag) {
+ print(`Sampling mean errors: `);
+ print(`absRMSE: ${computeRMSE(errorColl, "$sampling.absError", trialSize)}, `);
+ print(`relRMSE: ${computeRMSE(errorColl, "$sampling.relError", trialSize)}, `);
+ print(`meanAbsSelErr: ${
+ computeMeanAbsSelError(errorColl, "$sampling.selError", trialSize)}%\n`);
+ }
+});
+})();
diff --git a/jstests/query_golden/expected_output/ce_accuracy b/jstests/query_golden/expected_output/ce_accuracy
new file mode 100644
index 00000000000..518993f6b99
--- /dev/null
+++ b/jstests/query_golden/expected_output/ce_accuracy
@@ -0,0 +1,470 @@
+
+
+[jsTest] ----
+[jsTest] Populating collection
+[jsTest] ----
+
+Collection count: 10
+
+
+[jsTest] ----
+[jsTest] Query 0: [ { "$match" : { "a" : { "$eq" : 12 } } } ]
+[jsTest] ----
+
+Actual cardinality: 1
+Cardinality estimates:
+heuristic: 3.16 AbsError: 2.16, RelError: 2.16 , SelError: 21.6%
+histogram: 1 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 1: [ { "$match" : { "a" : { "$eq" : 15 } } } ]
+[jsTest] ----
+
+Actual cardinality: 1
+Cardinality estimates:
+heuristic: 3.16 AbsError: 2.16, RelError: 2.16 , SelError: 21.6%
+histogram: 1 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 2: [ { "$match" : { "a" : { "$eq" : 18 } } } ]
+[jsTest] ----
+
+Actual cardinality: 1
+Cardinality estimates:
+heuristic: 3.16 AbsError: 2.16, RelError: 2.16 , SelError: 21.6%
+histogram: 1 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 3: [ { "$match" : { "a" : { "$lt" : 12 } } } ]
+[jsTest] ----
+
+Actual cardinality: 2
+Cardinality estimates:
+heuristic: 5 AbsError: 3, RelError: 1.5 , SelError: 30%
+histogram: 2 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 4: [ { "$match" : { "a" : { "$lt" : 15 } } } ]
+[jsTest] ----
+
+Actual cardinality: 5
+Cardinality estimates:
+heuristic: 5 AbsError: 0, RelError: 0 , SelError: 0%
+histogram: 5 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 5: [ { "$match" : { "a" : { "$lt" : 18 } } } ]
+[jsTest] ----
+
+Actual cardinality: 8
+Cardinality estimates:
+heuristic: 5 AbsError: -3, RelError: -0.37 , SelError: -30%
+histogram: 8 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 6: [ { "$match" : { "a" : { "$lte" : 12 } } } ]
+[jsTest] ----
+
+Actual cardinality: 3
+Cardinality estimates:
+heuristic: 5 AbsError: 2, RelError: 0.67 , SelError: 20%
+histogram: 3 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 7: [ { "$match" : { "a" : { "$lte" : 15 } } } ]
+[jsTest] ----
+
+Actual cardinality: 6
+Cardinality estimates:
+heuristic: 5 AbsError: -1, RelError: -0.17 , SelError: -10%
+histogram: 6 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 8: [ { "$match" : { "a" : { "$lte" : 18 } } } ]
+[jsTest] ----
+
+Actual cardinality: 9
+Cardinality estimates:
+heuristic: 5 AbsError: -4, RelError: -0.44 , SelError: -40%
+histogram: 9 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 9: [ { "$match" : { "a" : { "$gt" : 12 } } } ]
+[jsTest] ----
+
+Actual cardinality: 7
+Cardinality estimates:
+heuristic: 7 AbsError: 0, RelError: 0 , SelError: 0%
+histogram: 7 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 10: [ { "$match" : { "a" : { "$gt" : 15 } } } ]
+[jsTest] ----
+
+Actual cardinality: 4
+Cardinality estimates:
+heuristic: 7 AbsError: 3, RelError: 0.75 , SelError: 30%
+histogram: 4 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 11: [ { "$match" : { "a" : { "$gt" : 18 } } } ]
+[jsTest] ----
+
+Actual cardinality: 1
+Cardinality estimates:
+heuristic: 7 AbsError: 6, RelError: 6 , SelError: 60%
+histogram: 1 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 12: [ { "$match" : { "a" : { "$gte" : 12 } } } ]
+[jsTest] ----
+
+Actual cardinality: 8
+Cardinality estimates:
+heuristic: 7 AbsError: -1, RelError: -0.12 , SelError: -10%
+histogram: 8 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 13: [ { "$match" : { "a" : { "$gte" : 15 } } } ]
+[jsTest] ----
+
+Actual cardinality: 5
+Cardinality estimates:
+heuristic: 7 AbsError: 2, RelError: 0.4 , SelError: 20%
+histogram: 5 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 14: [ { "$match" : { "a" : { "$gte" : 18 } } } ]
+[jsTest] ----
+
+Actual cardinality: 2
+Cardinality estimates:
+heuristic: 7 AbsError: 5, RelError: 2.5 , SelError: 50%
+histogram: 2 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 15: [ { "$match" : { "b" : { "$eq" : "" } } } ]
+[jsTest] ----
+
+Actual cardinality: 0
+Cardinality estimates:
+heuristic: 3.16 AbsError: 3.16, RelError: 0.32 , SelError: 31.6%
+histogram: 0 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 16: [ { "$match" : { "b" : { "$eq" : "mno" } } } ]
+[jsTest] ----
+
+Actual cardinality: 0
+Cardinality estimates:
+heuristic: 3.16 AbsError: 3.16, RelError: 0.32 , SelError: 31.6%
+histogram: 0 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 17: [ { "$match" : { "b" : { "$eq" : "stuv" } } } ]
+[jsTest] ----
+
+Actual cardinality: 0
+Cardinality estimates:
+heuristic: 3.16 AbsError: 3.16, RelError: 0.32 , SelError: 31.6%
+histogram: 0 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 18: [ { "$match" : { "b" : { "$lt" : "" } } } ]
+[jsTest] ----
+
+Actual cardinality: 0
+Cardinality estimates:
+heuristic: 0 AbsError: 0, RelError: 0 , SelError: 0%
+histogram: 0 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 19: [ { "$match" : { "b" : { "$lt" : "mno" } } } ]
+[jsTest] ----
+
+Actual cardinality: 4
+Cardinality estimates:
+heuristic: 5 AbsError: 1, RelError: 0.25 , SelError: 10%
+histogram: 4 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 20: [ { "$match" : { "b" : { "$lt" : "stuv" } } } ]
+[jsTest] ----
+
+Actual cardinality: 7
+Cardinality estimates:
+heuristic: 5 AbsError: -2, RelError: -0.29 , SelError: -20%
+histogram: 7 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 21: [ { "$match" : { "b" : { "$lte" : "" } } } ]
+[jsTest] ----
+
+Actual cardinality: 0
+Cardinality estimates:
+heuristic: 3.16 AbsError: 3.16, RelError: 0.32 , SelError: 31.6%
+histogram: 0 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 22: [ { "$match" : { "b" : { "$lte" : "mno" } } } ]
+[jsTest] ----
+
+Actual cardinality: 4
+Cardinality estimates:
+heuristic: 5 AbsError: 1, RelError: 0.25 , SelError: 10%
+histogram: 4 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 23: [ { "$match" : { "b" : { "$lte" : "stuv" } } } ]
+[jsTest] ----
+
+Actual cardinality: 7
+Cardinality estimates:
+heuristic: 5 AbsError: -2, RelError: -0.29 , SelError: -20%
+histogram: 7 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 24: [ { "$match" : { "b" : { "$gt" : "" } } } ]
+[jsTest] ----
+
+Actual cardinality: 10
+Cardinality estimates:
+heuristic: 7 AbsError: -3, RelError: -0.3 , SelError: -30%
+histogram: 10 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 25: [ { "$match" : { "b" : { "$gt" : "mno" } } } ]
+[jsTest] ----
+
+Actual cardinality: 6
+Cardinality estimates:
+heuristic: 7 AbsError: 1, RelError: 0.17 , SelError: 10%
+histogram: 6 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 26: [ { "$match" : { "b" : { "$gt" : "stuv" } } } ]
+[jsTest] ----
+
+Actual cardinality: 3
+Cardinality estimates:
+heuristic: 7 AbsError: 4, RelError: 1.33 , SelError: 40%
+histogram: 3 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 27: [ { "$match" : { "b" : { "$gte" : "" } } } ]
+[jsTest] ----
+
+Actual cardinality: 10
+Cardinality estimates:
+heuristic: 7 AbsError: -3, RelError: -0.3 , SelError: -30%
+histogram: 10 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 28: [ { "$match" : { "b" : { "$gte" : "mno" } } } ]
+[jsTest] ----
+
+Actual cardinality: 6
+Cardinality estimates:
+heuristic: 7 AbsError: 1, RelError: 0.17 , SelError: 10%
+histogram: 6 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 29: [ { "$match" : { "b" : { "$gte" : "stuv" } } } ]
+[jsTest] ----
+
+Actual cardinality: 3
+Cardinality estimates:
+heuristic: 7 AbsError: 4, RelError: 1.33 , SelError: 40%
+histogram: 3 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 30: [ { "$match" : { "c" : { "$gt" : 2, "$lt" : 4 } } } ]
+[jsTest] ----
+
+Actual cardinality: 2
+Cardinality estimates:
+heuristic: 4.18 AbsError: 2.18, RelError: 1.09 , SelError: 21.8%
+histogram: 2.85 AbsError: 0.85, RelError: 0.43 , SelError: 8.5%
+
+
+[jsTest] ----
+[jsTest] Query 31: [ { "$match" : { "c" : { "$elemMatch" : { "$gt" : 2, "$lt" : 4 } } } } ]
+[jsTest] ----
+
+Actual cardinality: 2
+Cardinality estimates:
+heuristic: 4.18 AbsError: 2.18, RelError: 1.09 , SelError: 21.8%
+histogram: 1.65 AbsError: -0.35, RelError: -0.18 , SelError: -3.5%
+
+
+[jsTest] ----
+[jsTest] Query 32: [ { "$match" : { "c" : { "$gt" : 2, "$lte" : 4 } } } ]
+[jsTest] ----
+
+Actual cardinality: 3
+Cardinality estimates:
+heuristic: 4.18 AbsError: 1.18, RelError: 0.39 , SelError: 11.8%
+histogram: 3.79 AbsError: 0.79, RelError: 0.26 , SelError: 7.9%
+
+
+[jsTest] ----
+[jsTest] Query 33: [ { "$match" : { "c" : { "$elemMatch" : { "$gt" : 2, "$lte" : 4 } } } } ]
+[jsTest] ----
+
+Actual cardinality: 3
+Cardinality estimates:
+heuristic: 4.18 AbsError: 1.18, RelError: 0.39 , SelError: 11.8%
+histogram: 4.13 AbsError: 1.13, RelError: 0.38 , SelError: 11.3%
+
+
+[jsTest] ----
+[jsTest] Query 34: [ { "$match" : { "c" : { "$gt" : 2, "$eq" : 4 } } } ]
+[jsTest] ----
+
+Actual cardinality: 3
+Cardinality estimates:
+heuristic: 2.65 AbsError: -0.35, RelError: -0.12 , SelError: -3.5%
+histogram: 2.85 AbsError: -0.15, RelError: -0.05 , SelError: -1.5%
+
+
+[jsTest] ----
+[jsTest] Query 35: [ { "$match" : { "c" : { "$elemMatch" : { "$gt" : 2, "$eq" : 4 } } } } ]
+[jsTest] ----
+
+Actual cardinality: 3
+Cardinality estimates:
+heuristic: 2.65 AbsError: -0.35, RelError: -0.12 , SelError: -3.5%
+histogram: 3 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 36: [ { "$match" : { "c" : { "$gte" : 2, "$lt" : 4 } } } ]
+[jsTest] ----
+
+Actual cardinality: 2
+Cardinality estimates:
+heuristic: 4.18 AbsError: 2.18, RelError: 1.09 , SelError: 21.8%
+histogram: 2.85 AbsError: 0.85, RelError: 0.43 , SelError: 8.5%
+
+
+[jsTest] ----
+[jsTest] Query 37: [ { "$match" : { "c" : { "$elemMatch" : { "$gte" : 2, "$lt" : 4 } } } } ]
+[jsTest] ----
+
+Actual cardinality: 2
+Cardinality estimates:
+heuristic: 4.18 AbsError: 2.18, RelError: 1.09 , SelError: 21.8%
+histogram: 2.48 AbsError: 0.48, RelError: 0.24 , SelError: 4.8%
+
+
+[jsTest] ----
+[jsTest] Query 38: [ { "$match" : { "c" : { "$gte" : 2, "$lte" : 4 } } } ]
+[jsTest] ----
+
+Actual cardinality: 3
+Cardinality estimates:
+heuristic: 4.18 AbsError: 1.18, RelError: 0.39 , SelError: 11.8%
+histogram: 3.79 AbsError: 0.79, RelError: 0.26 , SelError: 7.9%
+
+
+[jsTest] ----
+[jsTest] Query 39: [ { "$match" : { "c" : { "$elemMatch" : { "$gte" : 2, "$lte" : 4 } } } } ]
+[jsTest] ----
+
+Actual cardinality: 3
+Cardinality estimates:
+heuristic: 4.18 AbsError: 1.18, RelError: 0.39 , SelError: 11.8%
+histogram: 4.96 AbsError: 1.96, RelError: 0.65 , SelError: 19.6%
+
+
+[jsTest] ----
+[jsTest] Query 40: [ { "$match" : { "c" : { "$gte" : 2, "$eq" : 4 } } } ]
+[jsTest] ----
+
+Actual cardinality: 3
+Cardinality estimates:
+heuristic: 2.65 AbsError: -0.35, RelError: -0.12 , SelError: -3.5%
+histogram: 2.85 AbsError: -0.15, RelError: -0.05 , SelError: -1.5%
+
+
+[jsTest] ----
+[jsTest] Query 41: [ { "$match" : { "c" : { "$elemMatch" : { "$gte" : 2, "$eq" : 4 } } } } ]
+[jsTest] ----
+
+Actual cardinality: 3
+Cardinality estimates:
+heuristic: 2.65 AbsError: -0.35, RelError: -0.12 , SelError: -3.5%
+histogram: 3 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 42: [ { "$match" : { "c" : { "$eq" : 2, "$lt" : 4 } } } ]
+[jsTest] ----
+
+Actual cardinality: 1
+Cardinality estimates:
+heuristic: 2.24 AbsError: 1.24, RelError: 1.24 , SelError: 12.4%
+histogram: 0.55 AbsError: -0.45, RelError: -0.45 , SelError: -4.5%
+
+
+[jsTest] ----
+[jsTest] Query 43: [ { "$match" : { "c" : { "$elemMatch" : { "$eq" : 2, "$lt" : 4 } } } } ]
+[jsTest] ----
+
+Actual cardinality: 1
+Cardinality estimates:
+heuristic: 2.65 AbsError: 1.65, RelError: 1.65 , SelError: 16.5%
+histogram: 1 AbsError: 0, RelError: 0 , SelError: 0%
+
+
+[jsTest] ----
+[jsTest] Query 44: [ { "$match" : { "c" : { "$eq" : 2, "$lte" : 4 } } } ]
+[jsTest] ----
+
+Actual cardinality: 1
+Cardinality estimates:
+heuristic: 2.24 AbsError: 1.24, RelError: 1.24 , SelError: 12.4%
+histogram: 0.63 AbsError: -0.37, RelError: -0.37 , SelError: -3.7%
+
+
+[jsTest] ----
+[jsTest] Query 45: [ { "$match" : { "c" : { "$elemMatch" : { "$eq" : 2, "$lte" : 4 } } } } ]
+[jsTest] ----
+
+Actual cardinality: 1
+Cardinality estimates:
+heuristic: 2.65 AbsError: 1.65, RelError: 1.65 , SelError: 16.5%
+histogram: 1 AbsError: 0, RelError: 0 , SelError: 0%
+
+Heuristic mean errors: absRMSE: 2.394, relRMSE: 1.306, meanAbsSelErr: 19.943%
+Histogram mean errors: absRMSE: 0.431, relRMSE: 0.181, meanAbsSelErr: 1.809%
diff --git a/jstests/query_golden/libs/ce_data.js b/jstests/query_golden/libs/ce_data.js
new file mode 100644
index 00000000000..c59a6af58a9
--- /dev/null
+++ b/jstests/query_golden/libs/ce_data.js
@@ -0,0 +1,35 @@
+// Small data generator for the purpose of developing the test framework.
+
+const alphabet = "abcdefghijklmnopqrstuvwxyz";
+const len = alphabet.length;
+
+// Returns pseudo-random string where the symbols and the length are functions of the parameter n.
+function genRandomString(n) {
+ let strLen = n % 4 + 1;
+ let str = "";
+ let i = 0;
+ while (i < strLen) {
+ let idx = (100 * n + i++) % len;
+ str = str + alphabet[idx];
+ }
+ return str;
+}
+
+const seedArray = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+const arrLen = seedArray.length;
+
+// Returns pseudo-random array where the elements and the length are functions of the parameter n.
+function genRandomArray(n) {
+ let aLen = (7 * n) % 5 + 1;
+ let start = (13 * n) % arrLen;
+ return seedArray.slice(start, start + aLen);
+}
+
+/**
+ * Returns documents for cardinality estimation tests.
+ */
+
+function getCEDocs() {
+ return Array.from({length: 10},
+ (_, i) => ({_id: i, a: i + 10, b: genRandomString(i), c: genRandomArray(i)}));
+}
diff --git a/jstests/query_golden/libs/compute_errors.js b/jstests/query_golden/libs/compute_errors.js
new file mode 100644
index 00000000000..4443376217f
--- /dev/null
+++ b/jstests/query_golden/libs/compute_errors.js
@@ -0,0 +1,29 @@
+// Given a collection containing errors for individual queries, compute root-mean square error.
+function computeRMSE(errorColl, errorField, collSize) {
+ const res =
+ errorColl
+ .aggregate([
+ {$project: {error2: {$pow: [errorField, 2]}}},
+ {$group: {_id: null, cumError: {$sum: "$error2"}}},
+ {
+ $project:
+ {_id: 0, "rmse": {$round: [{$sqrt: {$divide: ["$cumError", collSize]}}, 3]}}
+ }
+ ])
+ .toArray();
+ return res[0].rmse;
+}
+
+// Given a collection with a field with selectivity errors, compute the mean absolute selectivity
+// error.
+function computeMeanAbsSelError(errorColl, errorField, collSize) {
+ const res =
+ errorColl
+ .aggregate([
+ {$project: {absError: {$abs: errorField}}},
+ {$group: {_id: null, cumError: {$sum: "$absError"}}},
+ {$project: {_id: 0, "meanErr": {$round: [{$divide: ["$cumError", collSize]}, 3]}}}
+ ])
+ .toArray();
+ return res[0].meanErr;
+}
diff --git a/jstests/query_golden/libs/generate_queries.js b/jstests/query_golden/libs/generate_queries.js
new file mode 100644
index 00000000000..7129ed64869
--- /dev/null
+++ b/jstests/query_golden/libs/generate_queries.js
@@ -0,0 +1,74 @@
+/**
+ * Helper functions for generating of queries over a collection.
+ */
+
+function makeMatchPredicate(field, boundary, compOp) {
+ return {"$match": {[field]: {[compOp]: boundary}}};
+}
+
+function makeRangePredicate(field, op1, bound1, op2, bound2, isElemMatch = false) {
+ if (isElemMatch) {
+ return {"$match": {[field]: {"$elemMatch": {[op1]: bound1, [op2]: bound2}}}};
+ }
+ return {"$match": {[field]: {[op1]: bound1, [op2]: bound2}}};
+}
+
+function generateComparisons(field, boundaries) {
+ let predicates = [];
+ const compOps = ["$eq", "$lt", "$lte", "$gt", "$gte"];
+ for (const op of compOps) {
+ boundaries.forEach(function(boundary) {
+ predicates.push(makeMatchPredicate(field, boundary, op));
+ });
+ }
+ let docs = [];
+ for (const pred of predicates) {
+ let doc = {"pipeline": [pred]};
+ docs.push(doc);
+ }
+ return docs;
+}
+
+// Generate range predicates with $match and $elemMatch.
+// boundaries:[ [low1, high1], ...]
+function generateRangePredicates(field, boundaries) {
+ let predicates = [];
+ const op1Option = ["$gt", "$gte", "$eq"];
+ const op2Option = ["$lt", "$lte", "$eq"];
+ boundaries.forEach(function(boundary) {
+ assert(boundary.length == 2);
+ for (let op1 of op1Option) {
+ for (let op2 of op2Option) {
+ if (op1 == "$eq" && op2 == "$eq") {
+ continue;
+ }
+ predicates.push(makeRangePredicate(field, op1, boundary[0], op2, boundary[1]));
+ if (boundary[0] <= boundary[1]) {
+ predicates.push(
+ makeRangePredicate(field, op1, boundary[0], op2, boundary[1], true));
+ }
+ }
+ }
+ });
+
+ let docs = [];
+ for (let pred of predicates) {
+ let doc = {"pipeline": [pred]};
+ docs.push(doc);
+ }
+ return docs;
+}
+
+// Helper function to extract a number of values from a collection field, to be used for query
+// generation.
+function selectQueryValues(coll, field) {
+ let values = [];
+ const collSize = coll.find().itcount();
+ let positions = [2, collSize / 2, collSize - 2];
+
+ for (const pos of positions) {
+ const res = coll.find({}, {"_id": 0, [field]: 1}).toArray()[pos];
+ values.push(res[field]);
+ }
+ return values;
+}