summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlya Berciu <alya.berciu@mongodb.com>2022-12-19 12:18:38 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-12-19 12:49:17 +0000
commit72ac89f5e7f44d93bda22eb4686b4827b03b65ed (patch)
tree445df5ae107d71146dd9f3d44fe3886d57087b40
parent014381421c79a4d24bed1ebf82b7075db2ce7bd9 (diff)
downloadmongo-72ac89f5e7f44d93bda22eb4686b4827b03b65ed.tar.gz
SERVER-71377 Adding missing count to CE for null
-rw-r--r--jstests/cqf/analyze/type_counts.js51
-rw-r--r--src/mongo/db/query/ce/histogram_predicate_estimation.cpp8
2 files changed, 49 insertions, 10 deletions
diff --git a/jstests/cqf/analyze/type_counts.js b/jstests/cqf/analyze/type_counts.js
index 84f253c54ea..49ebe0f1eb9 100644
--- a/jstests/cqf/analyze/type_counts.js
+++ b/jstests/cqf/analyze/type_counts.js
@@ -203,7 +203,7 @@ runHistogramsTest(function testTypeCounts() {
verifyCEForMatch({coll, predicate: {a: {c: 3}}, expected: [{_id: 10, a: {c: 3}}], ce: 3, hint});
verifyCEForMatch({coll, predicate: {a: {notInColl: 1}}, expected: [], ce: 3, hint});
- // Test null predicate match. TODO SERVER-71377: make estimate include missing values.
+ // Test null predicate match.
verifyCEForMatch({
coll,
predicate: {a: null},
@@ -214,7 +214,6 @@ runHistogramsTest(function testTypeCounts() {
{_id: 6, b: 2},
{_id: 7},
],
- ce: 3,
hint
});
@@ -422,7 +421,7 @@ runHistogramsTest(function testTypeCounts() {
{coll, predicate: {"a.b": {c: 1}}, expected: [{_id: 15, a: {b: {c: 1}}}], ce: 2, hint});
verifyCEForMatch({coll, predicate: {"a.b": {c: 2}}, expected: [], ce: 2, hint});
- // Test null predicate match. TODO SERVER-71377: make estimate include missing values.
+ // Test null predicate match.
verifyCEForMatch({
coll,
predicate: {"a.b": null},
@@ -437,14 +436,13 @@ runHistogramsTest(function testTypeCounts() {
{_id: 10, "a.b": 1},
{_id: 11, "a.b.c": 1},
],
- ce: 1,
hint
});
// Set up a collection to test CE for nested arrays and non-histogrammable types in arrays.
coll.drop();
assert.commandWorked(coll.createIndex({a: 1}));
- assert.commandWorked(coll.insertMany([
+ const docs = [
/* Booleans. */
{_id: 0, a: true},
{_id: 1, a: false},
@@ -478,7 +476,8 @@ runHistogramsTest(function testTypeCounts() {
{_id: 26, a: [[null]]},
/* Mixed array type-counts. */
{_id: 27, a: [null, true, false, [], [1, 2, 3], ["a", "b", "c"], {a: 1}, {}]},
- ]));
+ ];
+ assert.commandWorked(coll.insertMany(docs));
// TODO SERVER-71057: Only count types once per array.
createAndValidateHistogram({
@@ -745,7 +744,7 @@ runHistogramsTest(function testTypeCounts() {
hint
});
- // Verify null CE. TODO SERVER-71377: make estimate include missing values.
+ // Verify null CE.
// TODO SERVER-71057: Only count each null once per array.
verifyCEForMatch({
coll,
@@ -757,7 +756,7 @@ runHistogramsTest(function testTypeCounts() {
{_id: 25, a: [null, null, null]},
{_id: 27, a: [null, true, false, [], [1, 2, 3], ["a", "b", "c"], {a: 1}, {}]},
],
- ce: 6,
+ ce: 7,
hint
});
verifyCEForMatch({
@@ -792,5 +791,41 @@ runHistogramsTest(function testTypeCounts() {
ce: 13,
hint
});
+
+ // Now create histograms on the same collection for paths that don't exist on any documents.
+ const statistics = {
+ typeCount: [
+ {typeName: "Nothing", count: 28},
+ ],
+ scalarHistogram: {buckets: [], bounds: []},
+ emptyArrayCount: 0,
+ trueCount: 0,
+ falseCount: 0,
+ documents: 28,
+ };
+ createAndValidateHistogram({coll, expectedHistogram: {_id: "notAPath", statistics}});
+ createAndValidateHistogram({coll, expectedHistogram: {_id: "notAPathEither", statistics}});
+
+ // Verify type count CE. Note that for non-$elemMatch predicates, we include both array and
+ // scalar type-counts, while for $elemMatch predicates, we include only array type counts in
+ // our estimate.
+ forceCE("histogram");
+
+ // Note: the hint is omitted because if we hint on a 'notAPath' index, optimization fails by
+ // running out of memory.
+ verifyCEForMatch({coll, predicate: {notAPath: {$eq: null}}, expected: docs});
+ verifyCEForMatch({coll, predicate: {notAPath: {$elemMatch: {$eq: null}}}, expected: []});
+ verifyCEForMatch({coll, predicate: {notAPathEither: {$eq: 1}}, expected: []});
+ verifyCEForMatch({coll, predicate: {notAPathEither: {$elemMatch: {$eq: 1}}}, expected: []});
+ verifyCEForMatch({
+ coll,
+ predicate: {$and: [{notAPath: {$eq: null}}, {notAPathEither: {$eq: null}}]},
+ expected: docs
+ });
+ verifyCEForMatch({
+ coll,
+ predicate: {$and: [{notAPath: {$eq: 1}}, {notAPathEither: {$eq: 1}}]},
+ expected: []
+ });
});
}());
diff --git a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
index 22452726877..2fe98f094c3 100644
--- a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
+++ b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
@@ -279,8 +279,12 @@ double getTypeCard(const ArrayHistogram& ah,
// all arrays, regardless of whether or not they are nested.
break;
}
- // TODO SERVER-71377: Use both missing & null counters for null equality.
- // case value::TypeTags::Null: {}
+ case value::TypeTags::Null: {
+ // The predicate {$eq: null} matches both missing and null values.
+ count += ah.getTypeCount(value::TypeTags::Nothing);
+ count += ah.getTypeCount(value::TypeTags::Null);
+ break;
+ }
default: { count += ah.getTypeCount(tag); }
}
}