SERVER-71377 Adding missing count to CE for null

author: Alya Berciu <alya.berciu@mongodb.com> 2022-12-19 12:18:38 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-12-19 12:49:17 +0000
commit: 72ac89f5e7f44d93bda22eb4686b4827b03b65ed (patch)
tree: 445df5ae107d71146dd9f3d44fe3886d57087b40
parent: 014381421c79a4d24bed1ebf82b7075db2ce7bd9 (diff)
download: mongo-72ac89f5e7f44d93bda22eb4686b4827b03b65ed.tar.gz
2 files changed, 49 insertions, 10 deletions
diff --git a/jstests/cqf/analyze/type_counts.js b/jstests/cqf/analyze/type_counts.js
index 84f253c54ea..49ebe0f1eb9 100644
--- a/jstests/cqf/analyze/type_counts.js
+++ b/jstests/cqf/analyze/type_counts.js
@@ -203,7 +203,7 @@ runHistogramsTest(function testTypeCounts() {
     verifyCEForMatch({coll, predicate: {a: {c: 3}}, expected: [{_id: 10, a: {c: 3}}], ce: 3, hint});
     verifyCEForMatch({coll, predicate: {a: {notInColl: 1}}, expected: [], ce: 3, hint});
 
-    // Test null predicate match. TODO SERVER-71377: make estimate include missing values.
+    // Test null predicate match.
     verifyCEForMatch({
         coll,
         predicate: {a: null},
@@ -214,7 +214,6 @@ runHistogramsTest(function testTypeCounts() {
             {_id: 6, b: 2},
             {_id: 7},
         ],
-        ce: 3,
         hint
     });
 
@@ -422,7 +421,7 @@ runHistogramsTest(function testTypeCounts() {
         {coll, predicate: {"a.b": {c: 1}}, expected: [{_id: 15, a: {b: {c: 1}}}], ce: 2, hint});
     verifyCEForMatch({coll, predicate: {"a.b": {c: 2}}, expected: [], ce: 2, hint});
 
-    // Test null predicate match. TODO SERVER-71377: make estimate include missing values.
+    // Test null predicate match.
     verifyCEForMatch({
         coll,
         predicate: {"a.b": null},
@@ -437,14 +436,13 @@ runHistogramsTest(function testTypeCounts() {
             {_id: 10, "a.b": 1},
             {_id: 11, "a.b.c": 1},
         ],
-        ce: 1,
         hint
     });
 
     // Set up a collection to test CE for nested arrays and non-histogrammable types in arrays.
     coll.drop();
     assert.commandWorked(coll.createIndex({a: 1}));
-    assert.commandWorked(coll.insertMany([
+    const docs = [
         /* Booleans. */
         {_id: 0, a: true},
         {_id: 1, a: false},
@@ -478,7 +476,8 @@ runHistogramsTest(function testTypeCounts() {
         {_id: 26, a: [[null]]},
         /* Mixed array type-counts. */
         {_id: 27, a: [null, true, false, [], [1, 2, 3], ["a", "b", "c"], {a: 1}, {}]},
-    ]));
+    ];
+    assert.commandWorked(coll.insertMany(docs));
 
     // TODO SERVER-71057: Only count types once per array.
     createAndValidateHistogram({
@@ -745,7 +744,7 @@ runHistogramsTest(function testTypeCounts() {
         hint
     });
 
-    // Verify null CE. TODO SERVER-71377: make estimate include missing values.
+    // Verify null CE.
     // TODO SERVER-71057: Only count each null once per array.
     verifyCEForMatch({
         coll,
@@ -757,7 +756,7 @@ runHistogramsTest(function testTypeCounts() {
             {_id: 25, a: [null, null, null]},
             {_id: 27, a: [null, true, false, [], [1, 2, 3], ["a", "b", "c"], {a: 1}, {}]},
         ],
-        ce: 6,
+        ce: 7,
         hint
     });
     verifyCEForMatch({
@@ -792,5 +791,41 @@ runHistogramsTest(function testTypeCounts() {
         ce: 13,
         hint
     });
+
+    // Now create histograms on the same collection for paths that don't exist on any documents.
+    const statistics = {
+        typeCount: [
+            {typeName: "Nothing", count: 28},
+        ],
+        scalarHistogram: {buckets: [], bounds: []},
+        emptyArrayCount: 0,
+        trueCount: 0,
+        falseCount: 0,
+        documents: 28,
+    };
+    createAndValidateHistogram({coll, expectedHistogram: {_id: "notAPath", statistics}});
+    createAndValidateHistogram({coll, expectedHistogram: {_id: "notAPathEither", statistics}});
+
+    // Verify type count CE. Note that for non-$elemMatch predicates, we include both array and
+    // scalar type-counts, while for $elemMatch predicates, we include only array type counts in
+    // our estimate.
+    forceCE("histogram");
+
+    // Note: the hint is omitted because if we hint on a 'notAPath' index, optimization fails by
+    // running out of memory.
+    verifyCEForMatch({coll, predicate: {notAPath: {$eq: null}}, expected: docs});
+    verifyCEForMatch({coll, predicate: {notAPath: {$elemMatch: {$eq: null}}}, expected: []});
+    verifyCEForMatch({coll, predicate: {notAPathEither: {$eq: 1}}, expected: []});
+    verifyCEForMatch({coll, predicate: {notAPathEither: {$elemMatch: {$eq: 1}}}, expected: []});
+    verifyCEForMatch({
+        coll,
+        predicate: {$and: [{notAPath: {$eq: null}}, {notAPathEither: {$eq: null}}]},
+        expected: docs
+    });
+    verifyCEForMatch({
+        coll,
+        predicate: {$and: [{notAPath: {$eq: 1}}, {notAPathEither: {$eq: 1}}]},
+        expected: []
+    });
 });
 }());
diff --git a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
index 22452726877..2fe98f094c3 100644
--- a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
+++ b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
@@ -279,8 +279,12 @@ double getTypeCard(const ArrayHistogram& ah,
                 // all arrays, regardless of whether or not they are nested.
                 break;
             }
-            // TODO SERVER-71377: Use both missing & null counters for null equality.
-            // case value::TypeTags::Null: {}
+            case value::TypeTags::Null: {
+                // The predicate {$eq: null} matches both missing and null values.
+                count += ah.getTypeCount(value::TypeTags::Nothing);
+                count += ah.getTypeCount(value::TypeTags::Null);
+                break;
+            }
             default: { count += ah.getTypeCount(tag); }
         }
     }
author	Alya Berciu <alya.berciu@mongodb.com>	2022-12-19 12:18:38 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-12-19 12:49:17 +0000
commit	72ac89f5e7f44d93bda22eb4686b4827b03b65ed (patch)
tree	445df5ae107d71146dd9f3d44fe3886d57087b40
parent	014381421c79a4d24bed1ebf82b7075db2ce7bd9 (diff)
download	mongo-72ac89f5e7f44d93bda22eb4686b4827b03b65ed.tar.gz