diff options
author | Alya Berciu <alya.berciu@mongodb.com> | 2022-12-19 12:18:38 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-12-19 12:49:17 +0000 |
commit | 72ac89f5e7f44d93bda22eb4686b4827b03b65ed (patch) | |
tree | 445df5ae107d71146dd9f3d44fe3886d57087b40 | |
parent | 014381421c79a4d24bed1ebf82b7075db2ce7bd9 (diff) | |
download | mongo-72ac89f5e7f44d93bda22eb4686b4827b03b65ed.tar.gz |
SERVER-71377 Adding missing count to CE for null
-rw-r--r-- | jstests/cqf/analyze/type_counts.js | 51 | ||||
-rw-r--r-- | src/mongo/db/query/ce/histogram_predicate_estimation.cpp | 8 |
2 files changed, 49 insertions, 10 deletions
diff --git a/jstests/cqf/analyze/type_counts.js b/jstests/cqf/analyze/type_counts.js index 84f253c54ea..49ebe0f1eb9 100644 --- a/jstests/cqf/analyze/type_counts.js +++ b/jstests/cqf/analyze/type_counts.js @@ -203,7 +203,7 @@ runHistogramsTest(function testTypeCounts() { verifyCEForMatch({coll, predicate: {a: {c: 3}}, expected: [{_id: 10, a: {c: 3}}], ce: 3, hint}); verifyCEForMatch({coll, predicate: {a: {notInColl: 1}}, expected: [], ce: 3, hint}); - // Test null predicate match. TODO SERVER-71377: make estimate include missing values. + // Test null predicate match. verifyCEForMatch({ coll, predicate: {a: null}, @@ -214,7 +214,6 @@ runHistogramsTest(function testTypeCounts() { {_id: 6, b: 2}, {_id: 7}, ], - ce: 3, hint }); @@ -422,7 +421,7 @@ runHistogramsTest(function testTypeCounts() { {coll, predicate: {"a.b": {c: 1}}, expected: [{_id: 15, a: {b: {c: 1}}}], ce: 2, hint}); verifyCEForMatch({coll, predicate: {"a.b": {c: 2}}, expected: [], ce: 2, hint}); - // Test null predicate match. TODO SERVER-71377: make estimate include missing values. + // Test null predicate match. verifyCEForMatch({ coll, predicate: {"a.b": null}, @@ -437,14 +436,13 @@ runHistogramsTest(function testTypeCounts() { {_id: 10, "a.b": 1}, {_id: 11, "a.b.c": 1}, ], - ce: 1, hint }); // Set up a collection to test CE for nested arrays and non-histogrammable types in arrays. coll.drop(); assert.commandWorked(coll.createIndex({a: 1})); - assert.commandWorked(coll.insertMany([ + const docs = [ /* Booleans. */ {_id: 0, a: true}, {_id: 1, a: false}, @@ -478,7 +476,8 @@ runHistogramsTest(function testTypeCounts() { {_id: 26, a: [[null]]}, /* Mixed array type-counts. */ {_id: 27, a: [null, true, false, [], [1, 2, 3], ["a", "b", "c"], {a: 1}, {}]}, - ])); + ]; + assert.commandWorked(coll.insertMany(docs)); // TODO SERVER-71057: Only count types once per array. createAndValidateHistogram({ @@ -745,7 +744,7 @@ runHistogramsTest(function testTypeCounts() { hint }); - // Verify null CE. 
TODO SERVER-71377: make estimate include missing values. + // Verify null CE. // TODO SERVER-71057: Only count each null once per array. verifyCEForMatch({ coll, @@ -757,7 +756,7 @@ runHistogramsTest(function testTypeCounts() { {_id: 25, a: [null, null, null]}, {_id: 27, a: [null, true, false, [], [1, 2, 3], ["a", "b", "c"], {a: 1}, {}]}, ], - ce: 6, + ce: 7, hint }); verifyCEForMatch({ @@ -792,5 +791,41 @@ runHistogramsTest(function testTypeCounts() { ce: 13, hint }); + + // Now create histograms on the same collection for paths that don't exist on any documents. + const statistics = { + typeCount: [ + {typeName: "Nothing", count: 28}, + ], + scalarHistogram: {buckets: [], bounds: []}, + emptyArrayCount: 0, + trueCount: 0, + falseCount: 0, + documents: 28, + }; + createAndValidateHistogram({coll, expectedHistogram: {_id: "notAPath", statistics}}); + createAndValidateHistogram({coll, expectedHistogram: {_id: "notAPathEither", statistics}}); + + // Verify type count CE. Note that for non-$elemMatch predicates, we include both array and + // scalar type-counts, while for $elemMatch predicates, we include only array type counts in + // our estimate. + forceCE("histogram"); + + // Note: the hint is omitted because if we hint on a 'notAPath' index, optimization fails by + // running out of memory. 
+ verifyCEForMatch({coll, predicate: {notAPath: {$eq: null}}, expected: docs}); + verifyCEForMatch({coll, predicate: {notAPath: {$elemMatch: {$eq: null}}}, expected: []}); + verifyCEForMatch({coll, predicate: {notAPathEither: {$eq: 1}}, expected: []}); + verifyCEForMatch({coll, predicate: {notAPathEither: {$elemMatch: {$eq: 1}}}, expected: []}); + verifyCEForMatch({ + coll, + predicate: {$and: [{notAPath: {$eq: null}}, {notAPathEither: {$eq: null}}]}, + expected: docs + }); + verifyCEForMatch({ + coll, + predicate: {$and: [{notAPath: {$eq: 1}}, {notAPathEither: {$eq: 1}}]}, + expected: [] + }); }); }()); diff --git a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp index 22452726877..2fe98f094c3 100644 --- a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp +++ b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp @@ -279,8 +279,12 @@ double getTypeCard(const ArrayHistogram& ah, // all arrays, regardless of whether or not they are nested. break; } - // TODO SERVER-71377: Use both missing & null counters for null equality. - // case value::TypeTags::Null: {} + case value::TypeTags::Null: { + // The predicate {$eq: null} matches both missing and null values. + count += ah.getTypeCount(value::TypeTags::Nothing); + count += ah.getTypeCount(value::TypeTags::Null); + break; + } default: { count += ah.getTypeCount(tag); } } } |