summary | refs | log | tree | commit | diff
path: root/src/mongo/db/query
diff options
context:
space:
mode:
author Milena Ivanova <milena.ivanova@mongodb.com> 2023-02-08 10:56:38 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2023-02-08 12:17:43 +0000
commit 709346fbd32a726d1a13fde1f2246fc3932652fb (patch)
tree 264bb2af499be58853bd7fe69c97e7c33d8e0f41 /src/mongo/db/query
parent 2e6ee8f329d2cf122856d8ee789ff82a5e75a686 (diff)
download mongo-709346fbd32a726d1a13fde1f2246fc3932652fb.tar.gz
SERVER-73432 Extend CE accuracy testing for arrays
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- src/mongo/db/query/ce/histogram_estimator_test.cpp 30
-rw-r--r-- src/mongo/db/query/ce/histogram_predicate_estimation.cpp 4
2 files changed, 16 insertions, 18 deletions
diff --git a/src/mongo/db/query/ce/histogram_estimator_test.cpp b/src/mongo/db/query/ce/histogram_estimator_test.cpp
index 4276803f8e6..dccb8cefbcf 100644
--- a/src/mongo/db/query/ce/histogram_estimator_test.cpp
+++ b/src/mongo/db/query/ce/histogram_estimator_test.cpp
@@ -571,20 +571,20 @@ TEST_F(CEHistogramTest, TestArrayHistogramOnAtomicPredicates) {
ASSERT_EQ_ELEMMATCH_CE(t, 0.0 /* CE */, 0.0 /* $elemMatch CE */, "a", "{$gt: 10}");
ASSERT_EQ_ELEMMATCH_CE(t, 1.0 /* CE */, 1.0 /* $elemMatch CE */, "a", "{$gte: 10}");
- ASSERT_EQ_ELEMMATCH_CE(t, 5.0 /* CE */, 4.0 /* $elemMatch CE */, "a", "{$lte: 10}");
- ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 4.0 /* $elemMatch CE */, "a", "{$lt: 10}");
- ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 4.0 /* $elemMatch CE */, "a", "{$gt: 1}");
- ASSERT_EQ_ELEMMATCH_CE(t, 5.0 /* CE */, 4.0 /* $elemMatch CE */, "a", "{$gte: 1}");
+ ASSERT_EQ_ELEMMATCH_CE(t, 5.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lte: 10}");
+ ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lt: 10}");
+ ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gt: 1}");
+ ASSERT_EQ_ELEMMATCH_CE(t, 5.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gte: 1}");
- ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 4.0 /* $elemMatch CE */, "a", "{$lte: 5}");
- ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 4.0 /* $elemMatch CE */, "a", "{$lt: 5}");
+ ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lte: 5}");
+ ASSERT_EQ_ELEMMATCH_CE(t, 4.0 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$lt: 5}");
ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 2.0 /* $elemMatch CE */, "a", "{$gt: 5}");
- ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 2.55085 /* $elemMatch CE */, "a", "{$gte: 5}");
+ ASSERT_EQ_ELEMMATCH_CE(t, 2.0 /* CE */, 2.40822 /* $elemMatch CE */, "a", "{$gte: 5}");
- ASSERT_EQ_ELEMMATCH_CE(t, 2.45 /* CE */, 2.55085 /* $elemMatch CE */, "a", "{$gt: 2, $lt: 5}");
- ASSERT_EQ_ELEMMATCH_CE(t, 3.27 /* CE */, 4.0 /* $elemMatch CE */, "a", "{$gte: 2, $lt: 5}");
- ASSERT_EQ_ELEMMATCH_CE(t, 2.45 /* CE */, 3.40113 /* $elemMatch CE */, "a", "{$gt: 2, $lte: 5}");
- ASSERT_EQ_ELEMMATCH_CE(t, 3.27 /* CE */, 4.0 /* $elemMatch CE */, "a", "{$gte: 2, $lte: 5}");
+ ASSERT_EQ_ELEMMATCH_CE(t, 2.45 /* CE */, 2.40822 /* $elemMatch CE */, "a", "{$gt: 2, $lt: 5}");
+ ASSERT_EQ_ELEMMATCH_CE(t, 3.27 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gte: 2, $lt: 5}");
+ ASSERT_EQ_ELEMMATCH_CE(t, 2.45 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gt: 2, $lte: 5}");
+ ASSERT_EQ_ELEMMATCH_CE(t, 3.27 /* CE */, 3.0 /* $elemMatch CE */, "a", "{$gte: 2, $lte: 5}");
}
TEST_F(CEHistogramTest, TestArrayHistogramOnCompositePredicates) {
@@ -715,8 +715,8 @@ TEST_F(CEHistogramTest, TestArrayHistogramOnCompositePredicates) {
ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$eq: 5}}, array: {$eq: 5}}", 35.0);
// Test case with multiple predicates and ranges.
- ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$lt: 5}}", 70.2156);
- ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$gt: 5}}", 28.4848);
+ ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$lt: 5}}", 67.1508);
+ ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$gt: 5}}", 27.8562);
// Test multiple $elemMatches.
ASSERT_MATCH_CE(t, "{scalar: {$elemMatch: {$eq: 5}}, array: {$elemMatch: {$eq: 5}}}", 0.0);
@@ -734,8 +734,8 @@ TEST_F(CEHistogramTest, TestArrayHistogramOnCompositePredicates) {
"{scalar: {$elemMatch: {$eq: 5}}, mixed: {$elemMatch: {$eq: 5}}, array: "
"{$elemMatch: {$eq: 5}}}",
0.0);
- ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$lt: 5}}}", 34.1434);
- ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$gt: 5}}}", 45.5246);
+ ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$lt: 5}}}", 31.7118);
+ ASSERT_MATCH_CE(t, "{array: {$elemMatch: {$lt: 5}}, mixed: {$elemMatch: {$gt: 5}}}", 42.2825);
// Verify that we still return an estimate of 0.0 for any $elemMatch predicate on a scalar
// field when we have a non-multikey index.
diff --git a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
index 4075323c684..3671cd4ea6a 100644
--- a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
+++ b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
@@ -322,9 +322,7 @@ CEType estimateCardRange(const ArrayHistogram& ah,
const auto arrayMaxEst = estRange(ah.getArrayMax());
const auto arrayUniqueEst = estRange(ah.getArrayUnique());
- // ToDo: try using ah.getArrayCount() - ah.getEmptyArrayCount();
- // when the number of empty arrays is provided by the statistics.
- const double totalArrayCount = ah.getArrayCount();
+ const double totalArrayCount = ah.getArrayCount() - ah.getEmptyArrayCount();
uassert(
6715101, "Array histograms should contain at least one array", totalArrayCount > 0);