summaryrefslogtreecommitdiff
path: root/src/mongo/db/query/ce/ce_edge_cases_test.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/query/ce/ce_edge_cases_test.cpp')
-rw-r--r--src/mongo/db/query/ce/ce_edge_cases_test.cpp508
1 files changed, 508 insertions, 0 deletions
diff --git a/src/mongo/db/query/ce/ce_edge_cases_test.cpp b/src/mongo/db/query/ce/ce_edge_cases_test.cpp
new file mode 100644
index 00000000000..14cf86e17de
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_edge_cases_test.cpp
@@ -0,0 +1,508 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/ce/array_histogram.h"
+#include "mongo/db/query/ce/ce_test_utils.h"
+#include "mongo/db/query/ce/histogram_estimation.h"
+#include "mongo/db/query/sbe_stage_builder_helpers.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::ce {
+namespace {
+
+using namespace sbe;
+
+TEST(EstimatorTest, OneBucketIntHistogram) {
+ // Single-bucket histogram over 10 distinct values, each with frequency 3, in the range
+ // (-inf, 100]. Example: { -100, -20, 0, 20, 50, 60, 70, 80, 90, 100}.
+ std::vector<BucketData> data{{100, 3.0, 27.0, 9.0}};  // {bound, equal freq, range freq, NDV}
+ const ScalarHistogram hist = createHistogram(data);
+
+ ASSERT_EQ(30.0, getTotals(hist).card);
+
+ // Estimates with the bucket bound.
+ ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
+ ASSERT_EQ(27.0, estimateIntValCard(hist, 100, EstimationType::kLess));
+ ASSERT_EQ(30.0, estimateIntValCard(hist, 100, EstimationType::kLessOrEqual));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
+ ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kGreaterOrEqual));
+
+ // Estimates with a value inside the bucket.
+ ASSERT_EQ(3.0, estimateIntValCard(hist, 10, EstimationType::kEqual));
+ // No interpolation is possible for inequalities in a single bucket. The expected values
+ // combine the default of half the range frequency (27 / 2 = 13.5) with the in-bucket equality
+ // estimate (3.0); the Greater* values are the complements w.r.t. the total of 30.
+ ASSERT_EQ(10.5, estimateIntValCard(hist, 10, EstimationType::kLess));
+ ASSERT_EQ(13.5, estimateIntValCard(hist, 10, EstimationType::kLessOrEqual));
+ ASSERT_EQ(16.5, estimateIntValCard(hist, 10, EstimationType::kGreater));
+ ASSERT_EQ(19.5, estimateIntValCard(hist, 10, EstimationType::kGreaterOrEqual));
+
+ // Estimates for a value larger than the last bucket bound.
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual));
+ ASSERT_EQ(30.0, estimateIntValCard(hist, 1000, EstimationType::kLess));
+ ASSERT_EQ(30.0, estimateIntValCard(hist, 1000, EstimationType::kLessOrEqual));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreaterOrEqual));
+}
+
+TEST(EstimatorTest, OneExclusiveBucketIntHistogram) {
+ // Data set of a single distinct value (100) that occurs twice.
+ // By exclusive bucket we mean a bucket with only a boundary value, that is, the range frequency
+ // and NDV are zero.
+ std::vector<BucketData> data{{100, 2.0, 0.0, 0.0}};
+ const ScalarHistogram hist = createHistogram(data);
+
+ ASSERT_EQ(2.0, getTotals(hist).card);
+
+ // Estimates with the bucket boundary.
+ ASSERT_EQ(2.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kLess));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
+
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 0, EstimationType::kEqual));  // Below the bound.
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 0, EstimationType::kLess));
+ ASSERT_EQ(2.0, estimateIntValCard(hist, 0, EstimationType::kGreater));
+
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual));  // Above the bound.
+ ASSERT_EQ(2.0, estimateIntValCard(hist, 1000, EstimationType::kLess));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater));
+}
+
+TEST(EstimatorTest, OneBucketTwoIntValuesHistogram) {
+ // Data set of two distinct values in a single bucket, for example {5, 100, 100}.
+ std::vector<BucketData> data{{100, 2.0, 1.0, 1.0}};
+ const ScalarHistogram hist = createHistogram(data);
+
+ ASSERT_EQ(3.0, getTotals(hist).card);
+
+ // Estimates with the bucket boundary.
+ ASSERT_EQ(2.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
+ ASSERT_EQ(1.0, estimateIntValCard(hist, 100, EstimationType::kLess));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
+
+ ASSERT_EQ(1.0, estimateIntValCard(hist, 10, EstimationType::kEqual));  // Inside the bucket.
+ // Default estimate of half of the bucket's range frequency: 1.0 / 2 = 0.5.
+ ASSERT_EQ(0.5, estimateIntValCard(hist, 10, EstimationType::kLess));
+ ASSERT_EQ(2.5, estimateIntValCard(hist, 10, EstimationType::kGreater));
+
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual));  // Above the bound.
+ ASSERT_EQ(3.0, estimateIntValCard(hist, 1000, EstimationType::kLess));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater));
+}
+
+TEST(EstimatorTest, OneBucketTwoIntValuesHistogram2) {
+ // Similar to the above test, but with a higher frequency (3) for the value inside the bucket;
+ // the frequency of the bound (100) is unchanged. Example {5, 5, 5, 100, 100}.
+ std::vector<BucketData> data{{100, 2.0, 3.0, 1.0}};
+ const ScalarHistogram hist = createHistogram(data);
+
+ ASSERT_EQ(5.0, getTotals(hist).card);
+
+ // Estimates with the bucket boundary.
+ ASSERT_EQ(2.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
+ ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kLess));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
+
+ ASSERT_EQ(3.0, estimateIntValCard(hist, 10, EstimationType::kEqual));  // Inside the bucket.
+ // Default estimate of half of the bucket's range frequency: 3.0 / 2 = 1.5.
+ ASSERT_EQ(1.5, estimateIntValCard(hist, 10, EstimationType::kLess));
+ ASSERT_EQ(3.5, estimateIntValCard(hist, 10, EstimationType::kGreater));
+
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kEqual));  // Above the bound.
+ ASSERT_EQ(5.0, estimateIntValCard(hist, 1000, EstimationType::kLess));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 1000, EstimationType::kGreater));
+}
+
+
+TEST(EstimatorTest, TwoBucketsIntHistogram) {
+ // Data set of 30 values (10 distinct) in the range [1, 100].
+ std::vector<BucketData> data{{1, 1.0, 0.0, 0.0}, {100, 3.0, 26.0, 8.0}};
+ const ScalarHistogram hist = createHistogram(data);
+
+ ASSERT_EQ(30.0, getTotals(hist).card);
+
+ // Estimates for a value smaller than the first bucket.
+ ASSERT_EQ(0.0, estimateIntValCard(hist, -42, EstimationType::kEqual));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, -42, EstimationType::kLess));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, -42, EstimationType::kLessOrEqual));
+ ASSERT_EQ(30.0, estimateIntValCard(hist, -42, EstimationType::kGreater));
+ ASSERT_EQ(30.0, estimateIntValCard(hist, -42, EstimationType::kGreaterOrEqual));
+
+ // Estimates with bucket bounds.
+ ASSERT_EQ(1.0, estimateIntValCard(hist, 1, EstimationType::kEqual));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 1, EstimationType::kLess));
+ ASSERT_EQ(1.0, estimateIntValCard(hist, 1, EstimationType::kLessOrEqual));
+ ASSERT_EQ(29.0, estimateIntValCard(hist, 1, EstimationType::kGreater));
+ ASSERT_EQ(30.0, estimateIntValCard(hist, 1, EstimationType::kGreaterOrEqual));
+
+ ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kEqual));
+ ASSERT_EQ(27.0, estimateIntValCard(hist, 100, EstimationType::kLess));
+ ASSERT_EQ(30.0, estimateIntValCard(hist, 100, EstimationType::kLessOrEqual));
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 100, EstimationType::kGreater));
+ ASSERT_EQ(3.0, estimateIntValCard(hist, 100, EstimationType::kGreaterOrEqual));
+
+ // Estimates with a value inside the second bucket. The estimates use interpolation.
+ // The bucket ratio for the value of 10 is smaller than the estimate for equality
+ // and the estimates for Less and LessOrEqual are the same (within the 0.1 tolerance).
+ ASSERT_APPROX_EQUAL(3.3, estimateIntValCard(hist, 10, EstimationType::kEqual), 0.1);
+ ASSERT_APPROX_EQUAL(3.4, estimateIntValCard(hist, 10, EstimationType::kLess), 0.1);
+ ASSERT_APPROX_EQUAL(3.4, estimateIntValCard(hist, 10, EstimationType::kLessOrEqual), 0.1);
+ ASSERT_APPROX_EQUAL(26.6, estimateIntValCard(hist, 10, EstimationType::kGreater), 0.1);
+ ASSERT_APPROX_EQUAL(26.6, estimateIntValCard(hist, 10, EstimationType::kGreaterOrEqual), 0.1);
+
+ // Different estimates for Less and LessOrEqual for the value of 50.
+ ASSERT_APPROX_EQUAL(3.3, estimateIntValCard(hist, 50, EstimationType::kEqual), 0.1);
+ ASSERT_APPROX_EQUAL(10.6, estimateIntValCard(hist, 50, EstimationType::kLess), 0.1);
+ ASSERT_APPROX_EQUAL(13.9, estimateIntValCard(hist, 50, EstimationType::kLessOrEqual), 0.1);
+ ASSERT_APPROX_EQUAL(16.1, estimateIntValCard(hist, 50, EstimationType::kGreater), 0.1);
+ ASSERT_APPROX_EQUAL(19.4, estimateIntValCard(hist, 50, EstimationType::kGreaterOrEqual), 0.1);
+}
+
+TEST(EstimatorTest, ThreeExclusiveBucketsIntHistogram) {  // Boundary-only buckets: 1, 10, 100.
+ std::vector<BucketData> data{{1, 1.0, 0.0, 0.0}, {10, 8.0, 0.0, 0.0}, {100, 1.0, 0.0, 0.0}};
+ const ScalarHistogram hist = createHistogram(data);
+
+ ASSERT_EQ(10.0, getTotals(hist).card);
+
+ ASSERT_EQ(0.0, estimateIntValCard(hist, 5, EstimationType::kEqual));  // 5 is in an empty range.
+ ASSERT_EQ(1.0, estimateIntValCard(hist, 5, EstimationType::kLess));
+ ASSERT_EQ(1.0, estimateIntValCard(hist, 5, EstimationType::kLessOrEqual));
+ ASSERT_EQ(9.0, estimateIntValCard(hist, 5, EstimationType::kGreater));
+ ASSERT_EQ(9.0, estimateIntValCard(hist, 5, EstimationType::kGreaterOrEqual));
+}
+TEST(EstimatorTest, OneBucketStrHistogram) {
+ std::vector<BucketData> data{{"xyz", 3.0, 27.0, 9.0}};  // Single bucket bounded by "xyz".
+ const ScalarHistogram hist = createHistogram(data);
+
+ ASSERT_EQ(30.0, getTotals(hist).card);
+
+ // Estimates with the bucket bound. Note: 'expectedCard' holds the estimate being checked.
+ auto [tag, value] = value::makeNewString("xyz"_sd);
+ value::ValueGuard vg(tag, value);  // NOTE(review): later std::tie values - covered? Confirm.
+ double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_EQ(3.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_EQ(27.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+ ASSERT_EQ(30.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card;
+ ASSERT_EQ(3.0, expectedCard);
+
+ // Estimates for a value inside the bucket. Since there is no low value bound in the histogram,
+ // all values smaller than the upper bound will be estimated the same way, using half of the
+ // bucket cardinality.
+ std::tie(tag, value) = value::makeNewString("a"_sd);
+ expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_EQ(3.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_EQ(10.5, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+ ASSERT_EQ(13.5, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_EQ(16.5, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card;
+ ASSERT_EQ(19.5, expectedCard);
+
+ std::tie(tag, value) = value::makeNewString(""_sd);
+ expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_EQ(3.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_EQ(10.5, expectedCard);
+ // TODO: Can we do better by recognizing that the query value is the smallest in its data type?
+
+ // Estimates for a value larger than the upper bound.
+ std::tie(tag, value) = value::makeNewString("z"_sd);
+ expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_EQ(30.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_EQ(0.0, expectedCard);
+}
+
+TEST(EstimatorTest, TwoBucketsStrHistogram) {
+ // Data set of 100 strings (50 distinct) in the range ["abc", "xyz"]; average frequency of 2.
+ std::vector<BucketData> data{{"abc", 2.0, 0.0, 0.0}, {"xyz", 3.0, 95.0, 48.0}};
+ const ScalarHistogram hist = createHistogram(data);
+
+ ASSERT_EQ(100.0, getTotals(hist).card);
+
+ // Estimates for a value smaller than the first bucket bound.
+ auto [tag, value] = value::makeNewString("a"_sd);
+ value::ValueGuard vg(tag, value);  // NOTE(review): later std::tie values - covered? Confirm.
+
+ double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_EQ(100.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card;
+ ASSERT_EQ(100.0, expectedCard);
+
+ // Estimates with bucket bounds.
+ std::tie(tag, value) = value::makeNewString("abc"_sd);
+ expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_EQ(2.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+ ASSERT_EQ(2.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_EQ(98.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card;
+ ASSERT_EQ(100.0, expectedCard);
+
+ std::tie(tag, value) = value::makeNewString("xyz"_sd);
+ expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_EQ(3.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_EQ(97.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+ ASSERT_EQ(100.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card;
+ ASSERT_EQ(3.0, expectedCard);
+
+ // Estimates for a value inside the second bucket (checked with a tolerance of 0.1).
+ std::tie(tag, value) = value::makeNewString("sun"_sd);
+ expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_APPROX_EQUAL(2.0, expectedCard, 0.1);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_APPROX_EQUAL(74.4, expectedCard, 0.1);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+ ASSERT_APPROX_EQUAL(76.4, expectedCard, 0.1);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_APPROX_EQUAL(23.6, expectedCard, 0.1);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card;
+ ASSERT_APPROX_EQUAL(25.6, expectedCard, 0.1);
+
+ // Estimates for a value just below the upper bucket bound ("xyw" vs. "xyz").
+ std::tie(tag, value) = value::makeNewString("xyw"_sd);
+ expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_APPROX_EQUAL(2.0, expectedCard, 0.1);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_APPROX_EQUAL(95.0, expectedCard, 0.1);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLessOrEqual).card;
+ ASSERT_APPROX_EQUAL(97.0, expectedCard, 0.1);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_APPROX_EQUAL(3.0, expectedCard, 0.1);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreaterOrEqual).card;
+ ASSERT_APPROX_EQUAL(5.0, expectedCard, 0.1);
+}
+
+TEST(EstimatorTest, TwoBucketsDateHistogram) {
+ // June 6, 2017 -- June 7, 2017, in milliseconds since the epoch (bounds are 24 hours apart).
+ const int64_t startInstant = 1496777923000LL;
+ const int64_t endInstant = 1496864323000LL;
+ const auto startDate = Date_t::fromMillisSinceEpoch(startInstant);
+ const auto endDate = Date_t::fromMillisSinceEpoch(endInstant);
+
+ std::vector<BucketData> data{{Value(startDate), 3.0, 0.0, 0.0},
+ {Value(endDate), 1.0, 96.0, 48.0}};
+ const ScalarHistogram hist = createHistogram(data);
+
+ ASSERT_EQ(100.0, getTotals(hist).card);
+
+ const auto valueBefore = value::bitcastFrom<int64_t>(startInstant - 1);  // 1 ms before start.
+ double expectedCard =
+ estimate(hist, value::TypeTags::Date, valueBefore, EstimationType::kEqual).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, value::TypeTags::Date, valueBefore, EstimationType::kLess).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard =
+ estimate(hist, value::TypeTags::Date, valueBefore, EstimationType::kGreater).card;
+ ASSERT_EQ(100.0, expectedCard);
+
+ const auto valueStart = value::bitcastFrom<int64_t>(startInstant);  // Lower bucket bound.
+ expectedCard = estimate(hist, value::TypeTags::Date, valueStart, EstimationType::kEqual).card;
+ ASSERT_EQ(3.0, expectedCard);
+ expectedCard = estimate(hist, value::TypeTags::Date, valueStart, EstimationType::kLess).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, value::TypeTags::Date, valueStart, EstimationType::kGreater).card;
+ ASSERT_EQ(97.0, expectedCard);
+
+ const auto valueEnd = value::bitcastFrom<int64_t>(endInstant);  // Upper bucket bound.
+ expectedCard = estimate(hist, value::TypeTags::Date, valueEnd, EstimationType::kEqual).card;
+ ASSERT_EQ(1.0, expectedCard);
+ expectedCard = estimate(hist, value::TypeTags::Date, valueEnd, EstimationType::kLess).card;
+ ASSERT_EQ(99.0, expectedCard);
+ expectedCard = estimate(hist, value::TypeTags::Date, valueEnd, EstimationType::kGreater).card;
+ ASSERT_EQ(0.0, expectedCard);
+
+ const auto valueIn = value::bitcastFrom<int64_t>(startInstant + 43000000);  // Inside bucket.
+ expectedCard = estimate(hist, value::TypeTags::Date, valueIn, EstimationType::kEqual).card;
+ ASSERT_EQ(2.0, expectedCard);
+ expectedCard = estimate(hist, value::TypeTags::Date, valueIn, EstimationType::kLess).card;
+ ASSERT_APPROX_EQUAL(48.8, expectedCard, 0.1);
+ expectedCard = estimate(hist, value::TypeTags::Date, valueIn, EstimationType::kGreater).card;
+ ASSERT_APPROX_EQUAL(49.2, expectedCard, 0.1);
+
+ const auto valueAfter = value::bitcastFrom<int64_t>(endInstant + 100);  // 100 ms after end.
+ expectedCard = estimate(hist, value::TypeTags::Date, valueAfter, EstimationType::kEqual).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, value::TypeTags::Date, valueAfter, EstimationType::kLess).card;
+ ASSERT_EQ(100.0, expectedCard);
+ expectedCard = estimate(hist, value::TypeTags::Date, valueAfter, EstimationType::kGreater).card;
+ ASSERT_EQ(0.0, expectedCard);
+}
+
+TEST(EstimatorTest, TwoBucketsTimestampHistogram) {
+ // June 6, 2017 -- June 7, 2017, in seconds; probe values below are built from asULL().
+ const int64_t startInstant = 1496777923LL;
+ const int64_t endInstant = 1496864323LL;
+ const Timestamp startTs{Seconds(startInstant), 0};
+ const Timestamp endTs{Seconds(endInstant), 0};
+
+ std::vector<BucketData> data{{Value(startTs), 3.0, 0.0, 0.0}, {Value(endTs), 1.0, 96.0, 48.0}};
+ const ScalarHistogram hist = createHistogram(data);
+
+ ASSERT_EQ(100.0, getTotals(hist).card);
+
+ const auto valueBefore = value::bitcastFrom<int64_t>(startTs.asULL() - 1);  // Below start.
+ double expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kEqual).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kLess).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueBefore, EstimationType::kGreater).card;
+ ASSERT_EQ(100.0, expectedCard);
+
+ const auto valueStart = value::bitcastFrom<int64_t>(
+ startTs.asULL()); // NB: startTs.asInt64() produces a different value; do not substitute it.
+ expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kEqual).card;
+ ASSERT_EQ(3.0, expectedCard);
+ expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kLess).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueStart, EstimationType::kGreater).card;
+ ASSERT_EQ(97.0, expectedCard);
+
+ const auto valueEnd = value::bitcastFrom<int64_t>(endTs.asULL());  // Upper bucket bound.
+ expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kEqual).card;
+ ASSERT_EQ(1.0, expectedCard);
+ expectedCard = estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kLess).card;
+ ASSERT_EQ(99.0, expectedCard);
+ expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueEnd, EstimationType::kGreater).card;
+ ASSERT_EQ(0.0, expectedCard);
+
+ const auto valueIn = value::bitcastFrom<int64_t>((startTs.asULL() + endTs.asULL()) / 2);
+ expectedCard = estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kEqual).card;
+ ASSERT_EQ(2.0, expectedCard);
+ expectedCard = estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kLess).card;
+ ASSERT_APPROX_EQUAL(49.0, expectedCard, 0.1);
+ expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueIn, EstimationType::kGreater).card;
+ ASSERT_APPROX_EQUAL(49.0, expectedCard, 0.1);
+
+ const auto valueAfter = value::bitcastFrom<int64_t>(endTs.asULL() + 100);  // Above end.
+ expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kEqual).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kLess).card;
+ ASSERT_EQ(100.0, expectedCard);
+ expectedCard =
+ estimate(hist, value::TypeTags::Timestamp, valueAfter, EstimationType::kGreater).card;
+ ASSERT_EQ(0.0, expectedCard);
+}
+
+TEST(EstimatorTest, TwoBucketsObjectIdHistogram) {
+ const auto startOid = OID("63340d8d27afef2de7357e8d");
+ const auto endOid = OID("63340dbed6cd8af737d4139a");
+ ASSERT_TRUE(startOid < endOid);  // Sanity check: the bucket bounds are in ascending order.
+
+ std::vector<BucketData> data{{Value(startOid), 2.0, 0.0, 0.0},
+ {Value(endOid), 1.0, 97.0, 77.0}};
+ const ScalarHistogram hist = createHistogram(data);
+
+ ASSERT_EQ(100.0, getTotals(hist).card);
+
+ auto [tag, value] = value::makeNewObjectId();  // Overwritten in place for each probe below.
+ value::ValueGuard vg(tag, value);
+ const auto oidBefore = OID("63340d8d27afef2de7357e8c");  // startOid with last byte minus 1.
+ oidBefore.view().readInto(value::getObjectIdView(value));
+
+ double expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_EQ(100.0, expectedCard);
+
+ // Bucket bounds.
+ startOid.view().readInto(value::getObjectIdView(value));
+ expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_EQ(2.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_EQ(98.0, expectedCard);
+
+ endOid.view().readInto(value::getObjectIdView(value));
+ expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_EQ(1.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_EQ(99.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_EQ(0.0, expectedCard);
+
+ // ObjectId value inside the second bucket (checked with a tolerance of 0.1).
+ const auto oidInside = OID("63340db2cd4d46ff39178e9d");
+ oidInside.view().readInto(value::getObjectIdView(value));
+ expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_APPROX_EQUAL(1.2, expectedCard, 0.1);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_APPROX_EQUAL(83.9, expectedCard, 0.1);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_APPROX_EQUAL(14.8, expectedCard, 0.1);
+
+ const auto oidAfter = OID("63340dbed6cd8af737d4139b");  // endOid with last byte plus 1.
+ oidAfter.view().readInto(value::getObjectIdView(value));
+ expectedCard = estimate(hist, tag, value, EstimationType::kEqual).card;
+ ASSERT_EQ(0.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kLess).card;
+ ASSERT_EQ(100.0, expectedCard);
+ expectedCard = estimate(hist, tag, value, EstimationType::kGreater).card;
+ ASSERT_EQ(0.0, expectedCard);
+}
+
+} // namespace
+} // namespace mongo::ce