summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Joel Redman <joel.redman@mongodb.com> 2022-09-27 16:19:20 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-09-27 17:30:37 +0000
commit 03d5afb49317b85f322ff8f902e72037467cdbcd (patch)
tree 878ca2baa139d9de3592ec331b6c95ba841f3b88
parent c2b252ae8d3508aa2bbea016d9ad84f7e1424f2e (diff)
download mongo-03d5afb49317b85f322ff8f902e72037467cdbcd.tar.gz
SERVER-66469 Fix correctness when filtering on dates before the epoch
-rw-r--r-- jstests/core/timeseries/timeseries_filter_extended_range.js 185
-rw-r--r-- src/mongo/db/exec/bucket_unpacker.cpp 228
-rw-r--r-- src/mongo/db/exec/bucket_unpacker.h 12
-rw-r--r-- src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp 7
-rw-r--r-- src/mongo/db/pipeline/document_source_internal_unpack_bucket.h 5
-rw-r--r-- src/mongo/db/views/resolved_view.cpp 25
-rw-r--r-- src/mongo/db/views/resolved_view.h 9
-rw-r--r-- src/mongo/db/views/view_catalog_helpers.cpp 5
8 files changed, 382 insertions, 94 deletions
diff --git a/jstests/core/timeseries/timeseries_filter_extended_range.js b/jstests/core/timeseries/timeseries_filter_extended_range.js
new file mode 100644
index 00000000000..86dc112dbe0
--- /dev/null
+++ b/jstests/core/timeseries/timeseries_filter_extended_range.js
@@ -0,0 +1,185 @@
+/**
+ * Test that find/match type queries work properly on dates outside the 32 bit epoch range,
+ * [1970-01-01 00:00:00 UTC - 2038-01-19 03:14:07 UTC].
+ *
+ * @tags: [
+ * requires_fcv_62,
+ * # We need a timeseries collection.
+ * requires_timeseries,
+ * ]
+ */
+
+(function() {
+"use strict";
+const timeFieldName = "time";
+
+/*
+ * Creates a collection, populates it, runs the `query` and ensures that the result set
+ * is equal to `results`.
+ *
+ * If overflow is set we create a document with dates above the 32 bit range (year 2040)
+ * If underflow is set, we create a document with dates below the 32 bit range (year 1965)
+ */
+function runTest(underflow, overflow, query, results) {
+ // Setup our DB & our collections.
+ const tsColl = db.getCollection(jsTestName());
+ tsColl.drop();
+
+ assert.commandWorked(
+ db.createCollection(tsColl.getName(), {timeseries: {timeField: timeFieldName}}));
+
+ const dates = [
+ // If underflow, we want to insert a date that would fall below the epoch
+ // i.e. 1970-01-01 00:00:00 UTC. Otherwise we use a date within the epoch.
+ {[timeFieldName]: underflow ? new Date("1965-01-01") : new Date("1971-01-01")},
+ {[timeFieldName]: new Date("1975-01-01")},
+ {[timeFieldName]: new Date("1980-01-01")},
+ {[timeFieldName]: new Date("1995-01-01")},
+ // If overflow, we want to insert a date that would need more than 32 bits to store as seconds
+ // after the epoch. This overflow will occur after 2038-01-19 03:14:07 UTC. Otherwise we go
+ // slightly before the end of the 32 bit epoch.
+ {[timeFieldName]: overflow ? new Date("2040-01-01") : new Date("2030-01-01")}
+ ];
+ assert.commandWorked(tsColl.insert(dates));
+
+ // Make sure the expected results are in the correct order for comparison below.
+ function cmpTimeFields(a, b) {
+ return (b[timeFieldName].getTime() - a[timeFieldName].getTime());
+ }
+ results.sort(cmpTimeFields);
+
+ const pipeline = [{$match: query}, {$project: {_id: 0, [timeFieldName]: 1}}];
+
+ // Verify agg pipeline. We don't want to go through a plan that encourages a sort order to
+ // avoid BUS and index selection, so we sort after gathering the results.
+ const aggActuals = tsColl.aggregate(pipeline).toArray();
+ aggActuals.sort(cmpTimeFields);
+ assert.docEq(aggActuals, results);
+
+ // Verify the equivalent find command. We again don't want to go through a plan that
+ // encourages a sort order to avoid BUS and index selection, so we sort after gathering the
+ // results.
+ let findActuals = tsColl.find(query, {_id: 0, [timeFieldName]: 1}).toArray();
+ findActuals.sort(cmpTimeFields);
+ assert.docEq(findActuals, results);
+}
+
+runTest(false,
+ false,
+ {[timeFieldName]: {$eq: new Date("1980-01-01")}},
+ [{[timeFieldName]: new Date("1980-01-01")}]);
+runTest(false,
+ true,
+ {[timeFieldName]: {$eq: new Date("2040-01-01")}},
+ [{[timeFieldName]: new Date("2040-01-01")}]);
+runTest(true,
+ false,
+ {[timeFieldName]: {$eq: new Date("1965-01-01")}},
+ [{[timeFieldName]: new Date("1965-01-01")}]);
+
+runTest(false,
+ false,
+ {[timeFieldName]: {$lt: new Date("1980-01-01")}},
+ [{[timeFieldName]: new Date("1971-01-01")}, {[timeFieldName]: new Date("1975-01-01")}]);
+runTest(false,
+ true,
+ {[timeFieldName]: {$lt: new Date("1980-01-01")}},
+ [{[timeFieldName]: new Date("1971-01-01")}, {[timeFieldName]: new Date("1975-01-01")}]);
+runTest(true,
+ false,
+ {[timeFieldName]: {$lt: new Date("1980-01-01")}},
+ [{[timeFieldName]: new Date("1965-01-01")}, {[timeFieldName]: new Date("1975-01-01")}]);
+runTest(true,
+ true,
+ {[timeFieldName]: {$lt: new Date("1980-01-01")}},
+ [{[timeFieldName]: new Date("1965-01-01")}, {[timeFieldName]: new Date("1975-01-01")}]);
+
+runTest(false,
+ false,
+ {[timeFieldName]: {$gt: new Date("1980-01-01")}},
+ [{[timeFieldName]: new Date("1995-01-01")}, {[timeFieldName]: new Date("2030-01-01")}]);
+runTest(false,
+ true,
+ {[timeFieldName]: {$gt: new Date("1980-01-01")}},
+ [{[timeFieldName]: new Date("1995-01-01")}, {[timeFieldName]: new Date("2040-01-01")}]);
+runTest(true,
+ false,
+ {[timeFieldName]: {$gt: new Date("1980-01-01")}},
+ [{[timeFieldName]: new Date("1995-01-01")}, {[timeFieldName]: new Date("2030-01-01")}]);
+runTest(true,
+ true,
+ {[timeFieldName]: {$gt: new Date("1980-01-01")}},
+ [{[timeFieldName]: new Date("1995-01-01")}, {[timeFieldName]: new Date("2040-01-01")}]);
+
+runTest(false, false, {[timeFieldName]: {$lte: new Date("1980-01-01")}}, [
+ {[timeFieldName]: new Date("1971-01-01")},
+ {[timeFieldName]: new Date("1975-01-01")},
+ {[timeFieldName]: new Date("1980-01-01")}
+]);
+runTest(false, true, {[timeFieldName]: {$lte: new Date("1980-01-01")}}, [
+ {[timeFieldName]: new Date("1971-01-01")},
+ {[timeFieldName]: new Date("1975-01-01")},
+ {[timeFieldName]: new Date("1980-01-01")}
+]);
+runTest(true, false, {[timeFieldName]: {$lte: new Date("1980-01-01")}}, [
+ {[timeFieldName]: new Date("1965-01-01")},
+ {[timeFieldName]: new Date("1975-01-01")},
+ {[timeFieldName]: new Date("1980-01-01")}
+]);
+runTest(true, true, {[timeFieldName]: {$lte: new Date("1980-01-01")}}, [
+ {[timeFieldName]: new Date("1965-01-01")},
+ {[timeFieldName]: new Date("1975-01-01")},
+ {[timeFieldName]: new Date("1980-01-01")}
+]);
+
+runTest(false, false, {[timeFieldName]: {$gte: new Date("1980-01-01")}}, [
+ {[timeFieldName]: new Date("1980-01-01")},
+ {[timeFieldName]: new Date("1995-01-01")},
+ {[timeFieldName]: new Date("2030-01-01")}
+]);
+runTest(false, true, {[timeFieldName]: {$gte: new Date("1980-01-01")}}, [
+ {[timeFieldName]: new Date("1980-01-01")},
+ {[timeFieldName]: new Date("1995-01-01")},
+ {[timeFieldName]: new Date("2040-01-01")}
+]);
+runTest(true, false, {[timeFieldName]: {$gte: new Date("1980-01-01")}}, [
+ {[timeFieldName]: new Date("1980-01-01")},
+ {[timeFieldName]: new Date("1995-01-01")},
+ {[timeFieldName]: new Date("2030-01-01")}
+]);
+runTest(true, true, {[timeFieldName]: {$gte: new Date("1980-01-01")}}, [
+ {[timeFieldName]: new Date("1980-01-01")},
+ {[timeFieldName]: new Date("1995-01-01")},
+ {[timeFieldName]: new Date("2040-01-01")}
+]);
+
+// Verify ranges that straddle the lower or upper bound of the epoch range work properly.
+runTest(
+ true, false, {[timeFieldName]: {$gt: new Date("1920-01-01"), $lt: new Date("1980-01-01")}}, [
+ {[timeFieldName]: new Date("1965-01-01")},
+ {[timeFieldName]: new Date("1975-01-01")},
+ ]);
+
+runTest(
+ false, true, {[timeFieldName]: {$gt: new Date("1980-01-01"), $lt: new Date("2050-01-01")}}, [
+ {[timeFieldName]: new Date("1995-01-01")},
+ {[timeFieldName]: new Date("2040-01-01")},
+ ]);
+
+// TODO: SERVER-69952 Literals outside the epoch are currently compared to _id, generally,
+// so we cannot match against them. This will have to be fixed in a similar manner by determining
+// whether the compared dates can be outside the epoch range and not relying on _id in that case.
+//
+// The following scenarios fail:
+// runTest(
+// false, false, {[timeFieldName]: {$gt: new Date("1920-01-01"), $lt: new Date("1980-01-01")}}, [
+// {[timeFieldName]: new Date("1971-01-01")},
+// {[timeFieldName]: new Date("1975-01-01")},
+// ]);
+// runTest(
+// false, false, {[timeFieldName]: {$gt: new Date("1980-01-01"), $lt: new Date("2050-01-01")}},
+// [
+// {[timeFieldName]: new Date("1995-01-01")},
+// {[timeFieldName]: new Date("2030-01-01")},
+// ]);
+})(); \ No newline at end of file
diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp
index 37847fc80c4..fd3e790ba5a 100644
--- a/src/mongo/db/exec/bucket_unpacker.cpp
+++ b/src/mongo/db/exec/bucket_unpacker.cpp
@@ -282,8 +282,9 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
// For $eq, make both a $lte against 'control.min' and a $gte predicate against
// 'control.max'.
//
- // If the comparison is against the 'time' field, include a predicate against the _id
- // field which is converted to the maximum for the corresponding range of ObjectIds and
+ // If the comparison is against the 'time' field and we haven't stored a time outside of
+ // the 32 bit range, include a predicate against the _id field which is converted to
+ // the maximum for the corresponding range of ObjectIds and
// is adjusted by the max range for a bucket to approximate the max bucket value given
// the min. Also include a predicate against the _id field which is converted to the
// minimum for the range of ObjectIds corresponding to the given date. In
@@ -293,60 +294,79 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
//
// The same procedure applies to aggregation expressions of the form
// {$expr: {$eq: [...]}} that can be rewritten to use $_internalExprEq.
- return isTimeField
- ? makePredicate(
- MatchExprPredicate<InternalExprLTEMatchExpression>(minPath, matchExprData),
- MatchExprPredicate<InternalExprGTEMatchExpression>(minPath,
- minTime.firstElement()),
- MatchExprPredicate<InternalExprGTEMatchExpression>(maxPath, matchExprData),
- MatchExprPredicate<InternalExprLTEMatchExpression>(maxPath,
- maxTime.firstElement()),
- MatchExprPredicate<LTEMatchExpression, Value>(
- kBucketIdFieldName,
- constructObjectIdValue<LTEMatchExpression>(matchExprData,
- bucketMaxSpanSeconds)),
- MatchExprPredicate<GTEMatchExpression, Value>(
- kBucketIdFieldName,
- constructObjectIdValue<GTEMatchExpression>(matchExprData,
- bucketMaxSpanSeconds)))
- : makeOr(makeVector<std::unique_ptr<MatchExpression>>(
- makePredicate(MatchExprPredicate<InternalExprLTEMatchExpression>(
- minPath, matchExprData),
- MatchExprPredicate<InternalExprGTEMatchExpression>(
- maxPath, matchExprData)),
- createTypeEqualityPredicate(
- pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
+ if (!isTimeField) {
+ return makeOr(makeVector<std::unique_ptr<MatchExpression>>(
+ makePredicate(
+ MatchExprPredicate<InternalExprLTEMatchExpression>(minPath, matchExprData),
+ MatchExprPredicate<InternalExprGTEMatchExpression>(maxPath, matchExprData)),
+ createTypeEqualityPredicate(pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
+ } else if (bucketSpec.usesExtendedRange()) {
+ return makePredicate(
+ MatchExprPredicate<InternalExprLTEMatchExpression>(minPath, matchExprData),
+ MatchExprPredicate<InternalExprGTEMatchExpression>(minPath,
+ minTime.firstElement()),
+ MatchExprPredicate<InternalExprGTEMatchExpression>(maxPath, matchExprData),
+ MatchExprPredicate<InternalExprLTEMatchExpression>(maxPath,
+ maxTime.firstElement()));
+ } else {
+ return makePredicate(
+ MatchExprPredicate<InternalExprLTEMatchExpression>(minPath, matchExprData),
+ MatchExprPredicate<InternalExprGTEMatchExpression>(minPath,
+ minTime.firstElement()),
+ MatchExprPredicate<InternalExprGTEMatchExpression>(maxPath, matchExprData),
+ MatchExprPredicate<InternalExprLTEMatchExpression>(maxPath,
+ maxTime.firstElement()),
+ MatchExprPredicate<LTEMatchExpression, Value>(
+ kBucketIdFieldName,
+ constructObjectIdValue<LTEMatchExpression>(matchExprData,
+ bucketMaxSpanSeconds)),
+ MatchExprPredicate<GTEMatchExpression, Value>(
+ kBucketIdFieldName,
+ constructObjectIdValue<GTEMatchExpression>(matchExprData,
+ bucketMaxSpanSeconds)));
+ }
+ MONGO_UNREACHABLE_TASSERT(6646903);
case MatchExpression::GT:
case MatchExpression::INTERNAL_EXPR_GT:
// For $gt, make a $gt predicate against 'control.max'. In addition, if the comparison
- // is against the 'time' field, include a predicate against the _id field which is
- // converted to the maximum for the corresponding range of ObjectIds and is adjusted
- // by the max range for a bucket to approximate the max bucket value given the min. In
- // addition, we include a {'control.min' : {$gt: 'time - bucketMaxSpanSeconds'}}
+ // is against the 'time' field, and the collection doesn't contain times outside the
+ // 32 bit range, include a predicate against the _id field which is converted to the
+ // maximum for the corresponding range of ObjectIds and is adjusted by the max range
+ // for a bucket to approximate the max bucket value given the min.
+ //
+ // In addition, we include a {'control.min' : {$gt: 'time - bucketMaxSpanSeconds'}}
// predicate which will be helpful in reducing bounds for index scans on 'time' field
// and routing on mongos.
//
// The same procedure applies to aggregation expressions of the form
// {$expr: {$gt: [...]}} that can be rewritten to use $_internalExprGt.
- return isTimeField
- ? makePredicate(
- MatchExprPredicate<InternalExprGTMatchExpression>(maxPath, matchExprData),
- MatchExprPredicate<InternalExprGTMatchExpression>(minPath,
- minTime.firstElement()),
- MatchExprPredicate<GTMatchExpression, Value>(
- kBucketIdFieldName,
- constructObjectIdValue<GTMatchExpression>(matchExprData,
- bucketMaxSpanSeconds)))
- : makeOr(makeVector<std::unique_ptr<MatchExpression>>(
- std::make_unique<InternalExprGTMatchExpression>(maxPath, matchExprData),
- createTypeEqualityPredicate(
- pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
+ if (!isTimeField) {
+ return makeOr(makeVector<std::unique_ptr<MatchExpression>>(
+ std::make_unique<InternalExprGTMatchExpression>(maxPath, matchExprData),
+ createTypeEqualityPredicate(pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
+ } else if (bucketSpec.usesExtendedRange()) {
+ return makePredicate(
+ MatchExprPredicate<InternalExprGTMatchExpression>(maxPath, matchExprData),
+ MatchExprPredicate<InternalExprGTMatchExpression>(minPath,
+ minTime.firstElement()));
+ } else {
+ return makePredicate(
+ MatchExprPredicate<InternalExprGTMatchExpression>(maxPath, matchExprData),
+ MatchExprPredicate<InternalExprGTMatchExpression>(minPath,
+ minTime.firstElement()),
+ MatchExprPredicate<GTMatchExpression, Value>(
+ kBucketIdFieldName,
+ constructObjectIdValue<GTMatchExpression>(matchExprData,
+ bucketMaxSpanSeconds)));
+ }
+ MONGO_UNREACHABLE_TASSERT(6646904);
case MatchExpression::GTE:
case MatchExpression::INTERNAL_EXPR_GTE:
// For $gte, make a $gte predicate against 'control.max'. In addition, if the comparison
- // is against the 'time' field, include a predicate against the _id field which is
+ // is against the 'time' field, and the collection doesn't contain times outside the
+ // 32 bit range, include a predicate against the _id field which is
// converted to the minimum for the corresponding range of ObjectIds and is adjusted
// by the max range for a bucket to approximate the max bucket value given the min. In
// addition, we include a {'control.min' : {$gte: 'time - bucketMaxSpanSeconds'}}
@@ -355,49 +375,67 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
//
// The same procedure applies to aggregation expressions of the form
// {$expr: {$gte: [...]}} that can be rewritten to use $_internalExprGte.
- return isTimeField
- ? makePredicate(
- MatchExprPredicate<InternalExprGTEMatchExpression>(maxPath, matchExprData),
- MatchExprPredicate<InternalExprGTEMatchExpression>(minPath,
- minTime.firstElement()),
- MatchExprPredicate<GTEMatchExpression, Value>(
- kBucketIdFieldName,
- constructObjectIdValue<GTEMatchExpression>(matchExprData,
- bucketMaxSpanSeconds)))
- : makeOr(makeVector<std::unique_ptr<MatchExpression>>(
- std::make_unique<InternalExprGTEMatchExpression>(maxPath, matchExprData),
- createTypeEqualityPredicate(
- pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
+ if (!isTimeField) {
+ return makeOr(makeVector<std::unique_ptr<MatchExpression>>(
+ std::make_unique<InternalExprGTEMatchExpression>(maxPath, matchExprData),
+ createTypeEqualityPredicate(pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
+ } else if (bucketSpec.usesExtendedRange()) {
+ return makePredicate(
+ MatchExprPredicate<InternalExprGTEMatchExpression>(maxPath, matchExprData),
+ MatchExprPredicate<InternalExprGTEMatchExpression>(minPath,
+ minTime.firstElement()));
+ } else {
+ return makePredicate(
+ MatchExprPredicate<InternalExprGTEMatchExpression>(maxPath, matchExprData),
+ MatchExprPredicate<InternalExprGTEMatchExpression>(minPath,
+ minTime.firstElement()),
+ MatchExprPredicate<GTEMatchExpression, Value>(
+ kBucketIdFieldName,
+ constructObjectIdValue<GTEMatchExpression>(matchExprData,
+ bucketMaxSpanSeconds)));
+ }
+ MONGO_UNREACHABLE_TASSERT(6646905);
case MatchExpression::LT:
case MatchExpression::INTERNAL_EXPR_LT:
// For $lt, make a $lt predicate against 'control.min'. In addition, if the comparison
// is against the 'time' field, include a predicate against the _id field which is
- // converted to the minimum for the corresponding range of ObjectIds. In
- // addition, we include a {'control.max' : {$lt: 'time + bucketMaxSpanSeconds'}}
+ // converted to the minimum for the corresponding range of ObjectIds, unless the
+ // collection contains extended range dates which won't fit into the 32 bits allocated
+ // for _id.
+ //
+ // In addition, we include a {'control.max' : {$lt: 'time + bucketMaxSpanSeconds'}}
// predicate which will be helpful in reducing bounds for index scans on 'time' field
// and routing on mongos.
//
// The same procedure applies to aggregation expressions of the form
// {$expr: {$lt: [...]}} that can be rewritten to use $_internalExprLt.
- return isTimeField
- ? makePredicate(
- MatchExprPredicate<InternalExprLTMatchExpression>(minPath, matchExprData),
- MatchExprPredicate<InternalExprLTMatchExpression>(maxPath,
- maxTime.firstElement()),
- MatchExprPredicate<LTMatchExpression, Value>(
- kBucketIdFieldName,
- constructObjectIdValue<LTMatchExpression>(matchExprData,
- bucketMaxSpanSeconds)))
- : makeOr(makeVector<std::unique_ptr<MatchExpression>>(
- std::make_unique<InternalExprLTMatchExpression>(minPath, matchExprData),
- createTypeEqualityPredicate(
- pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
+ if (!isTimeField) {
+ return makeOr(makeVector<std::unique_ptr<MatchExpression>>(
+ std::make_unique<InternalExprLTMatchExpression>(minPath, matchExprData),
+ createTypeEqualityPredicate(pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
+ } else if (bucketSpec.usesExtendedRange()) {
+ return makePredicate(
+ MatchExprPredicate<InternalExprLTMatchExpression>(minPath, matchExprData),
+ MatchExprPredicate<InternalExprLTMatchExpression>(maxPath,
+ maxTime.firstElement()));
+ } else {
+ return makePredicate(
+ MatchExprPredicate<InternalExprLTMatchExpression>(minPath, matchExprData),
+ MatchExprPredicate<InternalExprLTMatchExpression>(maxPath,
+ maxTime.firstElement()),
+ MatchExprPredicate<LTMatchExpression, Value>(
+ kBucketIdFieldName,
+ constructObjectIdValue<LTMatchExpression>(matchExprData,
+ bucketMaxSpanSeconds)));
+ }
+ MONGO_UNREACHABLE_TASSERT(6646906);
case MatchExpression::LTE:
case MatchExpression::INTERNAL_EXPR_LTE:
// For $lte, make a $lte predicate against 'control.min'. In addition, if the comparison
- // is against the 'time' field, include a predicate against the _id field which is
+ // is against the 'time' field, and the collection doesn't contain times outside the
+ // 32 bit range, include a predicate against the _id field which is
// converted to the maximum for the corresponding range of ObjectIds. In
// addition, we include a {'control.max' : {$lte: 'time + bucketMaxSpanSeconds'}}
// predicate which will be helpful in reducing bounds for index scans on 'time' field
@@ -405,19 +443,26 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
//
// The same procedure applies to aggregation expressions of the form
// {$expr: {$lte: [...]}} that can be rewritten to use $_internalExprLte.
- return isTimeField
- ? makePredicate(
- MatchExprPredicate<InternalExprLTEMatchExpression>(minPath, matchExprData),
- MatchExprPredicate<InternalExprLTEMatchExpression>(maxPath,
- maxTime.firstElement()),
- MatchExprPredicate<LTEMatchExpression, Value>(
- kBucketIdFieldName,
- constructObjectIdValue<LTEMatchExpression>(matchExprData,
- bucketMaxSpanSeconds)))
- : makeOr(makeVector<std::unique_ptr<MatchExpression>>(
- std::make_unique<InternalExprLTEMatchExpression>(minPath, matchExprData),
- createTypeEqualityPredicate(
- pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
+ if (!isTimeField) {
+ return makeOr(makeVector<std::unique_ptr<MatchExpression>>(
+ std::make_unique<InternalExprLTEMatchExpression>(minPath, matchExprData),
+ createTypeEqualityPredicate(pExpCtx, matchExprPath, assumeNoMixedSchemaData)));
+ } else if (bucketSpec.usesExtendedRange()) {
+ return makePredicate(
+ MatchExprPredicate<InternalExprLTEMatchExpression>(minPath, matchExprData),
+ MatchExprPredicate<InternalExprLTEMatchExpression>(maxPath,
+ maxTime.firstElement()));
+ } else {
+ return makePredicate(
+ MatchExprPredicate<InternalExprLTEMatchExpression>(minPath, matchExprData),
+ MatchExprPredicate<InternalExprLTEMatchExpression>(maxPath,
+ maxTime.firstElement()),
+ MatchExprPredicate<LTEMatchExpression, Value>(
+ kBucketIdFieldName,
+ constructObjectIdValue<LTEMatchExpression>(matchExprData,
+ bucketMaxSpanSeconds)));
+ }
+ MONGO_UNREACHABLE_TASSERT(6646907);
default:
MONGO_UNREACHABLE_TASSERT(5348302);
@@ -1009,12 +1054,14 @@ std::size_t BucketUnpackerV2::numberOfFields() {
BucketSpec::BucketSpec(const std::string& timeField,
const boost::optional<std::string>& metaField,
const std::set<std::string>& fields,
- const std::set<std::string>& computedProjections)
+ const std::set<std::string>& computedProjections,
+ bool usesExtendedRange)
: _fieldSet(fields),
_computedMetaProjFields(computedProjections),
_timeField(timeField),
_timeFieldHashed(FieldNameHasher().hashedFieldName(_timeField)),
- _metaField(metaField) {
+ _metaField(metaField),
+ _usesExtendedRange(usesExtendedRange) {
if (_metaField) {
_metaFieldHashed = FieldNameHasher().hashedFieldName(*_metaField);
}
@@ -1025,7 +1072,8 @@ BucketSpec::BucketSpec(const BucketSpec& other)
_computedMetaProjFields(other._computedMetaProjFields),
_timeField(other._timeField),
_timeFieldHashed(HashedFieldName{_timeField, other._timeFieldHashed->hash()}),
- _metaField(other._metaField) {
+ _metaField(other._metaField),
+ _usesExtendedRange(other._usesExtendedRange) {
if (_metaField) {
_metaFieldHashed = HashedFieldName{*_metaField, other._metaFieldHashed->hash()};
}
@@ -1036,7 +1084,8 @@ BucketSpec::BucketSpec(BucketSpec&& other)
_computedMetaProjFields(std::move(other._computedMetaProjFields)),
_timeField(std::move(other._timeField)),
_timeFieldHashed(HashedFieldName{_timeField, other._timeFieldHashed->hash()}),
- _metaField(std::move(other._metaField)) {
+ _metaField(std::move(other._metaField)),
+ _usesExtendedRange(other._usesExtendedRange) {
if (_metaField) {
_metaFieldHashed = HashedFieldName{*_metaField, other._metaFieldHashed->hash()};
}
@@ -1052,6 +1101,7 @@ BucketSpec& BucketSpec::operator=(const BucketSpec& other) {
if (_metaField) {
_metaFieldHashed = HashedFieldName{*_metaField, other._metaFieldHashed->hash()};
}
+ _usesExtendedRange = other._usesExtendedRange;
}
return *this;
}
diff --git a/src/mongo/db/exec/bucket_unpacker.h b/src/mongo/db/exec/bucket_unpacker.h
index 7e32629407d..8f7f8210618 100644
--- a/src/mongo/db/exec/bucket_unpacker.h
+++ b/src/mongo/db/exec/bucket_unpacker.h
@@ -58,7 +58,8 @@ public:
BucketSpec(const std::string& timeField,
const boost::optional<std::string>& metaField,
const std::set<std::string>& fields = {},
- const std::set<std::string>& computedProjections = {});
+ const std::set<std::string>& computedProjections = {},
+ bool usesExtendedRange = false);
BucketSpec(const BucketSpec&);
BucketSpec(BucketSpec&&);
@@ -104,6 +105,14 @@ public:
_computedMetaProjFields.erase(field);
}
+ void setUsesExtendedRange(bool usesExtendedRange) {
+ _usesExtendedRange = usesExtendedRange;
+ }
+
+ bool usesExtendedRange() const {
+ return _usesExtendedRange;
+ }
+
// Returns whether 'field' depends on a pushed down $addFields or computed $project.
bool fieldIsComputed(StringData field) const;
@@ -197,6 +206,7 @@ private:
boost::optional<std::string> _metaField = boost::none;
boost::optional<HashedFieldName> _metaFieldHashed = boost::none;
+ bool _usesExtendedRange = false;
};
/**
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
index a950aeee4f4..9ef6d892a51 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
@@ -246,6 +246,7 @@ DocumentSourceInternalUnpackBucket::DocumentSourceInternalUnpackBucket(
bool assumeNoMixedSchemaData)
: DocumentSource(kStageNameInternal, expCtx),
_assumeNoMixedSchemaData(assumeNoMixedSchemaData),
+
_bucketUnpacker(std::move(bucketUnpacker)),
_bucketMaxSpanSeconds{bucketMaxSpanSeconds} {}
@@ -353,6 +354,12 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
<< " field must be a bool, got: " << elem.type(),
elem.type() == BSONType::Bool);
bucketSpec.includeMaxTimeAsMetadata = elem.boolean();
+ } else if (fieldName == kUsesExtendedRange) {
+ uassert(6646901,
+ str::stream() << kUsesExtendedRange
+ << " field must be a bool, got: " << elem.type(),
+ elem.type() == BSONType::Bool);
+ bucketSpec.setUsesExtendedRange(elem.boolean());
} else {
uasserted(5346506,
str::stream()
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
index 76cb9c7ca5f..4dd93046533 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
@@ -47,6 +47,7 @@ public:
static constexpr StringData kInclude = "include"_sd;
static constexpr StringData kExclude = "exclude"_sd;
static constexpr StringData kAssumeNoMixedSchemaData = "assumeNoMixedSchemaData"_sd;
+ static constexpr StringData kUsesExtendedRange = "usesExtendedRange"_sd;
static constexpr StringData kBucketMaxSpanSeconds = "bucketMaxSpanSeconds"_sd;
static constexpr StringData kIncludeMinTimeAsMetadata = "includeMinTimeAsMetadata"_sd;
static constexpr StringData kIncludeMaxTimeAsMetadata = "includeMaxTimeAsMetadata"_sd;
@@ -250,6 +251,10 @@ private:
// predicates in order to ensure correctness.
bool _assumeNoMixedSchemaData = false;
+ // If any bucket contains dates outside the range of 1970-2038, we are unable to rely on
+ // the _id index, as _id is truncated to 32 bits.
+ bool _usesExtendedRange = false;
+
BucketUnpacker _bucketUnpacker;
int _bucketMaxSpanSeconds;
diff --git a/src/mongo/db/views/resolved_view.cpp b/src/mongo/db/views/resolved_view.cpp
index 77577aba02d..4c864be4696 100644
--- a/src/mongo/db/views/resolved_view.cpp
+++ b/src/mongo/db/views/resolved_view.cpp
@@ -91,11 +91,22 @@ ResolvedView ResolvedView::fromBSON(const BSONObj& commandResponseObj) {
mixedSchema = boost::optional<bool>(mixedSchemaElem.boolean());
}
+ boost::optional<bool> usesExtendedRange = boost::none;
+ if (auto usesExtendedRangeElem = viewDef[kTimeseriesUsesExtendedRange]) {
+ uassert(6646910,
+ str::stream() << "view definition must have " << kTimeseriesUsesExtendedRange
+ << " of type bool or no such field",
+ usesExtendedRangeElem.type() == BSONType::Bool);
+
+ usesExtendedRange = boost::optional<bool>(usesExtendedRangeElem.boolean());
+ }
+
return {NamespaceString(viewDef["ns"].valueStringData()),
std::move(pipeline),
std::move(collationSpec),
std::move(timeseriesOptions),
- std::move(mixedSchema)};
+ std::move(mixedSchema),
+ std::move(usesExtendedRange)};
}
void ResolvedView::serialize(BSONObjBuilder* builder) const {
@@ -109,6 +120,10 @@ void ResolvedView::serialize(BSONObjBuilder* builder) const {
// Only serialize if it doesn't contain mixed data.
if ((_timeseriesMayContainMixedData && !(*_timeseriesMayContainMixedData)))
subObj.append(kTimeseriesMayContainMixedData, *_timeseriesMayContainMixedData);
+
+ if ((_timeseriesUsesExtendedRange && (*_timeseriesUsesExtendedRange)))
+ subObj.append(kTimeseriesUsesExtendedRange, *_timeseriesUsesExtendedRange);
+
if (!_defaultCollation.isEmpty()) {
subObj.append("collation", _defaultCollation);
}
@@ -151,6 +166,7 @@ AggregateCommandRequest ResolvedView::asExpandedViewAggregation(
builder.append(elem);
}
}
+
resolvedPipeline[1] =
BSON(DocumentSourceInternalConvertBucketIndexStats::kStageName << builder.obj());
} else if (resolvedPipeline.size() >= 1 &&
@@ -164,6 +180,10 @@ AggregateCommandRequest ResolvedView::asExpandedViewAggregation(
}
builder.append(DocumentSourceInternalUnpackBucket::kAssumeNoMixedSchemaData,
((_timeseriesMayContainMixedData && !(*_timeseriesMayContainMixedData))));
+
+ builder.append(DocumentSourceInternalUnpackBucket::kUsesExtendedRange,
+ ((_timeseriesUsesExtendedRange && *_timeseriesUsesExtendedRange)));
+
resolvedPipeline[0] =
BSON(DocumentSourceInternalUnpackBucket::kStageNameInternal << builder.obj());
}
@@ -190,6 +210,9 @@ AggregateCommandRequest ResolvedView::asExpandedViewAggregation(
}
}
expandedRequest.setHint(rewritten);
+
+ // TODO(JLR): Possibly need to handle inappropriately forcing clustered here. Will likely be
+ // handled by removing the _id predicate, however.
} else {
expandedRequest.setHint(request.getHint());
}
diff --git a/src/mongo/db/views/resolved_view.h b/src/mongo/db/views/resolved_view.h
index c0b3ae3d503..82238556288 100644
--- a/src/mongo/db/views/resolved_view.h
+++ b/src/mongo/db/views/resolved_view.h
@@ -48,12 +48,14 @@ public:
std::vector<BSONObj> pipeline,
BSONObj defaultCollation,
boost::optional<TimeseriesOptions> timeseriesOptions = boost::none,
- boost::optional<bool> timeseriesMayContainMixedData = boost::none)
+ boost::optional<bool> timeseriesMayContainMixedData = boost::none,
+ boost::optional<bool> timeseriesUsesExtendedRange = boost::none)
: _namespace(collectionNs),
_pipeline(std::move(pipeline)),
_defaultCollation(std::move(defaultCollation)),
_timeseriesOptions(timeseriesOptions),
- _timeseriesMayContainMixedData(timeseriesMayContainMixedData) {}
+ _timeseriesMayContainMixedData(timeseriesMayContainMixedData),
+ _timeseriesUsesExtendedRange(timeseriesUsesExtendedRange) {}
static ResolvedView fromBSON(const BSONObj& commandResponseObj);
@@ -80,6 +82,8 @@ public:
static constexpr auto code = ErrorCodes::CommandOnShardedViewNotSupportedOnMongod;
static constexpr StringData kTimeseriesMayContainMixedData = "timeseriesMayContainMixedData"_sd;
static constexpr StringData kTimeseriesOptions = "timeseriesOptions"_sd;
+ static constexpr StringData kTimeseriesUsesExtendedRange = "timeseriesUsesExtendedRange"_sd;
+
void serialize(BSONObjBuilder* bob) const final;
static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj&);
@@ -97,6 +101,7 @@ private:
boost::optional<TimeseriesOptions> _timeseriesOptions;
boost::optional<bool> _timeseriesMayContainMixedData;
+ boost::optional<bool> _timeseriesUsesExtendedRange;
};
} // namespace mongo
diff --git a/src/mongo/db/views/view_catalog_helpers.cpp b/src/mongo/db/views/view_catalog_helpers.cpp
index 1dbda87d45c..4e47c0900eb 100644
--- a/src/mongo/db/views/view_catalog_helpers.cpp
+++ b/src/mongo/db/views/view_catalog_helpers.cpp
@@ -150,6 +150,7 @@ StatusWith<ResolvedView> resolveView(OperationContext* opCtx,
int depth = 0;
boost::optional<bool> mixedData = boost::none;
boost::optional<TimeseriesOptions> tsOptions = boost::none;
+ boost::optional<bool> hasExtendedRange = boost::none;
for (; depth < ViewGraph::kMaxViewDepth; depth++) {
auto view = catalog->lookupView(opCtx, *resolvedNss);
@@ -173,7 +174,8 @@ StatusWith<ResolvedView> resolveView(OperationContext* opCtx,
std::move(resolvedPipeline),
collation ? std::move(collation.value()) : CollationSpec::kSimpleSpec,
tsOptions,
- mixedData});
+ mixedData,
+ hasExtendedRange});
}
resolvedNss = &view->viewOn();
@@ -193,6 +195,7 @@ StatusWith<ResolvedView> resolveView(OperationContext* opCtx,
if (tsCollection) {
mixedData = tsCollection->getTimeseriesBucketsMayHaveMixedSchemaData();
tsOptions = tsCollection->getTimeseriesOptions();
+ hasExtendedRange = tsCollection->getRequiresTimeseriesExtendedRangeSupport();
}
}