author     Joel Redman <77073003+joredman@users.noreply.github.com>    2022-10-18 23:43:27 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>            2023-01-31 22:00:24 +0000
commit     2b65016588fc5f868b0e396b64d1fe9da916a343 (patch)
tree       f052351f52e1e8402f117e1ea9326dda88017999
parent     aedaf14d6dcc60253f3944252eb5cfc588e08ecb (diff)
download   mongo-2b65016588fc5f868b0e396b64d1fe9da916a343.tar.gz
SERVER-66793 Use explicit sort when extended range dates are used
-rw-r--r--  jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js  226
-rw-r--r--  src/mongo/db/pipeline/pipeline_d.cpp                                    15
2 files changed, 239 insertions, 2 deletions
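
The change makes the aggregation planner fall back to an explicit sort whenever the bucket spec reports extended-range dates, since bucket min/max bounds cannot be trusted to order such data. A minimal shell sketch of the affected scenario (collection and field names here are hypothetical):

    // A time-series collection containing a pre-1970 ("extended range") date.
    db.createCollection("ts_example", {timeseries: {timeField: "t"}});
    db.ts_example.insert({t: new Date("1969-06-01")});  // before the 32-bit epoch
    db.ts_example.insert({t: new Date("2002-06-01")});
    // With this change, explain output for a sort on time should no longer
    // report the $_internalBoundedSort stage for this collection:
    db.ts_example.explain().aggregate([{$sort: {t: 1}}]);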
diff --git a/jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js b/jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js
new file mode 100644
index 00000000000..7e0d7f2d91f
--- /dev/null
+++ b/jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js
@@ -0,0 +1,226 @@
+/**
+ * Test that sort queries work properly on dates outside the 32-bit epoch range,
+ * [1970-01-01 00:00:00 UTC - 2038-01-19 03:14:07 UTC], when a collection scan is used.
+ *
+ * @tags: [
+ * # Explain of a resolved view must be executed by mongos.
+ * directly_against_shardsvrs_incompatible,
+ * # This complicates aggregation extraction.
+ * do_not_wrap_aggregations_in_facets,
+ * # Refusing to run a test that issues an aggregation command with explain because it may
+ * # return incomplete results if interrupted by a stepdown.
+ * does_not_support_stepdowns,
+ * # We need a timeseries collection.
+ * requires_timeseries,
+ * # Cannot insert into a time-series collection in a multi-document transaction.
+ * does_not_support_transactions,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/aggregation/extras/utils.js"); // For getExplainedPipelineFromAggregation.
+load("jstests/core/timeseries/libs/timeseries.js");
+load('jstests/libs/analyze_plan.js');
+
+if (!TimeseriesTest.bucketUnpackWithSortEnabled(db.getMongo())) {
+ jsTestLog("Skipping test because 'BucketUnpackWithSort' is disabled.");
+ return;
+}
+
+const timeFieldName = "t";
+
+// Create unindexed collection
+const coll = db.timeseries_internal_bounded_sort;
+const buckets = db['system.buckets.' + coll.getName()];
+coll.drop();
+assert.commandWorked(db.createCollection(coll.getName(), {timeseries: {timeField: timeFieldName}}));
+
+// Create collection indexed on time
+const collIndexed = db.timeseries_internal_bounded_sort_with_index;
+const bucketsIndexed = db['system.buckets.' + collIndexed.getName()];
+collIndexed.drop();
+
+assert.commandWorked(
+    db.createCollection(collIndexed.getName(), {timeseries: {timeField: timeFieldName}}));
+assert.commandWorked(collIndexed.createIndex({'t': 1}));
+
+jsTestLog(collIndexed.getIndexes());
+jsTestLog(bucketsIndexed.getIndexes());
+
+const intervalMillis = 60000;
+function insertBucket(start) {
+ jsTestLog("Inserting bucket starting with " + Date(start).toString());
+
+ const batchSize = 1000;
+
+ const batch =
+ Array.from({length: batchSize}, (_, j) => ({t: new Date(start + j * intervalMillis)}));
+
+ assert.commandWorked(coll.insert(batch));
+ assert.commandWorked(collIndexed.insert(batch));
+}
+
+// Insert some data: one bucket of dates below the epoch (values < 0) and five buckets
+// of dates within the normal range, [0, FFFFFFFF] (unsigned). Ideally we would also
+// cover dates above FFFFFFFF, but JavaScript's Date does not handle such far-future
+// dates reliably here, so we rely on the pre-1970 dates to test for unexpected
+// behavior.
+function insertDocuments() {
+ // We want to choose the underflow and overflow lower bits in such a way that we
+ // encourage wrong results when the upper bytes are removed.
+ const underflowMin = new Date("1969-01-01").getTime(); // Day before the 32 bit epoch
+ const normalMin = new Date("2002-01-01").getTime(); // Middle of the 32 bit epoch
+
+ insertBucket(underflowMin);
+
+ const numBatches = 5;
+
+ const batchOffset = Math.floor(intervalMillis / (numBatches + 1));
+ for (let i = 0; i < numBatches; ++i) {
+ const start = normalMin + i * batchOffset;
+ insertBucket(start);
+ }
+ assert.gt(buckets.aggregate([{$count: 'n'}]).next().n, 1, 'Expected more than one bucket');
+}
+
+insertDocuments();
+
+// Capture the $_internalUnpackBucket stage from an explain of the plain collection,
+// so the reference pipelines below can unpack buckets the same way queries do.
+const unpackStage = getAggPlanStages(coll.explain().aggregate(), '$_internalUnpackBucket')[0];
+
+function assertSorted(result, ascending) {
+ let prev = ascending ? {t: -Infinity} : {t: Infinity};
+ for (const doc of result) {
+ if (ascending) {
+ assert.lt(+prev.t,
+ +doc.t,
+ 'Found two docs not in ascending time order: ' + tojson({prev, doc}));
+ } else {
+ assert.gt(+prev.t,
+ +doc.t,
+ 'Found two docs not in descending time order: ' + tojson({prev, doc}));
+ }
+
+ prev = doc;
+ }
+}
+
+// Runs the pipeline on the given collection, checks the results against the
+// reference, and asserts the plan does NOT use $_internalBoundedSort.
+function checkAgainstReferenceBoundedSortUnexpected(
+ collection, reference, pipeline, hint, sortOrder) {
+ const options = hint ? {hint: hint} : {};
+
+ const opt = collection.aggregate(pipeline, options).toArray();
+ assertSorted(opt, sortOrder);
+
+ assert.eq(reference.length, opt.length);
+ for (let i = 0; i < opt.length; ++i) {
+ assert.docEq(reference[i], opt[i]);
+ }
+
+ const plan = collection.explain({}).aggregate(pipeline, options);
+ const stages = getAggPlanStages(plan, "$_internalBoundedSort");
+ assert.eq([], stages, plan);
+}
+
+// Runs the pipeline on the given collection, checks the results against the
+// reference, and asserts the plan DOES use $_internalBoundedSort.
+function checkAgainstReferenceBoundedSortExpected(
+ collection, reference, pipeline, hint, sortOrder) {
+ const options = hint ? {hint: hint} : {};
+
+ const opt = collection.aggregate(pipeline, options).toArray();
+ assertSorted(opt, sortOrder);
+
+ assert.eq(reference.length, opt.length);
+ for (let i = 0; i < opt.length; ++i) {
+ assert.docEq(reference[i], opt[i]);
+ }
+
+ const plan = collection.explain({}).aggregate(pipeline, options);
+ const stages = getAggPlanStages(plan, "$_internalBoundedSort");
+ assert.neq([], stages, plan);
+}
+
+function runTest(ascending) {
+ const reference = buckets
+ .aggregate([
+ unpackStage,
+ {$_internalInhibitOptimization: {}},
+ {$sort: {t: ascending ? 1 : -1}},
+ ])
+ .toArray();
+ assertSorted(reference, ascending);
+
+ // Check plan using collection scan
+ checkAgainstReferenceBoundedSortUnexpected(coll,
+ reference,
+ [
+ {$sort: {t: ascending ? 1 : -1}},
+ ],
+ {},
+ ascending);
+
+ // Check plan using hinted collection scan
+ checkAgainstReferenceBoundedSortUnexpected(coll,
+ reference,
+ [
+ {$sort: {t: ascending ? 1 : -1}},
+ ],
+ {"$natural": ascending ? 1 : -1},
+ ascending);
+
+ const referenceIndexed = bucketsIndexed
+ .aggregate([
+ unpackStage,
+ {$_internalInhibitOptimization: {}},
+ {$sort: {t: ascending ? 1 : -1}},
+ ])
+ .toArray();
+ assertSorted(referenceIndexed, ascending);
+
+ // Check plan using index scan. If we've inserted a date before 1970-01-01, we round the min
+ // up towards 1970, rather than down, which has the effect of increasing control.min.t. This
+ // means the minimum time in the bucket is likely to be lower than indicated, and thus actual
+ // dates may be out of order relative to what's indicated by the bucket bounds.
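+ // For example (illustrative values): a measurement at 1969-12-31T23:59:00Z should sort
+ // before one at 1970-01-01T00:00:00Z, but its bucket's control.min.t may have been
+ // rounded up toward 1970, so ordering buckets by control.min.t does not reliably
+ // order the underlying measurements.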
+ checkAgainstReferenceBoundedSortUnexpected(collIndexed,
+ referenceIndexed,
+ [
+ {$sort: {t: ascending ? 1 : -1}},
+ ],
+ {},
+ ascending);
+
+ // Check plan using hinted index scan
+ checkAgainstReferenceBoundedSortUnexpected(collIndexed,
+ referenceIndexed,
+ [
+ {$sort: {t: ascending ? 1 : -1}},
+ ],
+ {"t": 1},
+ ascending);
+
+ // Check plan using hinted collection scan
+ checkAgainstReferenceBoundedSortUnexpected(collIndexed,
+ referenceIndexed,
+ [
+ {$sort: {t: ascending ? 1 : -1}},
+ ],
+ {"$natural": ascending ? 1 : -1},
+ ascending);
+
+ // The workaround in all cases is to create a reverse (descending) index on the time
+ // field; note that a hint is needed to force use of the reversed index.
+ const reverseIdxName = "reverseIdx";
+ assert.commandWorked(collIndexed.createIndex({t: -1}, {name: reverseIdxName}));
+
+ checkAgainstReferenceBoundedSortExpected(collIndexed,
+ referenceIndexed,
+ [
+ {$sort: {t: ascending ? 1 : -1}},
+ ],
+ {"t": -1},
+ ascending);
+
+ collIndexed.dropIndex(reverseIdxName);
+}
+
+runTest(false); // descending
+runTest(true); // ascending
+})();
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp
index dcad7e866f9..d2c09df5604 100644
--- a/src/mongo/db/pipeline/pipeline_d.cpp
+++ b/src/mongo/db/pipeline/pipeline_d.cpp
@@ -928,8 +928,10 @@ PipelineD::supportsSort(const BucketUnpacker& bucketUnpacker,
const CollectionScan* scan = static_cast<CollectionScan*>(root);
if (sort.size() == 1) {
auto part = sort[0];
- // Check the sort we're asking for is on time.
- if (part.fieldPath && *part.fieldPath == bucketUnpacker.getTimeField()) {
+ // Check the sort we're asking for is on time, and that the buckets are actually
+ // ordered on time.
+ if (part.fieldPath && *part.fieldPath == bucketUnpacker.getTimeField() &&
+ !bucketUnpacker.bucketSpec().usesExtendedRange()) {
// Check that the directions agree.
if ((scan->getDirection() == CollectionScanParams::Direction::FORWARD) ==
part.isAscending)
@@ -1036,6 +1038,15 @@ PipelineD::supportsSort(const BucketUnpacker& bucketUnpacker,
if (ixField != controlMinTime && ixField != controlMaxTime)
return boost::none;
+ // If we've inserted a date before 1970-01-01, we round the min up towards 1970,
+ // rather than down, which has the effect of increasing control.min.t. This
+ // means the minimum time in the bucket is likely to be lower than indicated,
+ // and thus actual dates may be out of order relative to what's indicated by
+ // the bucket bounds.
+ if (ixField == controlMinTime &&
+ bucketUnpacker.bucketSpec().usesExtendedRange())
+ return boost::none;
+
if (!directionCompatible(*keyPatternIter, *sortIter))
return boost::none;