diff options
author | David Percy <david.percy@mongodb.com> | 2023-01-23 18:32:43 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-01-31 22:00:24 +0000 |
commit | e855f80487814a0dc008389c041e3d10e7e8dc17 (patch) | |
tree | 3f6339defcb05be266f3dd3837aa68f1e1e18006 | |
parent | cff019c9fe824ecfabbfdb35a1051c3f482210da (diff) | |
download | mongo-e855f80487814a0dc008389c041e3d10e7e8dc17.tar.gz |
SERVER-73110 Each shard should check extended-range for timeseries sorting
-rw-r--r-- | etc/backports_required_for_multiversion_tests.yml | 4 | ||||
-rw-r--r-- | jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js | 68 | ||||
-rw-r--r-- | src/mongo/db/commands/run_aggregate.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression_context.h | 11 | ||||
-rw-r--r-- | src/mongo/db/query/multiple_collection_accessor.h | 11 | ||||
-rw-r--r-- | src/mongo/db/sorter/sorter.cpp | 3 |
7 files changed, 103 insertions, 14 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 4ce9d53b0d0..5cc3e03d326 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -246,6 +246,8 @@ last-continuous: ticket: SERVER-68648 - test_file: jstests/sharding/timeseries_cluster_indexstats.js ticket: SERVER-72620 + - test_file: jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js + ticket: SERVER-73110 suites: null last-lts: all: @@ -569,4 +571,6 @@ last-lts: ticket: SERVER-68648 - test_file: jstests/sharding/timeseries_cluster_indexstats.js ticket: SERVER-72620 + - test_file: jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js + ticket: SERVER-73110 suites: null diff --git a/jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js b/jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js index 9f7e846e13c..06b40a81537 100644 --- a/jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js +++ b/jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js @@ -47,6 +47,33 @@ assert.commandWorked(collIndexed.createIndex({'t': 1})); jsTestLog(collIndexed.getIndexes()); jsTestLog(bucketsIndexed.getIndexes()); +function numShards() { + return db.getSiblingDB('config').shards.count(); +} +for (const collection of [buckets, bucketsIndexed]) { + if (FixtureHelpers.isSharded(collection) && numShards() >= 2) { + // Split and move data to create an interesting scenario: we have some data on each shard, + // but all the extended-range data is on a non-primary shard. This means view resolution is + // unaware of the extended-range data, because that happens on the primary shard. + + const shards = db.getSiblingDB('config').shards.find().toArray(); + const [shardName0, shardName1] = shards.map(doc => doc._id); + + assert.commandWorked(db.adminCommand({movePrimary: db.getName(), to: shardName0})); + const collName = collection.getFullName(); + // Our example data has documents between 2000-2003, and these dates are non-wrapping. + // So this goes on the primary shard, and everything else goes on the non-primary. + assert.commandWorked(sh.splitAt(collName, {'control.min.t': ISODate('2000-01-01')})); + assert.commandWorked(sh.splitAt(collName, {'control.min.t': ISODate('2003-01-01')})); + assert.commandWorked( + sh.moveChunk(collName, {'control.min.t': ISODate('1969-01-01')}, shardName1)); + assert.commandWorked( + sh.moveChunk(collName, {'control.min.t': ISODate('2000-01-01')}, shardName0)); + assert.commandWorked( + sh.moveChunk(collName, {'control.min.t': ISODate('2003-01-01')}, shardName1)); + } +} + const intervalMillis = 60000; function insertBucket(start) { jsTestLog("Inserting bucket starting with " + Date(start).toString()); @@ -67,7 +94,7 @@ function insertBucket(start) { function insertDocuments() { // We want to choose the underflow and overflow lower bits in such a way that we // encourage wrong results when the upper bytes are removed. - const underflowMin = new Date("1969-01-01").getTime(); // Day before the 32 bit epoch + const underflowMin = new Date("1969-01-01").getTime(); // Year before the 32 bit epoch const normalMin = new Date("2002-01-01").getTime(); // Middle of the 32 bit epoch insertBucket(underflowMin); @@ -107,6 +134,25 @@ function checkAgainstReferenceBoundedSortUnexpected( collection, reference, pipeline, hint, sortOrder) { const options = hint ? {hint: hint} : {}; + const bucket = db['system.buckets.' + coll.getName()]; + + const plan = collection.explain().aggregate(pipeline, options); + if (FixtureHelpers.isSharded(buckets) && numShards() >= 2) { + // With a sharded collection, some shards might not have any extended-range data, + // so they might still use $_internalBoundedSort. But we know at least one + // shard has extended-range data, so we know at least one shard has to + // use a blocking sort. + const bounded = getAggPlanStages(plan, "$_internalBoundedSort"); + const blocking = getAggPlanStages(plan, "$sort"); + assert.gt(blocking.length, 0, {bounded, blocking, plan}); + assert.lt(bounded.length, + FixtureHelpers.numberOfShardsForCollection(buckets), + {bounded, blocking, plan}); + } else { + const stages = getAggPlanStages(plan, "$_internalBoundedSort"); + assert.eq([], stages, plan); + } + const opt = collection.aggregate(pipeline, options).toArray(); assertSorted(opt, sortOrder); @@ -114,16 +160,16 @@ function checkAgainstReferenceBoundedSortUnexpected( for (var i = 0; i < opt.length; ++i) { assert.docEq(reference[i], opt[i]); } - - const plan = collection.explain({}).aggregate(pipeline, options); - const stages = getAggPlanStages(plan, "$_internalBoundedSort"); - assert.eq([], stages, plan); } function checkAgainstReferenceBoundedSortExpected( collection, reference, pipeline, hint, sortOrder) { const options = hint ? {hint: hint} : {}; + const plan = collection.explain().aggregate(pipeline, options); + const stages = getAggPlanStages(plan, "$_internalBoundedSort"); + assert.neq([], stages, plan); + const opt = collection.aggregate(pipeline, options).toArray(); assertSorted(opt, sortOrder); @@ -131,10 +177,6 @@ function checkAgainstReferenceBoundedSortExpected( for (var i = 0; i < opt.length; ++i) { assert.docEq(reference[i], opt[i]); } - - const plan = collection.explain({}).aggregate(pipeline, options); - const stages = getAggPlanStages(plan, "$_internalBoundedSort"); - assert.neq([], stages, plan); } function runTest(ascending) { @@ -174,10 +216,10 @@ function runTest(ascending) { .toArray(); assertSorted(referenceIndexed, ascending); - // Check plan using index scan. If we've inserted a date before 1-1-1970, we round the min up - // towards 1970, rather then down, which has the effect of increasing the control.min.t. This - // means the minimum time in the bucket is likely to be lower than indicated and thus, actual - // dates may be out of order relative to what's indicated by the bucket bounds. + // Check plan using index scan. If we've inserted a date before 1-1-1970, we round the min + // up towards 1970, rather then down, which has the effect of increasing the control.min.t. + // This means the minimum time in the bucket is likely to be lower than indicated and thus, + // actual dates may be out of order relative to what's indicated by the bucket bounds. checkAgainstReferenceBoundedSortUnexpected(collIndexed, referenceIndexed, [ diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp index 9172103b493..200dc47401f 100644 --- a/src/mongo/db/commands/run_aggregate.cpp +++ b/src/mongo/db/commands/run_aggregate.cpp @@ -914,6 +914,13 @@ Status runAggregate(OperationContext* opCtx, expCtx = makeExpressionContext( opCtx, request, std::move(*collatorToUse), uuid, collatorToUseMatchesDefault); + // If any involved collection contains extended-range data, set a flag which individual + // DocumentSource parsers can check. + collections.forEach([&](const CollectionPtr& coll) { + if (coll->getRequiresTimeseriesExtendedRangeSupport()) + expCtx->setRequiresTimeseriesExtendedRangeSupport(true); + }); + expCtx->startExpressionCounters(); auto pipeline = Pipeline::parse(request.getPipeline(), expCtx); expCtx->stopExpressionCounters(); diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp index 49ec3811575..15831e48ae7 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp @@ -259,6 +259,11 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF // if that's the case, no field will be added to 'bucketSpec.fieldSet' in the for-loop below. BucketUnpacker::Behavior unpackerBehavior = BucketUnpacker::Behavior::kExclude; BucketSpec bucketSpec; + // Use extended-range support if any individual collection requires it, even if 'specElem' + // doesn't mention this flag. + if (expCtx->getRequiresTimeseriesExtendedRangeSupport()) { + bucketSpec.setUsesExtendedRange(true); + } auto hasIncludeExclude = false; auto hasTimeField = false; auto hasBucketMaxSpanSeconds = false; @@ -456,6 +461,14 @@ void DocumentSourceInternalUnpackBucket::serializeToArray( if (_assumeNoMixedSchemaData) out.addField(kAssumeNoMixedSchemaData, Value(_assumeNoMixedSchemaData)); + if (spec.usesExtendedRange()) { + // Include this flag so that 'explain' is more helpful. + // But this is not so useful for communicating from one process to another, + // because mongos and/or the primary shard don't know whether any other shard + // has extended-range data. + out.addField(kUsesExtendedRange, Value{true}); + } + if (!spec.computedMetaProjFields().empty()) out.addField("computedMetaProjFields", Value{[&] { std::vector<Value> compFields; diff --git a/src/mongo/db/pipeline/expression_context.h b/src/mongo/db/pipeline/expression_context.h index 3139762d5cf..f159b240721 100644 --- a/src/mongo/db/pipeline/expression_context.h +++ b/src/mongo/db/pipeline/expression_context.h @@ -489,6 +489,15 @@ public: // expression counting. bool enabledCounters = true; + // Sets or clears a flag which tells DocumentSource parsers whether any involved Collection + // may contain extended-range dates. + void setRequiresTimeseriesExtendedRangeSupport(bool v) { + _requiresTimeseriesExtendedRangeSupport = v; + } + bool getRequiresTimeseriesExtendedRangeSupport() const { + return _requiresTimeseriesExtendedRangeSupport; + } + protected: static const int kInterruptCheckPeriod = 128; @@ -513,6 +522,8 @@ protected: bool _isCappedDelete = false; + bool _requiresTimeseriesExtendedRangeSupport = false; + private: boost::optional<ExpressionCounters> _expressionCounters = boost::none; }; diff --git a/src/mongo/db/query/multiple_collection_accessor.h b/src/mongo/db/query/multiple_collection_accessor.h index 26fc081000e..9d302ce2b28 100644 --- a/src/mongo/db/query/multiple_collection_accessor.h +++ b/src/mongo/db/query/multiple_collection_accessor.h @@ -101,6 +101,17 @@ public: _secondaryColls.clear(); } + void forEach(std::function<void(const CollectionPtr&)> func) const { + if (hasMainCollection()) { + func(getMainCollection()); + } + for (const auto& [name, coll] : getSecondaryCollections()) { + if (coll) { + func(coll); + } + } + } + private: const CollectionPtr* _mainColl{&CollectionPtr::null}; diff --git a/src/mongo/db/sorter/sorter.cpp b/src/mongo/db/sorter/sorter.cpp index a18297e493d..93ba45e4d74 100644 --- a/src/mongo/db/sorter/sorter.cpp +++ b/src/mongo/db/sorter/sorter.cpp @@ -1423,7 +1423,8 @@ void BoundedSorter<Key, Value, Comparator, BoundMaker>::add(Key key, Value value invariant(!_done); // If a new value violates what we thought was our min bound, something has gone wrong. uassert(6369910, - "BoundedSorter input is too out-of-order.", + str::stream() << "BoundedSorter input is too out-of-order: with bound " + << _min->toString() << ", did not expect input " << key.toString(), !_checkInput || !_min || compare(*_min, key) <= 0); // Each new item can potentially give us a tighter bound (a higher min). |