summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Percy <david.percy@mongodb.com>2023-01-23 18:32:43 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-01-31 22:00:24 +0000
commite855f80487814a0dc008389c041e3d10e7e8dc17 (patch)
tree3f6339defcb05be266f3dd3837aa68f1e1e18006
parentcff019c9fe824ecfabbfdb35a1051c3f482210da (diff)
downloadmongo-e855f80487814a0dc008389c041e3d10e7e8dc17.tar.gz
SERVER-73110 Each shard should check extended-range for timeseries sorting
-rw-r--r--etc/backports_required_for_multiversion_tests.yml4
-rw-r--r--jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js68
-rw-r--r--src/mongo/db/commands/run_aggregate.cpp7
-rw-r--r--src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp13
-rw-r--r--src/mongo/db/pipeline/expression_context.h11
-rw-r--r--src/mongo/db/query/multiple_collection_accessor.h11
-rw-r--r--src/mongo/db/sorter/sorter.cpp3
7 files changed, 103 insertions, 14 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 4ce9d53b0d0..5cc3e03d326 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -246,6 +246,8 @@ last-continuous:
ticket: SERVER-68648
- test_file: jstests/sharding/timeseries_cluster_indexstats.js
ticket: SERVER-72620
+ - test_file: jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js
+ ticket: SERVER-73110
suites: null
last-lts:
all:
@@ -569,4 +571,6 @@ last-lts:
ticket: SERVER-68648
- test_file: jstests/sharding/timeseries_cluster_indexstats.js
ticket: SERVER-72620
+ - test_file: jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js
+ ticket: SERVER-73110
suites: null
diff --git a/jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js b/jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js
index 9f7e846e13c..06b40a81537 100644
--- a/jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js
+++ b/jstests/core/timeseries/bucket_unpacking_with_sort_extended_range.js
@@ -47,6 +47,33 @@ assert.commandWorked(collIndexed.createIndex({'t': 1}));
jsTestLog(collIndexed.getIndexes());
jsTestLog(bucketsIndexed.getIndexes());
+function numShards() {
+ return db.getSiblingDB('config').shards.count();
+}
+for (const collection of [buckets, bucketsIndexed]) {
+ if (FixtureHelpers.isSharded(collection) && numShards() >= 2) {
+ // Split and move data to create an interesting scenario: we have some data on each shard,
+ // but all the extended-range data is on a non-primary shard. This means view resolution is
+ // unaware of the extended-range data, because that happens on the primary shard.
+
+ const shards = db.getSiblingDB('config').shards.find().toArray();
+ const [shardName0, shardName1] = shards.map(doc => doc._id);
+
+ assert.commandWorked(db.adminCommand({movePrimary: db.getName(), to: shardName0}));
+ const collName = collection.getFullName();
+ // Our example data has documents between 2000-2003, and these dates are non-wrapping.
+ // So this goes on the primary shard, and everything else goes on the non-primary.
+ assert.commandWorked(sh.splitAt(collName, {'control.min.t': ISODate('2000-01-01')}));
+ assert.commandWorked(sh.splitAt(collName, {'control.min.t': ISODate('2003-01-01')}));
+ assert.commandWorked(
+ sh.moveChunk(collName, {'control.min.t': ISODate('1969-01-01')}, shardName1));
+ assert.commandWorked(
+ sh.moveChunk(collName, {'control.min.t': ISODate('2000-01-01')}, shardName0));
+ assert.commandWorked(
+ sh.moveChunk(collName, {'control.min.t': ISODate('2003-01-01')}, shardName1));
+ }
+}
+
const intervalMillis = 60000;
function insertBucket(start) {
jsTestLog("Inserting bucket starting with " + Date(start).toString());
@@ -67,7 +94,7 @@ function insertBucket(start) {
function insertDocuments() {
// We want to choose the underflow and overflow lower bits in such a way that we
// encourage wrong results when the upper bytes are removed.
- const underflowMin = new Date("1969-01-01").getTime(); // Day before the 32 bit epoch
+ const underflowMin = new Date("1969-01-01").getTime(); // Year before the 32 bit epoch
const normalMin = new Date("2002-01-01").getTime(); // Middle of the 32 bit epoch
insertBucket(underflowMin);
@@ -107,6 +134,25 @@ function checkAgainstReferenceBoundedSortUnexpected(
collection, reference, pipeline, hint, sortOrder) {
const options = hint ? {hint: hint} : {};
+ const bucket = db['system.buckets.' + coll.getName()];
+
+ const plan = collection.explain().aggregate(pipeline, options);
+ if (FixtureHelpers.isSharded(buckets) && numShards() >= 2) {
+ // With a sharded collection, some shards might not have any extended-range data,
+ // so they might still use $_internalBoundedSort. But we know at least one
+ // shard has extended-range data, so we know at least one shard has to
+ // use a blocking sort.
+ const bounded = getAggPlanStages(plan, "$_internalBoundedSort");
+ const blocking = getAggPlanStages(plan, "$sort");
+ assert.gt(blocking.length, 0, {bounded, blocking, plan});
+ assert.lt(bounded.length,
+ FixtureHelpers.numberOfShardsForCollection(buckets),
+ {bounded, blocking, plan});
+ } else {
+ const stages = getAggPlanStages(plan, "$_internalBoundedSort");
+ assert.eq([], stages, plan);
+ }
+
const opt = collection.aggregate(pipeline, options).toArray();
assertSorted(opt, sortOrder);
@@ -114,16 +160,16 @@ function checkAgainstReferenceBoundedSortUnexpected(
for (var i = 0; i < opt.length; ++i) {
assert.docEq(reference[i], opt[i]);
}
-
- const plan = collection.explain({}).aggregate(pipeline, options);
- const stages = getAggPlanStages(plan, "$_internalBoundedSort");
- assert.eq([], stages, plan);
}
function checkAgainstReferenceBoundedSortExpected(
collection, reference, pipeline, hint, sortOrder) {
const options = hint ? {hint: hint} : {};
+ const plan = collection.explain().aggregate(pipeline, options);
+ const stages = getAggPlanStages(plan, "$_internalBoundedSort");
+ assert.neq([], stages, plan);
+
const opt = collection.aggregate(pipeline, options).toArray();
assertSorted(opt, sortOrder);
@@ -131,10 +177,6 @@ function checkAgainstReferenceBoundedSortExpected(
for (var i = 0; i < opt.length; ++i) {
assert.docEq(reference[i], opt[i]);
}
-
- const plan = collection.explain({}).aggregate(pipeline, options);
- const stages = getAggPlanStages(plan, "$_internalBoundedSort");
- assert.neq([], stages, plan);
}
function runTest(ascending) {
@@ -174,10 +216,10 @@ function runTest(ascending) {
.toArray();
assertSorted(referenceIndexed, ascending);
- // Check plan using index scan. If we've inserted a date before 1-1-1970, we round the min up
- // towards 1970, rather then down, which has the effect of increasing the control.min.t. This
- // means the minimum time in the bucket is likely to be lower than indicated and thus, actual
- // dates may be out of order relative to what's indicated by the bucket bounds.
+    // Check plan using index scan. If we've inserted a date before 1-1-1970, we round the min
+    // up towards 1970, rather than down, which has the effect of increasing the control.min.t.
+ // This means the minimum time in the bucket is likely to be lower than indicated and thus,
+ // actual dates may be out of order relative to what's indicated by the bucket bounds.
checkAgainstReferenceBoundedSortUnexpected(collIndexed,
referenceIndexed,
[
diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp
index 9172103b493..200dc47401f 100644
--- a/src/mongo/db/commands/run_aggregate.cpp
+++ b/src/mongo/db/commands/run_aggregate.cpp
@@ -914,6 +914,13 @@ Status runAggregate(OperationContext* opCtx,
expCtx = makeExpressionContext(
opCtx, request, std::move(*collatorToUse), uuid, collatorToUseMatchesDefault);
+ // If any involved collection contains extended-range data, set a flag which individual
+ // DocumentSource parsers can check.
+ collections.forEach([&](const CollectionPtr& coll) {
+ if (coll->getRequiresTimeseriesExtendedRangeSupport())
+ expCtx->setRequiresTimeseriesExtendedRangeSupport(true);
+ });
+
expCtx->startExpressionCounters();
auto pipeline = Pipeline::parse(request.getPipeline(), expCtx);
expCtx->stopExpressionCounters();
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
index 49ec3811575..15831e48ae7 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
@@ -259,6 +259,11 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
// if that's the case, no field will be added to 'bucketSpec.fieldSet' in the for-loop below.
BucketUnpacker::Behavior unpackerBehavior = BucketUnpacker::Behavior::kExclude;
BucketSpec bucketSpec;
+ // Use extended-range support if any individual collection requires it, even if 'specElem'
+ // doesn't mention this flag.
+ if (expCtx->getRequiresTimeseriesExtendedRangeSupport()) {
+ bucketSpec.setUsesExtendedRange(true);
+ }
auto hasIncludeExclude = false;
auto hasTimeField = false;
auto hasBucketMaxSpanSeconds = false;
@@ -456,6 +461,14 @@ void DocumentSourceInternalUnpackBucket::serializeToArray(
if (_assumeNoMixedSchemaData)
out.addField(kAssumeNoMixedSchemaData, Value(_assumeNoMixedSchemaData));
+ if (spec.usesExtendedRange()) {
+ // Include this flag so that 'explain' is more helpful.
+ // But this is not so useful for communicating from one process to another,
+ // because mongos and/or the primary shard don't know whether any other shard
+ // has extended-range data.
+ out.addField(kUsesExtendedRange, Value{true});
+ }
+
if (!spec.computedMetaProjFields().empty())
out.addField("computedMetaProjFields", Value{[&] {
std::vector<Value> compFields;
diff --git a/src/mongo/db/pipeline/expression_context.h b/src/mongo/db/pipeline/expression_context.h
index 3139762d5cf..f159b240721 100644
--- a/src/mongo/db/pipeline/expression_context.h
+++ b/src/mongo/db/pipeline/expression_context.h
@@ -489,6 +489,15 @@ public:
// expression counting.
bool enabledCounters = true;
+ // Sets or clears a flag which tells DocumentSource parsers whether any involved Collection
+ // may contain extended-range dates.
+ void setRequiresTimeseriesExtendedRangeSupport(bool v) {
+ _requiresTimeseriesExtendedRangeSupport = v;
+ }
+ bool getRequiresTimeseriesExtendedRangeSupport() const {
+ return _requiresTimeseriesExtendedRangeSupport;
+ }
+
protected:
static const int kInterruptCheckPeriod = 128;
@@ -513,6 +522,8 @@ protected:
bool _isCappedDelete = false;
+ bool _requiresTimeseriesExtendedRangeSupport = false;
+
private:
boost::optional<ExpressionCounters> _expressionCounters = boost::none;
};
diff --git a/src/mongo/db/query/multiple_collection_accessor.h b/src/mongo/db/query/multiple_collection_accessor.h
index 26fc081000e..9d302ce2b28 100644
--- a/src/mongo/db/query/multiple_collection_accessor.h
+++ b/src/mongo/db/query/multiple_collection_accessor.h
@@ -101,6 +101,17 @@ public:
_secondaryColls.clear();
}
+ void forEach(std::function<void(const CollectionPtr&)> func) const {
+ if (hasMainCollection()) {
+ func(getMainCollection());
+ }
+ for (const auto& [name, coll] : getSecondaryCollections()) {
+ if (coll) {
+ func(coll);
+ }
+ }
+ }
+
private:
const CollectionPtr* _mainColl{&CollectionPtr::null};
diff --git a/src/mongo/db/sorter/sorter.cpp b/src/mongo/db/sorter/sorter.cpp
index a18297e493d..93ba45e4d74 100644
--- a/src/mongo/db/sorter/sorter.cpp
+++ b/src/mongo/db/sorter/sorter.cpp
@@ -1423,7 +1423,8 @@ void BoundedSorter<Key, Value, Comparator, BoundMaker>::add(Key key, Value value
invariant(!_done);
// If a new value violates what we thought was our min bound, something has gone wrong.
uassert(6369910,
- "BoundedSorter input is too out-of-order.",
+ str::stream() << "BoundedSorter input is too out-of-order: with bound "
+ << _min->toString() << ", did not expect input " << key.toString(),
!_checkInput || !_min || compare(*_min, key) <= 0);
// Each new item can potentially give us a tighter bound (a higher min).