From b731a58ff399ae1558bbaa0fb9b3da1d49d18313 Mon Sep 17 00:00:00 2001 From: Ian Boros Date: Tue, 5 Apr 2022 21:59:50 -0400 Subject: SERVER-65270 Fix bug related to queries on large documents not respecting the sort option (cherry picked from commit 5b5b505f0db2e145f40bde4e2ac2d5c56bc0b263) --- .../sort_big_documents_with_multi_planning.js | 46 ++++++++++++++++++++++ src/mongo/db/commands/find_cmd.cpp | 2 +- src/mongo/db/commands/getmore_cmd.cpp | 2 +- src/mongo/db/commands/list_collections.cpp | 2 +- src/mongo/db/commands/list_indexes.cpp | 2 +- src/mongo/db/commands/run_aggregate.cpp | 2 +- src/mongo/db/exec/multi_plan.cpp | 4 +- src/mongo/db/exec/sbe_cmd.cpp | 2 +- src/mongo/db/pipeline/plan_executor_pipeline.cpp | 4 +- src/mongo/db/pipeline/plan_executor_pipeline.h | 2 +- src/mongo/db/query/plan_executor.h | 10 ++--- src/mongo/db/query/plan_executor_impl.cpp | 6 +-- src/mongo/db/query/plan_executor_impl.h | 4 +- src/mongo/db/query/plan_executor_sbe.cpp | 6 +-- src/mongo/db/query/plan_executor_sbe.h | 4 +- src/mongo/db/query/plan_ranker.h | 2 +- src/mongo/db/query/sbe_runtime_planner.cpp | 2 +- .../db/s/migration_chunk_cloner_source_legacy.cpp | 2 +- 18 files changed, 75 insertions(+), 29 deletions(-) create mode 100644 jstests/noPassthroughWithMongod/sort_big_documents_with_multi_planning.js diff --git a/jstests/noPassthroughWithMongod/sort_big_documents_with_multi_planning.js b/jstests/noPassthroughWithMongod/sort_big_documents_with_multi_planning.js new file mode 100644 index 00000000000..a711654a736 --- /dev/null +++ b/jstests/noPassthroughWithMongod/sort_big_documents_with_multi_planning.js @@ -0,0 +1,46 @@ +// This test is designed to reproduce the bug described in SERVER-65270, where a query +// which uses multi-planning and large documents does not respect the sort order. 
+(function() { +"use strict"; + +const coll = db.sort_big_documents_with_multi_planning; +coll.drop(); + +function makeDoc(i) { + return {_id: i, filterKey: 1, num: i, bytes: BinData(0, "A".repeat(13981014) + "==")}; +} + +for (let i = 0; i < 10; i++) { + assert.commandWorked(coll.insert(makeDoc(i))); +} + +// Two possible indexes can answer the query. +assert.commandWorked(coll.createIndex({filterKey: 1, num: 1, foo: 1})); +assert.commandWorked(coll.createIndex({filterKey: 1, num: 1})); + +const sortSpec = { + num: 1 +}; + +const kExpectedNums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + +{ + // We do a "client side projection," to avoid printing out the massive BinData string if + // there's an error. + const nums = []; + coll.find({filterKey: 1}).sort(sortSpec).forEach(doc => nums.push(doc.num)); + + // The results should be in order. + assert.eq(nums, kExpectedNums); +} + +// Same test, but with aggregation. +{ + const nums = []; + coll.aggregate([{$match: {filterKey: 1}}, {$sort: sortSpec}]) + .forEach(doc => nums.push(doc.num)); + + // The results should be in order. + assert.eq(nums, kExpectedNums); +} +})(); diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp index 732233dd194..fcc1052f232 100644 --- a/src/mongo/db/commands/find_cmd.cpp +++ b/src/mongo/db/commands/find_cmd.cpp @@ -619,7 +619,7 @@ public: // If we can't fit this result inside the current batch, then we stash it for // later. if (!FindCommon::haveSpaceForNext(obj, numResults, firstBatch.bytesUsed())) { - exec->enqueue(obj); + exec->stashResult(obj); stashedResult = true; break; } diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 58c37b80edf..9eea1df4a08 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -337,7 +337,7 @@ public: // If adding this object will cause us to exceed the message size limit, then we // stash it for later. 
if (!FindCommon::haveSpaceForNext(obj, *numResults, nextBatch->bytesUsed())) { - exec->enqueue(obj); + exec->stashResult(obj); break; } diff --git a/src/mongo/db/commands/list_collections.cpp b/src/mongo/db/commands/list_collections.cpp index 741e19ba454..074a5cd3223 100644 --- a/src/mongo/db/commands/list_collections.cpp +++ b/src/mongo/db/commands/list_collections.cpp @@ -514,7 +514,7 @@ public: // If we can't fit this result inside the current batch, then we stash it for // later. if (!FindCommon::haveSpaceForNext(nextDoc, objCount, bytesBuffered)) { - exec->enqueue(nextDoc); + exec->stashResult(nextDoc); break; } diff --git a/src/mongo/db/commands/list_indexes.cpp b/src/mongo/db/commands/list_indexes.cpp index 89fde689852..4198a8c8886 100644 --- a/src/mongo/db/commands/list_indexes.cpp +++ b/src/mongo/db/commands/list_indexes.cpp @@ -283,7 +283,7 @@ public: // If we can't fit this result inside the current batch, then we stash it for // later. if (!FindCommon::haveSpaceForNext(nextDoc, objCount, bytesBuffered)) { - exec->enqueue(nextDoc); + exec->stashResult(nextDoc); break; } diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp index e20b3fb8087..c6b761fd4d0 100644 --- a/src/mongo/db/commands/run_aggregate.cpp +++ b/src/mongo/db/commands/run_aggregate.cpp @@ -238,7 +238,7 @@ bool handleCursorCommand(OperationContext* opCtx, // for later. if (!FindCommon::haveSpaceForNext(nextDoc, objCount, responseBuilder.bytesUsed())) { - exec->enqueue(nextDoc); + exec->stashResult(nextDoc); stashedResult = true; break; } diff --git a/src/mongo/db/exec/multi_plan.cpp b/src/mongo/db/exec/multi_plan.cpp index 96aaf4ca52d..f7ca2b1432b 100644 --- a/src/mongo/db/exec/multi_plan.cpp +++ b/src/mongo/db/exec/multi_plan.cpp @@ -111,7 +111,7 @@ PlanStage::StageState MultiPlanStage::doWork(WorkingSetID* out) { // Look for an already produced result that provides the data the caller wants. 
if (!bestPlan.results.empty()) { *out = bestPlan.results.front(); - bestPlan.results.pop(); + bestPlan.results.pop_front(); return PlanStage::ADVANCED; } @@ -265,7 +265,7 @@ bool MultiPlanStage::workAllPlans(size_t numResults, PlanYieldPolicy* yieldPolic // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we choose to // return the results from the 'candidate' plan. member->makeObjOwnedIfNeeded(); - candidate.results.push(id); + candidate.results.push_back(id); // Once a plan returns enough results, stop working. if (candidate.results.size() >= numResults) { diff --git a/src/mongo/db/exec/sbe_cmd.cpp b/src/mongo/db/exec/sbe_cmd.cpp index c76885bec0d..a2399cc3077 100644 --- a/src/mongo/db/exec/sbe_cmd.cpp +++ b/src/mongo/db/exec/sbe_cmd.cpp @@ -139,7 +139,7 @@ public: // If we can't fit this result inside the current batch, then we stash it for later. if (!FindCommon::haveSpaceForNext(next, objCount, firstBatch.len())) { - exec->enqueue(next); + exec->stashResult(next); break; } diff --git a/src/mongo/db/pipeline/plan_executor_pipeline.cpp b/src/mongo/db/pipeline/plan_executor_pipeline.cpp index 2b1d70b5681..9bc0089e151 100644 --- a/src/mongo/db/pipeline/plan_executor_pipeline.cpp +++ b/src/mongo/db/pipeline/plan_executor_pipeline.cpp @@ -92,8 +92,8 @@ PlanExecutor::ExecState PlanExecutorPipeline::getNextDocument(Document* docOut, invariant(!recordIdOut); invariant(docOut); - // Callers which use 'enqueue()' are not allowed to use 'getNextDocument()', and must instead - // use 'getNext()'. + // Callers which use 'stashResult()' are not allowed to use 'getNextDocument()', and must + // instead use 'getNext()'. 
invariant(_stash.empty()); if (auto next = _getNext()) { diff --git a/src/mongo/db/pipeline/plan_executor_pipeline.h b/src/mongo/db/pipeline/plan_executor_pipeline.h index 66313dd22c5..8a325792f8b 100644 --- a/src/mongo/db/pipeline/plan_executor_pipeline.h +++ b/src/mongo/db/pipeline/plan_executor_pipeline.h @@ -106,7 +106,7 @@ public: _pipeline->dispose(opCtx); } - void enqueue(const BSONObj& obj) override { + void stashResult(const BSONObj& obj) override { _stash.push(obj.getOwned()); } diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h index 1d8b70057b6..9c6f2c7be5a 100644 --- a/src/mongo/db/query/plan_executor.h +++ b/src/mongo/db/query/plan_executor.h @@ -308,16 +308,16 @@ public: virtual void dispose(OperationContext* opCtx) = 0; /** - * Stash the BSONObj so that it gets returned from the PlanExecutor on a later call to - * getNext(). Implementations should NOT support returning queued BSON objects using - * 'getNextDocument()'. Only 'getNext()' should return the queued BSON objects. + * Stash the BSONObj so that it gets returned from the PlanExecutor on a subsequent call to + * getNext(). Implementations should NOT support returning stashed BSON objects using + * 'getNextDocument()'. Only 'getNext()' should return the stashed BSON objects. * - * Enqueued documents are returned in FIFO order. The queued results are exhausted before + * Stashed documents are returned in LIFO order. The stashed results are exhausted before * generating further results from the underlying query plan. * * Subsequent calls to getNext() must request the BSONObj and *not* the RecordId. 
*/ - virtual void enqueue(const BSONObj& obj) = 0; + virtual void stashResult(const BSONObj& obj) = 0; virtual bool isMarkedAsKilled() const = 0; virtual Status getKillStatus() = 0; diff --git a/src/mongo/db/query/plan_executor_impl.cpp b/src/mongo/db/query/plan_executor_impl.cpp index 241b52ed58f..7492393ad6a 100644 --- a/src/mongo/db/query/plan_executor_impl.cpp +++ b/src/mongo/db/query/plan_executor_impl.cpp @@ -329,7 +329,7 @@ PlanExecutor::ExecState PlanExecutorImpl::_getNextImpl(Snapshotted<Document>* ob if (!_stash.empty()) { invariant(objOut && !dlOut); *objOut = {SnapshotId(), _stash.front()}; - _stash.pop(); + _stash.pop_front(); return PlanExecutor::ADVANCED; } @@ -571,8 +571,8 @@ long long PlanExecutorImpl::executeDelete() { } } -void PlanExecutorImpl::enqueue(const BSONObj& obj) { - _stash.push(Document{obj.getOwned()}); +void PlanExecutorImpl::stashResult(const BSONObj& obj) { + _stash.push_front(Document{obj.getOwned()}); } bool PlanExecutorImpl::isMarkedAsKilled() const { diff --git a/src/mongo/db/query/plan_executor_impl.h b/src/mongo/db/query/plan_executor_impl.h index c4642dd777e..87137cdbd4d 100644 --- a/src/mongo/db/query/plan_executor_impl.h +++ b/src/mongo/db/query/plan_executor_impl.h @@ -81,7 +81,7 @@ public: long long executeDelete() override; void markAsKilled(Status killStatus) final; void dispose(OperationContext* opCtx) final; - void enqueue(const BSONObj& obj) final; + void stashResult(const BSONObj& obj) final; bool isMarkedAsKilled() const final; Status getKillStatus() final; bool isDisposed() const final; @@ -170,7 +170,7 @@ private: // A stash of results generated by this plan that the user of the PlanExecutor didn't want // to consume yet. We empty the queue before retrieving further results from the plan // stages. - std::queue<Document> _stash; + std::deque<Document> _stash; // The output document that is used by getNext BSON API. This allows us to avoid constantly // allocating and freeing DocumentStorage. 
diff --git a/src/mongo/db/query/plan_executor_sbe.cpp b/src/mongo/db/query/plan_executor_sbe.cpp index e1dded76398..791c345dc7b 100644 --- a/src/mongo/db/query/plan_executor_sbe.cpp +++ b/src/mongo/db/query/plan_executor_sbe.cpp @@ -183,10 +183,10 @@ void PlanExecutorSBE::dispose(OperationContext* opCtx) { _isDisposed = true; } -void PlanExecutorSBE::enqueue(const BSONObj& obj) { +void PlanExecutorSBE::stashResult(const BSONObj& obj) { invariant(_state == State::kOpened); invariant(!_isDisposed); - _stash.push({obj.getOwned(), boost::none}); + _stash.push_front({obj.getOwned(), boost::none}); } PlanExecutor::ExecState PlanExecutorSBE::getNextDocument(Document* objOut, RecordId* dlOut) { @@ -213,7 +213,7 @@ PlanExecutor::ExecState PlanExecutorSBE::getNext(BSONObj* out, RecordId* dlOut) if (dlOut && recordId) { *dlOut = *recordId; } - _stash.pop(); + _stash.pop_front(); return PlanExecutor::ExecState::ADVANCED; } else if (_root->getCommonStats()->isEOF) { // If we had stashed elements and consumed them all, but the PlanStage has also diff --git a/src/mongo/db/query/plan_executor_sbe.h b/src/mongo/db/query/plan_executor_sbe.h index f20e399d36e..018d785f7bd 100644 --- a/src/mongo/db/query/plan_executor_sbe.h +++ b/src/mongo/db/query/plan_executor_sbe.h @@ -101,7 +101,7 @@ public: void dispose(OperationContext* opCtx); - void enqueue(const BSONObj& obj); + void stashResult(const BSONObj& obj); bool isMarkedAsKilled() const override { return !_killStatus.isOK(); @@ -163,7 +163,7 @@ private: boost::optional<sbe::value::SlotId> _resumeRecordIdSlot; - std::queue<std::pair<BSONObj, boost::optional<RecordId>>> _stash; + std::deque<std::pair<BSONObj, boost::optional<RecordId>>> _stash; // If we are returning owned result (i.e. value is moved out of the result accessor) then its // lifetime must extend up to the next getNext (or saveState). 
BSONObj _lastGetNext; diff --git a/src/mongo/db/query/plan_ranker.h b/src/mongo/db/query/plan_ranker.h index 4389b0b1fec..63d4b25d57d 100644 --- a/src/mongo/db/query/plan_ranker.h +++ b/src/mongo/db/query/plan_ranker.h @@ -190,7 +190,7 @@ struct BaseCandidatePlan { // non-OK status. Status status{Status::OK()}; // Any results produced during the plan's execution prior to scoring are retained here. - std::queue<ResultType> results; + std::deque<ResultType> results; }; using CandidatePlan = BaseCandidatePlan<PlanStage*, WorkingSetID, NoopData>; diff --git a/src/mongo/db/query/sbe_runtime_planner.cpp b/src/mongo/db/query/sbe_runtime_planner.cpp index 09e70caf434..c2eddab1eeb 100644 --- a/src/mongo/db/query/sbe_runtime_planner.cpp +++ b/src/mongo/db/query/sbe_runtime_planner.cpp @@ -74,7 +74,7 @@ FetchDocStatus fetchNextDocument( invariant(state == PlanState::ADVANCED); invariant(obj.isOwned()); - candidate->results.push({obj, {recordIdSlot != nullptr, recordId}}); + candidate->results.push_back({obj, {recordIdSlot != nullptr, recordId}}); } catch (const ExceptionFor<ErrorCodes::QueryTrialRunCompleted>&) { return FetchDocStatus::exitedEarly; } catch (const ExceptionFor<ErrorCodes::QueryExceededMemoryLimitNoDiskUseAllowed>& ex) { diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp index 96077785587..c607e46946a 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp @@ -633,7 +633,7 @@ void MigrationChunkClonerSourceLegacy::_nextCloneBatchFromIndexScan(OperationCon // that we take into consideration the overhead of BSONArray indices. if (arrBuilder->arrSize() && (arrBuilder->len() + obj.objsize() + 1024) > BSONObjMaxUserSize) { - _jumboChunkCloneState->clonerExec->enqueue(obj); + _jumboChunkCloneState->clonerExec->stashResult(obj); break; } -- cgit v1.2.1