diff options
author | Charlie Swanson <charlie.swanson@mongodb.com> | 2017-08-28 15:10:42 -0400 |
---|---|---|
committer | Charlie Swanson <charlie.swanson@mongodb.com> | 2017-09-01 15:36:35 -0400 |
commit | bc3e230523e4677e2f3fed64ea89c369182a9272 (patch) | |
tree | bb35904e784f224e6d5ab87b508c69c72f447dd3 /src/mongo/db | |
parent | 4e01e3582541fc00ec2e83c97cac89b59fbfeb34 (diff) | |
download | mongo-bc3e230523e4677e2f3fed64ea89c369182a9272.tar.gz |
SERVER-30704 Use ARM to merge agg cursors on mongos.
Diffstat (limited to 'src/mongo/db')
25 files changed, 438 insertions, 188 deletions
diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp index bb6b7175ae5..a1fc3b88a19 100644 --- a/src/mongo/db/pipeline/dependencies.cpp +++ b/src/mongo/db/pipeline/dependencies.cpp @@ -41,34 +41,40 @@ using std::vector; namespace str = mongoutils::str; -BSONObj DepsTracker::toProjection() const { - if (fields.empty() && !needWholeDocument) { - if (_needTextScore) { - // We only need the text score, but there is no easy way to express this in the query - // projection language. We use $noFieldsNeeded with a textScore meta-projection since - // this is an inclusion projection which will exclude all existing fields but add the - // textScore metadata. - return BSON("_id" << 0 << "$noFieldsNeeded" << 1 << Document::metaFieldTextScore - << BSON("$meta" - << "textScore")); - } else { - // We truly need no information (we are doing a count or something similar). In this - // case, the DocumentSourceCursor will know there aren't any dependencies, and we can - // ignore the documents returned from the query system. We pass an empty object as the - // projection so that we have a chance of using the COUNT_SCAN optimization. 
- return BSONObj(); - } +bool DepsTracker::_appendMetaProjections(BSONObjBuilder* projectionBuilder) const { + if (_needTextScore) { + projectionBuilder->append(Document::metaFieldTextScore, + BSON("$meta" + << "textScore")); } + if (_needSortKey) { + projectionBuilder->append(Document::metaFieldSortKey, + BSON("$meta" + << "sortKey")); + } + return (_needTextScore || _needSortKey); +} +BSONObj DepsTracker::toProjection() const { BSONObjBuilder bb; - if (_needTextScore) - bb.append(Document::metaFieldTextScore, - BSON("$meta" - << "textScore")); + const bool needsMetadata = _appendMetaProjections(&bb); - if (needWholeDocument) + if (needWholeDocument) { return bb.obj(); + } + + if (fields.empty()) { + if (needsMetadata) { + // We only need metadata, but there is no easy way to express this in the query + // projection language. We use $noFieldsNeeded with a meta-projection since this is an + // inclusion projection which will exclude all existing fields but add the metadata. + bb.append("_id", 0); + bb.append("$noFieldsNeeded", 1); + } + // We either need nothing (as we would if this was logically a count), or only the metadata. + return bb.obj(); + } bool needId = false; string last; diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h index 9afd9adbaec..19cf3ee33f8 100644 --- a/src/mongo/db/pipeline/dependencies.h +++ b/src/mongo/db/pipeline/dependencies.h @@ -47,7 +47,7 @@ struct DepsTracker { enum MetadataAvailable { kNoMetadata = 0, kTextScore = 1 }; DepsTracker(MetadataAvailable metadataAvailable = kNoMetadata) - : needWholeDocument(false), _metadataAvailable(metadataAvailable), _needTextScore(false) {} + : _metadataAvailable(metadataAvailable) {} /** * Returns a projection object covering the dependencies tracked by this class. 
@@ -56,10 +56,6 @@ struct DepsTracker { boost::optional<ParsedDeps> toParsedDeps() const; - std::set<std::string> fields; // names of needed fields in dotted notation - bool needWholeDocument; // if true, ignore fields and assume the whole document is needed - - bool hasNoRequirements() const { return fields.empty() && !needWholeDocument && !_needTextScore; } @@ -85,9 +81,29 @@ struct DepsTracker { _needTextScore = needTextScore; } + bool getNeedSortKey() const { + return _needSortKey; + } + + void setNeedSortKey(bool needSortKey) { + // We don't expect to ever unset '_needSortKey'. + invariant(!_needSortKey || needSortKey); + _needSortKey = needSortKey; + } + + std::set<std::string> fields; // The names of needed fields in dotted notation. + bool needWholeDocument = false; // If true, ignore 'fields' and assume the whole document is + // needed. private: + /** + * Appends the meta projections for the sort key and/or text score to 'bb' if necessary. Returns + * true if either type of metadata was needed, and false otherwise. + */ + bool _appendMetaProjections(BSONObjBuilder* bb) const; + MetadataAvailable _metadataAvailable; - bool _needTextScore; + bool _needTextScore = false; // if true, add a {$meta: "textScore"} to the projection. + bool _needSortKey = false; // if true, add a {$meta: "sortKey"} to the projection. 
}; /** diff --git a/src/mongo/db/pipeline/dependencies_test.cpp b/src/mongo/db/pipeline/dependencies_test.cpp index 5bfd008bbfe..cb33e1fcc9f 100644 --- a/src/mongo/db/pipeline/dependencies_test.cpp +++ b/src/mongo/db/pipeline/dependencies_test.cpp @@ -145,8 +145,9 @@ TEST(DependenciesToProjectionTest, ShouldAttemptToExcludeOtherFieldsIfOnlyTextSc deps.needWholeDocument = false; deps.setNeedTextScore(true); ASSERT_BSONOBJ_EQ(deps.toProjection(), - BSON("_id" << 0 << "$noFieldsNeeded" << 1 << Document::metaFieldTextScore - << metaTextScore)); + BSON(Document::metaFieldTextScore << metaTextScore << "_id" << 0 + << "$noFieldsNeeded" + << 1)); } TEST(DependenciesToProjectionTest, diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp index 835ae1bc716..4f531fe72f3 100644 --- a/src/mongo/db/pipeline/document.cpp +++ b/src/mongo/db/pipeline/document.cpp @@ -45,6 +45,9 @@ using std::vector; const DocumentStorage DocumentStorage::kEmptyDoc; +const std::vector<StringData> Document::allMetadataFieldNames = { + Document::metaFieldTextScore, Document::metaFieldRandVal, Document::metaFieldSortKey}; + Position DocumentStorage::findField(StringData requested) const { int reqSize = requested.size(); // get size calculation out of the way if needed @@ -201,6 +204,7 @@ intrusive_ptr<DocumentStorage> DocumentStorage::clone() const { out->_metaFields = _metaFields; out->_textScore = _textScore; out->_randVal = _randVal; + out->_sortKey = _sortKey.getOwned(); // Tell values that they have been memcpyed (updates ref counts) for (DocumentStorageIterator it = out->iteratorAll(); !it.atEnd(); it.advance()) { @@ -265,8 +269,9 @@ BSONObj Document::toBson() const { return bb.obj(); } -const StringData Document::metaFieldTextScore("$textScore"_sd); -const StringData Document::metaFieldRandVal("$randVal"_sd); +constexpr StringData Document::metaFieldTextScore; +constexpr StringData Document::metaFieldRandVal; +constexpr StringData Document::metaFieldSortKey; BSONObj 
Document::toBsonWithMetaData() const { BSONObjBuilder bb; @@ -275,6 +280,8 @@ BSONObj Document::toBsonWithMetaData() const { bb.append(metaFieldTextScore, getTextScore()); if (hasRandMetaField()) bb.append(metaFieldRandVal, getRandMetaField()); + if (hasSortKeyMetaField()) + bb.append(metaFieldSortKey, getSortKeyMetaField()); return bb.obj(); } @@ -292,6 +299,9 @@ Document Document::fromBsonWithMetaData(const BSONObj& bson) { } else if (fieldName == metaFieldRandVal) { md.setRandMetaField(elem.Double()); continue; + } else if (fieldName == metaFieldSortKey) { + md.setSortKeyMetaField(elem.Obj()); + continue; } } @@ -465,6 +475,10 @@ void Document::serializeForSorter(BufBuilder& buf) const { buf.appendNum(char(DocumentStorage::MetaType::RAND_VAL + 1)); buf.appendNum(getRandMetaField()); } + if (hasSortKeyMetaField()) { + buf.appendNum(char(DocumentStorage::MetaType::SORT_KEY + 1)); + getSortKeyMetaField().appendSelfToBufBuilder(buf); + } buf.appendNum(char(0)); } @@ -481,6 +495,9 @@ Document Document::deserializeForSorter(BufReader& buf, const SorterDeserializeS doc.setTextScore(buf.read<LittleEndian<double>>()); } else if (marker == char(DocumentStorage::MetaType::RAND_VAL) + 1) { doc.setRandMetaField(buf.read<LittleEndian<double>>()); + } else if (marker == char(DocumentStorage::MetaType::SORT_KEY) + 1) { + doc.setSortKeyMetaField( + BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings())); } else { uasserted(28744, "Unrecognized marker, unable to deserialize buffer"); } diff --git a/src/mongo/db/pipeline/document.h b/src/mongo/db/pipeline/document.h index 5e5980f5a51..a8ebfd1c4d2 100644 --- a/src/mongo/db/pipeline/document.h +++ b/src/mongo/db/pipeline/document.h @@ -90,6 +90,12 @@ public: const Document& rhs; }; + static constexpr StringData metaFieldTextScore = "$textScore"_sd; + static constexpr StringData metaFieldRandVal = "$randVal"_sd; + static constexpr StringData metaFieldSortKey = "$sortKey"_sd; + + static const std::vector<StringData> 
allMetadataFieldNames; + /// Empty Document (does no allocation) Document() {} @@ -211,6 +217,12 @@ public: */ static Document fromBsonWithMetaData(const BSONObj& bson); + /** + * Given a BSON object that may have metadata fields added as part of toBsonWithMetadata(), + * returns the same object without any of the metadata fields. + */ + static BSONObj stripMetadataFields(const BSONObj& bsonWithMetadata); + // Support BSONObjBuilder and BSONArrayBuilder "stream" API friend BSONObjBuilder& operator<<(BSONObjBuilderValueStream& builder, const Document& d); @@ -233,7 +245,6 @@ public: return Document(storage().clone().get()); } - static const StringData metaFieldTextScore; // "$textScore" bool hasTextScore() const { return storage().hasTextScore(); } @@ -241,7 +252,6 @@ public: return storage().getTextScore(); } - static const StringData metaFieldRandVal; // "$randVal" bool hasRandMetaField() const { return storage().hasRandMetaField(); } @@ -249,6 +259,13 @@ public: return storage().getRandMetaField(); } + bool hasSortKeyMetaField() const { + return storage().hasSortKeyMetaField(); + } + BSONObj getSortKeyMetaField() const { + return storage().getSortKeyMetaField(); + } + /// members for Sorter struct SorterDeserializeSettings {}; // unused void serializeForSorter(BufBuilder& buf) const; @@ -493,6 +510,10 @@ public: storage().setRandMetaField(val); } + void setSortKeyMetaField(BSONObj sortKey) { + storage().setSortKeyMetaField(sortKey); + } + /** Convert to a read-only document and release reference. 
* * Call this to indicate that you are done with this Document and will diff --git a/src/mongo/db/pipeline/document_internal.h b/src/mongo/db/pipeline/document_internal.h index 2fd3c78cf34..baa68e60658 100644 --- a/src/mongo/db/pipeline/document_internal.h +++ b/src/mongo/db/pipeline/document_internal.h @@ -196,6 +196,7 @@ public: enum MetaType : char { TEXT_SCORE, RAND_VAL, + SORT_KEY, NUM_FIELDS }; @@ -280,6 +281,9 @@ public: if (source.hasRandMetaField()) { setRandMetaField(source.getRandMetaField()); } + if (source.hasSortKeyMetaField()) { + setSortKeyMetaField(source.getSortKeyMetaField()); + } } bool hasTextScore() const { @@ -304,6 +308,17 @@ public: _randVal = val; } + bool hasSortKeyMetaField() const { + return _metaFields.test(MetaType::SORT_KEY); + } + BSONObj getSortKeyMetaField() const { + return _sortKey; + } + void setSortKeyMetaField(BSONObj sortKey) { + _metaFields.set(MetaType::SORT_KEY); + _sortKey = sortKey.getOwned(); + } + private: /// Same as lastElement->next() or firstElement() if empty. const ValueElement* end() const { @@ -385,6 +400,7 @@ private: std::bitset<MetaType::NUM_FIELDS> _metaFields; double _textScore; double _randVal; + BSONObj _sortKey; // When adding a field, make sure to update clone() method // Defined in document.cpp diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index 134a8a86e3d..b0793113863 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -449,7 +449,7 @@ public: EXHAUSTIVE_FIELDS = 0x2, // Later stages won't need more metadata from input. For example, a $group stage will group - // documents together, discarding their text score. + // documents together, discarding their text score and sort keys. EXHAUSTIVE_META = 0x4, // Later stages won't need either fields or metadata. 
diff --git a/src/mongo/db/pipeline/document_source_limit.cpp b/src/mongo/db/pipeline/document_source_limit.cpp index 704ba907165..8256d055805 100644 --- a/src/mongo/db/pipeline/document_source_limit.cpp +++ b/src/mongo/db/pipeline/document_source_limit.cpp @@ -49,9 +49,7 @@ REGISTER_DOCUMENT_SOURCE(limit, LiteParsedDocumentSourceDefault::parse, DocumentSourceLimit::createFromBson); -const char* DocumentSourceLimit::getSourceName() const { - return "$limit"; -} +constexpr StringData DocumentSourceLimit::kStageName; Pipeline::SourceContainer::iterator DocumentSourceLimit::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { diff --git a/src/mongo/db/pipeline/document_source_limit.h b/src/mongo/db/pipeline/document_source_limit.h index f5f93fff1a4..7bf47638b21 100644 --- a/src/mongo/db/pipeline/document_source_limit.h +++ b/src/mongo/db/pipeline/document_source_limit.h @@ -34,9 +34,25 @@ namespace mongo { class DocumentSourceLimit final : public DocumentSource, public SplittableDocumentSource { public: - // virtuals from DocumentSource + static constexpr StringData kStageName = "$limit"_sd; + + /** + * Create a new $limit stage. + */ + static boost::intrusive_ptr<DocumentSourceLimit> create( + const boost::intrusive_ptr<ExpressionContext>& pExpCtx, long long limit); + + /** + * Parse a $limit stage from a BSON stage specification. 'elem's field name must be "$limit". + */ + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + GetNextResult getNext() final; - const char* getSourceName() const final; + const char* getSourceName() const final { + return kStageName.rawData(); + } + BSONObjSet getOutputSorts() final { return pSource ? pSource->getOutputSorts() : SimpleBSONObjComparator::kInstance.makeBSONObjSet(); @@ -60,15 +76,6 @@ public: } /** - Create a new limiting DocumentSource. 
- - @param pExpCtx the expression context for the pipeline - @returns the DocumentSource - */ - static boost::intrusive_ptr<DocumentSourceLimit> create( - const boost::intrusive_ptr<ExpressionContext>& pExpCtx, long long limit); - - /** * Returns the current DocumentSourceLimit for use in the shards pipeline. Running this stage on * the shards is an optimization, but is not strictly necessary in order to produce correct * pipeline output. @@ -93,20 +100,6 @@ public: _limit = newLimit; } - /** - Create a limiting DocumentSource from BSON. - - This is a convenience method that uses the above, and operates on - a BSONElement that has been deteremined to be an Object with an - element named $limit. - - @param pBsonElement the BSONELement that defines the limit - @param pExpCtx the expression context - @returns the grouping DocumentSource - */ - static boost::intrusive_ptr<DocumentSource> createFromBson( - BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - private: DocumentSourceLimit(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, long long limit); diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp index 5493d21ecd1..17f8cc34e83 100644 --- a/src/mongo/db/pipeline/document_source_match.cpp +++ b/src/mongo/db/pipeline/document_source_match.cpp @@ -471,10 +471,11 @@ BSONObj DocumentSourceMatch::getQuery() const { DocumentSource::GetDepsReturn DocumentSourceMatch::getDependencies(DepsTracker* deps) const { if (isTextQuery()) { - // A $text aggregation field should return EXHAUSTIVE_ALL, since we don't necessarily know - // what field it will be searching without examining indices. + // A $text aggregation field should return EXHAUSTIVE_FIELDS, since we don't necessarily + // know what field it will be searching without examining indices. 
deps->needWholeDocument = true; - return EXHAUSTIVE_ALL; + deps->setNeedTextScore(true); + return EXHAUSTIVE_FIELDS; } addDependencies(deps); @@ -493,7 +494,11 @@ void DocumentSourceMatch::addDependencies(DepsTracker* deps) const { DocumentSourceMatch::DocumentSourceMatch(const BSONObj& query, const intrusive_ptr<ExpressionContext>& pExpCtx) - : DocumentSource(pExpCtx), _predicate(query.getOwned()), _isTextQuery(isTextQuery(query)) { + : DocumentSource(pExpCtx), + _predicate(query.getOwned()), + _isTextQuery(isTextQuery(query)), + _dependencies(_isTextQuery ? DepsTracker::MetadataAvailable::kTextScore + : DepsTracker::MetadataAvailable::kNoMetadata) { StatusWithMatchExpression status = uassertStatusOK( MatchExpressionParser::parse(_predicate, pExpCtx->getCollator(), diff --git a/src/mongo/db/pipeline/document_source_match.h b/src/mongo/db/pipeline/document_source_match.h index 135dbc39e17..06219b22596 100644 --- a/src/mongo/db/pipeline/document_source_match.h +++ b/src/mongo/db/pipeline/document_source_match.h @@ -160,11 +160,11 @@ private: std::unique_ptr<MatchExpression> _expression; - // Cache the dependencies so that we know what fields we need to serialize to BSON for matching. - DepsTracker _dependencies; - BSONObj _predicate; const bool _isTextQuery; + + // Cache the dependencies so that we know what fields we need to serialize to BSON for matching. 
+ DepsTracker _dependencies; }; } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_match_test.cpp b/src/mongo/db/pipeline/document_source_match_test.cpp index c01ae31db9a..cbe9c522aeb 100644 --- a/src/mongo/db/pipeline/document_source_match_test.cpp +++ b/src/mongo/db/pipeline/document_source_match_test.cpp @@ -218,10 +218,10 @@ TEST_F(DocumentSourceMatchTest, ShouldAddDependenciesOfAllBranchesOfOrClause) { TEST_F(DocumentSourceMatchTest, TextSearchShouldRequireWholeDocumentAndTextScore) { auto match = DocumentSourceMatch::create(fromjson("{$text: {$search: 'hello'} }"), getExpCtx()); - DepsTracker dependencies; - ASSERT_EQUALS(DocumentSource::EXHAUSTIVE_ALL, match->getDependencies(&dependencies)); + DepsTracker dependencies(DepsTracker::MetadataAvailable::kTextScore); + ASSERT_EQUALS(DocumentSource::EXHAUSTIVE_FIELDS, match->getDependencies(&dependencies)); ASSERT_EQUALS(true, dependencies.needWholeDocument); - ASSERT_EQUALS(false, dependencies.getNeedTextScore()); + ASSERT_EQUALS(true, dependencies.getNeedTextScore()); } TEST_F(DocumentSourceMatchTest, ShouldOnlyAddOuterFieldAsDependencyOfImplicitEqualityPredicate) { diff --git a/src/mongo/db/pipeline/document_source_merge_cursors.cpp b/src/mongo/db/pipeline/document_source_merge_cursors.cpp index d53c2183443..23d413d6b60 100644 --- a/src/mongo/db/pipeline/document_source_merge_cursors.cpp +++ b/src/mongo/db/pipeline/document_source_merge_cursors.cpp @@ -39,6 +39,8 @@ using std::make_pair; using std::string; using std::vector; +constexpr StringData DocumentSourceMergeCursors::kStageName; + DocumentSourceMergeCursors::DocumentSourceMergeCursors( std::vector<CursorDescriptor> cursorDescriptors, const intrusive_ptr<ExpressionContext>& pExpCtx) @@ -48,10 +50,6 @@ REGISTER_DOCUMENT_SOURCE(mergeCursors, LiteParsedDocumentSourceDefault::parse, DocumentSourceMergeCursors::createFromBson); -const char* DocumentSourceMergeCursors::getSourceName() const { - return "$mergeCursors"; -} - 
intrusive_ptr<DocumentSource> DocumentSourceMergeCursors::create( std::vector<CursorDescriptor> cursorDescriptors, const intrusive_ptr<ExpressionContext>& pExpCtx) { @@ -96,7 +94,7 @@ Value DocumentSourceMergeCursors::serialize( << "id" << _cursorDescriptors[i].cursorId))); } - return Value(DOC(getSourceName() << Value(cursors))); + return Value(DOC(kStageName << Value(cursors))); } DocumentSourceMergeCursors::CursorAndConnection::CursorAndConnection( diff --git a/src/mongo/db/pipeline/document_source_merge_cursors.h b/src/mongo/db/pipeline/document_source_merge_cursors.h index e83316fa6ea..63521e2c742 100644 --- a/src/mongo/db/pipeline/document_source_merge_cursors.h +++ b/src/mongo/db/pipeline/document_source_merge_cursors.h @@ -37,6 +37,8 @@ namespace mongo { class DocumentSourceMergeCursors : public DocumentSource { public: + static constexpr StringData kStageName = "$mergeCursors"_sd; + struct CursorDescriptor { CursorDescriptor(ConnectionString connectionString, std::string ns, CursorId cursorId) : connectionString(std::move(connectionString)), @@ -48,14 +50,17 @@ public: CursorId cursorId; }; - // virtuals from DocumentSource GetNextResult getNext() final; - const char* getSourceName() const final; + + const char* getSourceName() const final { + return kStageName.rawData(); + } + Value serialize(boost::optional<ExplainOptions::Verbosity> explain = boost::none) const final; StageConstraints constraints() const final { StageConstraints constraints; - constraints.hostRequirement = HostTypeRequirement::kAnyShardOrMongoS; + constraints.hostRequirement = HostTypeRequirement::kAnyShard; constraints.requiredPosition = PositionRequirement::kFirst; constraints.requiresInputDocSource = false; constraints.isAllowedInsideFacetStage = false; diff --git a/src/mongo/db/pipeline/document_source_sample.cpp b/src/mongo/db/pipeline/document_source_sample.cpp index c174d95d935..09e3d431493 100644 --- a/src/mongo/db/pipeline/document_source_sample.cpp +++ 
b/src/mongo/db/pipeline/document_source_sample.cpp @@ -40,6 +40,8 @@ namespace mongo { using boost::intrusive_ptr; +constexpr StringData DocumentSourceSample::kStageName; + DocumentSourceSample::DocumentSourceSample(const intrusive_ptr<ExpressionContext>& pExpCtx) : DocumentSource(pExpCtx), _size(0) {} @@ -47,10 +49,6 @@ REGISTER_DOCUMENT_SOURCE(sample, LiteParsedDocumentSourceDefault::parse, DocumentSourceSample::createFromBson); -const char* DocumentSourceSample::getSourceName() const { - return "$sample"; -} - DocumentSource::GetNextResult DocumentSourceSample::getNext() { if (_size == 0) return GetNextResult::makeEOF(); @@ -84,7 +82,7 @@ DocumentSource::GetNextResult DocumentSourceSample::getNext() { } Value DocumentSourceSample::serialize(boost::optional<ExplainOptions::Verbosity> explain) const { - return Value(DOC(getSourceName() << DOC("size" << _size))); + return Value(DOC(kStageName << DOC("size" << _size))); } namespace { diff --git a/src/mongo/db/pipeline/document_source_sample.h b/src/mongo/db/pipeline/document_source_sample.h index 662c6a9a49d..07a85c77e33 100644 --- a/src/mongo/db/pipeline/document_source_sample.h +++ b/src/mongo/db/pipeline/document_source_sample.h @@ -35,8 +35,12 @@ namespace mongo { class DocumentSourceSample final : public DocumentSource, public SplittableDocumentSource { public: + static constexpr StringData kStageName = "$sample"_sd; + GetNextResult getNext() final; - const char* getSourceName() const final; + const char* getSourceName() const final { + return kStageName.rawData(); + } Value serialize(boost::optional<ExplainOptions::Verbosity> explain = boost::none) const final; StageConstraints constraints() const final { diff --git a/src/mongo/db/pipeline/document_source_sample_from_random_cursor.cpp b/src/mongo/db/pipeline/document_source_sample_from_random_cursor.cpp index ff6c4e6ec16..b429dbba6bf 100644 --- a/src/mongo/db/pipeline/document_source_sample_from_random_cursor.cpp +++ 
b/src/mongo/db/pipeline/document_source_sample_from_random_cursor.cpp @@ -92,6 +92,11 @@ DocumentSource::GetNextResult DocumentSourceSampleFromRandomCursor::getNext() { MutableDocument md(nextResult.releaseDocument()); md.setRandMetaField(_randMetaFieldVal); + if (pExpCtx->needsMerge) { + // This stage will be merged by sorting results according to this random metadata field, but + // the merging logic expects to sort by the sort key metadata. + md.setSortKeyMetaField(BSON("" << _randMetaFieldVal)); + } return md.freeze(); } diff --git a/src/mongo/db/pipeline/document_source_skip.cpp b/src/mongo/db/pipeline/document_source_skip.cpp index 61125a879f4..cf1b99e6e93 100644 --- a/src/mongo/db/pipeline/document_source_skip.cpp +++ b/src/mongo/db/pipeline/document_source_skip.cpp @@ -50,9 +50,7 @@ REGISTER_DOCUMENT_SOURCE(skip, LiteParsedDocumentSourceDefault::parse, DocumentSourceSkip::createFromBson); -const char* DocumentSourceSkip::getSourceName() const { - return "$skip"; -} +constexpr StringData DocumentSourceSkip::kStageName; DocumentSource::GetNextResult DocumentSourceSkip::getNext() { pExpCtx->checkForInterrupt(); diff --git a/src/mongo/db/pipeline/document_source_skip.h b/src/mongo/db/pipeline/document_source_skip.h index 8fc27a59f87..fc87d7e1eaa 100644 --- a/src/mongo/db/pipeline/document_source_skip.h +++ b/src/mongo/db/pipeline/document_source_skip.h @@ -34,9 +34,27 @@ namespace mongo { class DocumentSourceSkip final : public DocumentSource, public SplittableDocumentSource { public: - // virtuals from DocumentSource + static constexpr StringData kStageName = "$skip"_sd; + + /** + * Convenience method for creating a $skip stage. + */ + static boost::intrusive_ptr<DocumentSourceSkip> create( + const boost::intrusive_ptr<ExpressionContext>& pExpCtx, long long nToSkip); + + /** + * Parses the user-supplied BSON into a $skip stage. + * + * Throws a AssertionException if 'elem' is an invalid $skip specification. 
+ */ + static boost::intrusive_ptr<DocumentSource> createFromBson( + BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + GetNextResult getNext() final; - const char* getSourceName() const final; + + const char* getSourceName() const final { + return kStageName.rawData(); + } StageConstraints constraints() const final { StageConstraints constraints; @@ -77,20 +95,6 @@ public: _nToSkip = newSkip; } - /** - * Convenience method for creating a $skip stage. - */ - static boost::intrusive_ptr<DocumentSourceSkip> create( - const boost::intrusive_ptr<ExpressionContext>& pExpCtx, long long nToSkip); - - /** - * Parses the user-supplied BSON into a $skip stage. - * - * Throws a AssertionException if 'elem' is an invalid $skip specification. - */ - static boost::intrusive_ptr<DocumentSource> createFromBson( - BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - private: explicit DocumentSourceSkip(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, long long nToSkip); diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index 4d78d08ccaa..fd113a9c386 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -47,6 +47,54 @@ using std::make_pair; using std::string; using std::vector; +namespace { +Value missingToNull(Value maybeMissing) { + return maybeMissing.missing() ? Value(BSONNULL) : maybeMissing; +} + +/** + * Converts a Value representing an in-memory sort key to a BSONObj representing a serialized sort + * key. If 'sortPatternSize' is 1, returns a BSON object with 'value' as it's only value - and an + * empty field name. Otherwise asserts that 'value' is an array of length 'sortPatternSize', and + * returns a BSONObj with one field for each value in the array, each field using the empty field + * name. 
+ */ +BSONObj serializeSortKey(size_t sortPatternSize, Value value) { + // Missing values don't serialize correctly in this format, so use nulls instead, since they are + // considered equivalent with woCompare(). + if (sortPatternSize == 1) { + return BSON("" << missingToNull(value)); + } + invariant(value.isArray()); + invariant(value.getArrayLength() == sortPatternSize); + BSONObjBuilder bb; + for (auto&& val : value.getArray()) { + bb << "" << missingToNull(val); + } + return bb.obj(); +} + +/** + * Converts a BSONObj representing a serialized sort key into a Value, which we use for in-memory + * comparisons. BSONObj {'': 1, '': [2, 3]} becomes Value [1, [2, 3]]. + */ +Value deserializeSortKey(size_t sortPatternSize, BSONObj bsonSortKey) { + vector<Value> keys; + keys.reserve(sortPatternSize); + for (auto&& elt : bsonSortKey) { + keys.push_back(Value{elt}); + } + invariant(keys.size() == sortPatternSize); + if (sortPatternSize == 1) { + // As a special case for a sort on a single field, we do not put the keys into an array. 
+ return keys[0]; + } + return Value{std::move(keys)}; +} + +} // namespace +constexpr StringData DocumentSourceSort::kStageName; + DocumentSourceSort::DocumentSourceSort(const intrusive_ptr<ExpressionContext>& pExpCtx) : DocumentSource(pExpCtx), _mergingPresorted(false) {} @@ -54,10 +102,6 @@ REGISTER_DOCUMENT_SOURCE(sort, LiteParsedDocumentSourceDefault::parse, DocumentSourceSort::createFromBson); -const char* DocumentSourceSort::getSourceName() const { - return "$sort"; -} - DocumentSource::GetNextResult DocumentSourceSort::getNext() { pExpCtx->checkForInterrupt(); @@ -83,17 +127,18 @@ DocumentSource::GetNextResult DocumentSourceSort::getNext() { void DocumentSourceSort::serializeToArray( std::vector<Value>& array, boost::optional<ExplainOptions::Verbosity> explain) const { if (explain) { // always one Value for combined $sort + $limit - array.push_back(Value( - DOC(getSourceName() << DOC( - "sortKey" << serializeSortKey(static_cast<bool>(explain)) << "mergePresorted" - << (_mergingPresorted ? Value(true) : Value()) - << "limit" - << (limitSrc ? Value(limitSrc->getLimit()) : Value()))))); + array.push_back(Value(DOC( + kStageName << DOC("sortKey" << sortKeyPattern(SortKeySerialization::kForExplain) + << "mergePresorted" + << (_mergingPresorted ? Value(true) : Value()) + << "limit" + << (limitSrc ? Value(limitSrc->getLimit()) : Value()))))); } else { // one Value for $sort and maybe a Value for $limit - MutableDocument inner(serializeSortKey(static_cast<bool>(explain))); - if (_mergingPresorted) + MutableDocument inner(sortKeyPattern(SortKeySerialization::kForPipelineSerialization)); + if (_mergingPresorted) { inner["$mergePresorted"] = Value(true); - array.push_back(Value(DOC(getSourceName() << inner.freeze()))); + } + array.push_back(Value(DOC(kStageName << inner.freeze()))); if (limitSrc) { limitSrc->serializeToArray(array); @@ -109,7 +154,7 @@ long long DocumentSourceSort::getLimit() const { return limitSrc ? 
limitSrc->getLimit() : -1; } -Document DocumentSourceSort::serializeSortKey(bool explain) const { +Document DocumentSourceSort::sortKeyPattern(SortKeySerialization serializationMode) const { MutableDocument keyObj; const size_t n = _sortPattern.size(); for (size_t i = 0; i < n; ++i) { @@ -118,9 +163,22 @@ Document DocumentSourceSort::serializeSortKey(bool explain) const { keyObj.setField(_sortPattern[i].fieldPath->fullPath(), Value(_sortPattern[i].isAscending ? 1 : -1)); } else { - // For sorts that are not simply on a field path, use a made-up field name. - keyObj[string(str::stream() << "$computed" << i)] = - _sortPattern[i].expression->serialize(explain); + // Sorting by an expression, use a made up field name. + auto computedFieldName = string(str::stream() << "$computed" << i); + switch (serializationMode) { + case SortKeySerialization::kForExplain: + case SortKeySerialization::kForPipelineSerialization: { + const bool isExplain = (serializationMode == SortKeySerialization::kForExplain); + keyObj[computedFieldName] = _sortPattern[i].expression->serialize(isExplain); + break; + } + case SortKeySerialization::kForSortKeyMerging: { + // We need to be able to tell which direction the sort is. Expression sorts are + // always descending. + keyObj[computedFieldName] = Value(-1); + break; + } + } } } return keyObj.freeze(); @@ -149,6 +207,10 @@ DocumentSource::GetDepsReturn DocumentSourceSort::getDependencies(DepsTracker* d deps->fields.insert(keyPart.fieldPath->fullPath()); } } + if (pExpCtx->needsMerge) { + // Include the sort key if we will merge several sorted streams later. 
+ deps->setNeedSortKey(true); + } return SEE_NEXT; } @@ -220,9 +282,11 @@ intrusive_ptr<DocumentSourceSort> DocumentSourceSort::create( uassert(15976, "$sort stage must have at least one sort key", !pSort->_sortPattern.empty()); - const bool isExplain = false; - pSort->_sortKeyGen = - SortKeyGenerator{pSort->serializeSortKey(isExplain).toBson(), pExpCtx->getCollator()}; + pSort->_sortKeyGen = SortKeyGenerator{ + // The SortKeyGenerator expects the expressions to be serialized in order to detect a sort + // by a metadata field. + pSort->sortKeyPattern(SortKeySerialization::kForPipelineSerialization).toBson(), + pExpCtx->getCollator()}; if (limit > 0) { pSort->setLimitSrc(DocumentSourceLimit::create(pExpCtx, limit)); @@ -269,12 +333,19 @@ DocumentSource::GetNextResult DocumentSourceSort::populate() { } } -void DocumentSourceSort::loadDocument(const Document& doc) { +void DocumentSourceSort::loadDocument(Document&& doc) { invariant(!_populated); if (!_sorter) { _sorter.reset(MySorter::make(makeSortOptions(), Comparator(*this))); } - _sorter->add(extractKey(doc), doc); + + Value sortKey; + Document docForSorter; + // We always need to extract the sort key if we've reached this point. If the query system had + // already computed the sort key we'd have split the pipeline there, would be merging presorted + // documents, and wouldn't use this method. + std::tie(sortKey, docForSorter) = extractSortKey(std::move(doc)); + _sorter->add(sortKey, docForSorter); } void DocumentSourceSort::loadingDone() { @@ -295,8 +366,18 @@ public: return _cursor->more(); } Data next() { - const Document doc = DocumentSourceMergeCursors::nextSafeFrom(_cursor); - return make_pair(_sorter->extractKey(doc), doc); + auto doc = DocumentSourceMergeCursors::nextSafeFrom(_cursor); + if (doc.hasSortKeyMetaField()) { + // We set the sort key metadata field during the first half of the sort, so just use + // that as the sort key here. 
+ return make_pair( + deserializeSortKey(_sorter->_sortPattern.size(), doc.getSortKeyMetaField()), doc); + } else { + // It's possible this result is coming from a shard that is still on an old version. If + // that's the case, it won't tell us its sort key - we'll have to re-compute it + // ourselves. + return _sorter->extractSortKey(std::move(doc)); + } } private: @@ -344,7 +425,7 @@ StatusWith<Value> DocumentSourceSort::extractKeyFast(const Document& doc) const return Value{std::move(keys)}; } -Value DocumentSourceSort::extractKeyWithArray(const Document& doc) const { +BSONObj DocumentSourceSort::extractKeyWithArray(const Document& doc) const { SortKeyGenerator::Metadata metadata; if (doc.hasTextScore()) { metadata.textScore = doc.getTextScore(); @@ -356,26 +437,40 @@ Value DocumentSourceSort::extractKeyWithArray(const Document& doc) const { // Convert the Document to a BSONObj, but only do the conversion for the paths we actually need. // Then run the result through the SortKeyGenerator to obtain the final sort key. auto bsonDoc = document_path_support::documentToBsonWithPaths(doc, _paths); - auto bsonKey = uassertStatusOK(_sortKeyGen->getSortKey(std::move(bsonDoc), &metadata)); - - // Convert BSON sort key, which is an object with empty field names, into the corresponding - // array of keys representation as a Value. BSONObj {'': 1, '': [2, - // 3]} becomes Value [1, [2, - // 3]]. 
- vector<Value> keys; - keys.reserve(_sortPattern.size()); - for (auto&& elt : bsonKey) { - keys.push_back(Value{elt}); - } - return Value{std::move(keys)}; + return uassertStatusOK(_sortKeyGen->getSortKey(std::move(bsonDoc), &metadata)); } -Value DocumentSourceSort::extractKey(const Document& doc) const { - auto key = extractKeyFast(doc); - if (key.isOK()) { - return key.getValue(); +std::pair<Value, Document> DocumentSourceSort::extractSortKey(Document&& doc) const { + boost::optional<BSONObj> serializedSortKey; // Only populated if we need to merge with other + // sorted results later. Serialized in the standard + // BSON sort key format with empty field names, + // e.g. {'': 1, '': [2, 3]}. + + Value inMemorySortKey; // The Value we will use for comparisons within the sorter. + + auto fastKey = extractKeyFast(doc); + if (fastKey.isOK()) { + inMemorySortKey = std::move(fastKey.getValue()); + if (pExpCtx->needsMerge) { + serializedSortKey = serializeSortKey(_sortPattern.size(), inMemorySortKey); + } + } else { + // We have to do it the slow way - through the sort key generator. This will generate a BSON + // sort key, which is an object with empty field names. We then need to convert this BSON + // representation into the corresponding array of keys as a Value. BSONObj {'': 1, '': [2, + // 3]} becomes Value [1, [2, 3]]. + serializedSortKey = extractKeyWithArray(doc); + inMemorySortKey = deserializeSortKey(_sortPattern.size(), *serializedSortKey); } - return extractKeyWithArray(doc); + MutableDocument toBeSorted(std::move(doc)); + if (pExpCtx->needsMerge) { + // We need to be merged, so will have to be serialized. Save the sort key here to avoid + // re-computing it during the merge. 
+ invariant(serializedSortKey); + toBeSorted.setSortKeyMetaField(*serializedSortKey); + } + return {inMemorySortKey, toBeSorted.freeze()}; } int DocumentSourceSort::compare(const Value& lhs, const Value& rhs) const { diff --git a/src/mongo/db/pipeline/document_source_sort.h b/src/mongo/db/pipeline/document_source_sort.h index 5731051cbd3..2494400eaae 100644 --- a/src/mongo/db/pipeline/document_source_sort.h +++ b/src/mongo/db/pipeline/document_source_sort.h @@ -39,10 +39,20 @@ namespace mongo { class DocumentSourceSort final : public DocumentSource, public SplittableDocumentSource { public: static const uint64_t kMaxMemoryUsageBytes = 100 * 1024 * 1024; + static constexpr StringData kStageName = "$sort"_sd; + + enum class SortKeySerialization { + kForExplain, + kForPipelineSerialization, + kForSortKeyMerging, + }; - // virtuals from DocumentSource GetNextResult getNext() final; - const char* getSourceName() const final; + + const char* getSourceName() const final { + return kStageName.rawData(); + } + void serializeToArray( std::vector<Value>& array, boost::optional<ExplainOptions::Verbosity> explain = boost::none) const final; @@ -73,8 +83,10 @@ public: boost::intrusive_ptr<DocumentSource> getShardSource() final; boost::intrusive_ptr<DocumentSource> getMergeSource() final; - /// Write out a Document whose contents are the sort key. - Document serializeSortKey(bool explain) const; + /** + * Write out a Document whose contents are the sort key pattern. + */ + Document sortKeyPattern(SortKeySerialization) const; /** * Parses a $sort stage from the user-supplied BSON. @@ -101,7 +113,7 @@ public: * coming from another DocumentSource. Once all documents have been added, the caller must call * loadingDone() before using getNext() to receive the documents in sorted order. */ - void loadDocument(const Document& doc); + void loadDocument(Document&& doc); /** * Signals to the sort stage that there will be no more input documents. 
It is an error to call @@ -179,11 +191,15 @@ private: SortOptions makeSortOptions() const; /** - * Returns the sort key for 'doc' based on the SortPattern. Attempts to generate the key using a - * fast path that does not handle arrays. If an array is encountered, falls back on - * extractKeyWithArray(). + * Returns the sort key for 'doc', as well as the document that should be entered into the + * sorter to eventually be returned. If we will need to later merge the sorted results with + * other results, this method adds the sort key as metadata onto 'doc' to speed up the merge + * later. + * + * Attempts to generate the key using a fast path that does not handle arrays. If an array is + * encountered, falls back on extractKeyWithArray(). */ - Value extractKey(const Document& doc) const; + std::pair<Value, Document> extractSortKey(Document&& doc) const; /** * Returns the sort key for 'doc' based on the SortPattern, or ErrorCodes::InternalError if an @@ -198,9 +214,10 @@ private: StatusWith<Value> extractKeyPart(const Document& doc, const SortPatternPart& keyPart) const; /** - * Returns the sort key for 'doc' based on the SortPattern. + * Returns the sort key for 'doc' based on the SortPattern. Note this is in the BSONObj format - + * with empty field names. */ - Value extractKeyWithArray(const Document& doc) const; + BSONObj extractKeyWithArray(const Document& doc) const; int compare(const Value& lhs, const Value& rhs) const; diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp index 89975fc4cf2..c5edf02f50d 100644 --- a/src/mongo/db/pipeline/pipeline.cpp +++ b/src/mongo/db/pipeline/pipeline.cpp @@ -374,10 +374,10 @@ void Pipeline::Optimizations::Sharded::moveFinalUnwindFromShardsToMerger(Pipelin void Pipeline::Optimizations::Sharded::limitFieldsSentFromShardsToMerger(Pipeline* shardPipe, Pipeline* mergePipe) { - DepsTracker mergeDeps( - mergePipe->getDependencies(DocumentSourceMatch::isTextQuery(shardPipe->getInitialQuery()) - ? 
DepsTracker::MetadataAvailable::kTextScore - : DepsTracker::MetadataAvailable::kNoMetadata)); + auto depsMetadata = DocumentSourceMatch::isTextQuery(shardPipe->getInitialQuery()) + ? DepsTracker::MetadataAvailable::kTextScore + : DepsTracker::MetadataAvailable::kNoMetadata; + DepsTracker mergeDeps(mergePipe->getDependencies(depsMetadata)); if (mergeDeps.needWholeDocument) return; // the merge needs all fields, so nothing we can do. @@ -399,7 +399,7 @@ void Pipeline::Optimizations::Sharded::limitFieldsSentFromShardsToMerger(Pipelin // 2) Optimization IS NOT applied immediately following a $project or $group since it would // add an unnecessary project (and therefore a deep-copy). for (auto&& source : shardPipe->_sources) { - DepsTracker dt; + DepsTracker dt(depsMetadata); if (source->getDependencies(&dt) & DocumentSource::EXHAUSTIVE_FIELDS) return; } @@ -528,6 +528,9 @@ DepsTracker Pipeline::getDependencies(DepsTracker::MetadataAvailable metadataAva if (localDeps.getNeedTextScore()) deps.setNeedTextScore(true); + if (localDeps.getNeedSortKey()) + deps.setNeedSortKey(true); + knowAllMeta = status & DocumentSource::EXHAUSTIVE_META; } @@ -552,4 +555,13 @@ DepsTracker Pipeline::getDependencies(DepsTracker::MetadataAvailable metadataAva return deps; } +boost::intrusive_ptr<DocumentSource> Pipeline::popFrontStageWithName(StringData targetStageName) { + if (_sources.empty() || _sources.front()->getSourceName() != targetStageName) { + return nullptr; + } + auto targetStage = _sources.front(); + _sources.pop_front(); + stitch(); + return targetStage; +} } // namespace mongo diff --git a/src/mongo/db/pipeline/pipeline.h b/src/mongo/db/pipeline/pipeline.h index 35b18954fdd..7300b8e542e 100644 --- a/src/mongo/db/pipeline/pipeline.h +++ b/src/mongo/db/pipeline/pipeline.h @@ -264,6 +264,12 @@ public: } /** + * Removes and returns the first stage of the pipeline if its name is 'targetStageName'. 
Returns + * nullptr if there is no first stage, or if the stage's name is not 'targetStageName'. + */ + boost::intrusive_ptr<DocumentSource> popFrontStageWithName(StringData targetStageName); + + /** * PipelineD is a "sister" class that has additional functionality for the Pipeline. It exists * because of linkage requirements. Pipeline needs to function in mongod and mongos. PipelineD * contains extra functionality required in mongod, and which can't appear in mongos because the diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 42f9ea6615c..7f6a7c5fc49 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -448,6 +448,13 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExe return getExecutorFind( opCtx, collection, nss, std::move(cq.getValue()), PlanExecutor::YIELD_AUTO, plannerOpts); } + +BSONObj removeSortKeyMetaProjection(BSONObj projectionObj) { + if (!projectionObj[Document::metaFieldSortKey]) { + return projectionObj; + } + return projectionObj.removeField(Document::metaFieldSortKey); +} } // namespace void PipelineD::prepareCursorSource(Collection* collection, @@ -526,20 +533,18 @@ void PipelineD::prepareCursorSource(Collection* collection, BSONObj projForQuery = deps.toProjection(); - /* - Look for an initial sort; we'll try to add this to the - Cursor we create. If we're successful in doing that (further down), - we'll remove the $sort from the pipeline, because the documents - will already come sorted in the specified order as a result of the - index scan. - */ + // Look for an initial sort; we'll try to add this to the Cursor we create. If we're successful + // in doing that, we'll remove the $sort from the pipeline, because the documents will already + // come sorted in the specified order as a result of the index scan. 
intrusive_ptr<DocumentSourceSort> sortStage; BSONObj sortObj; if (!sources.empty()) { sortStage = dynamic_cast<DocumentSourceSort*>(sources.front().get()); if (sortStage) { - // build the sort key - sortObj = sortStage->serializeSortKey(/*explain*/ false).toBson(); + sortObj = sortStage + ->sortKeyPattern( + DocumentSourceSort::SortKeySerialization::kForPipelineSerialization) + .toBson(); } } @@ -611,26 +616,30 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep plannerOpts |= QueryPlannerParams::IS_COUNT; } - // The only way to get a text score is to let the query system handle the projection. In all - // other cases, unless the query system can do an index-covered projection and avoid going to - // the raw record at all, it is faster to have ParsedDeps filter the fields we need. - if (!deps.getNeedTextScore()) { + // The only way to get a text score or the sort key is to let the query system handle the + // projection. In all other cases, unless the query system can do an index-covered projection + // and avoid going to the raw record at all, it is faster to have ParsedDeps filter the fields + // we need. + if (!deps.getNeedTextScore() && !deps.getNeedSortKey()) { plannerOpts |= QueryPlannerParams::NO_UNCOVERED_PROJECTIONS; } - BSONObj emptyProjection; + const BSONObj emptyProjection; + const BSONObj metaSortProjection = BSON("$meta" + << "sortKey"); if (sortStage) { // See if the query system can provide a non-blocking sort. - auto swExecutorSort = attemptToGetExecutor(opCtx, - collection, - nss, - expCtx, - oplogReplay, - queryObj, - emptyProjection, - *sortObj, - aggRequest, - plannerOpts); + auto swExecutorSort = + attemptToGetExecutor(opCtx, + collection, + nss, + expCtx, + oplogReplay, + queryObj, + expCtx->needsMerge ? metaSortProjection : emptyProjection, + *sortObj, + aggRequest, + plannerOpts); if (swExecutorSort.isOK()) { // Success! Now see if the query system can also cover the projection. 
@@ -682,6 +691,14 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep // Either there was no $sort stage, or the query system could not provide a non-blocking // sort. dassert(sortObj->isEmpty()); + *projectionObj = removeSortKeyMetaProjection(*projectionObj); + if (deps.getNeedSortKey() && !deps.getNeedTextScore()) { + // A sort key requirement would have prevented us from being able to add this parameter + // before, but now we know the query system won't cover the sort, so we will be able to + // compute the sort key ourselves during the $sort stage, and thus don't need a query + // projection to do so. + plannerOpts |= QueryPlannerParams::NO_UNCOVERED_PROJECTIONS; + } // See if the query system can cover the projection. auto swExecutorProj = attemptToGetExecutor(opCtx, diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 5d5909d4aa5..56e9041dd23 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -107,7 +107,25 @@ string optionString(size_t options) { ss << "INDEX_INTERSECTION "; } if (options & QueryPlannerParams::KEEP_MUTATIONS) { - ss << "KEEP_MUTATIONS"; + ss << "KEEP_MUTATIONS "; + } + if (options & QueryPlannerParams::IS_COUNT) { + ss << "IS_COUNT "; + } + if (options & QueryPlannerParams::SPLIT_LIMITED_SORT) { + ss << "SPLIT_LIMITED_SORT "; + } + if (options & QueryPlannerParams::CANNOT_TRIM_IXISECT) { + ss << "CANNOT_TRIM_IXISECT "; + } + if (options & QueryPlannerParams::SNAPSHOT_USE_ID) { + ss << "SNAPSHOT_USE_ID "; + } + if (options & QueryPlannerParams::NO_UNCOVERED_PROJECTIONS) { + ss << "NO_UNCOVERED_PROJECTIONS "; + } + if (options & QueryPlannerParams::GENERATE_COVERED_IXSCANS) { + ss << "GENERATE_COVERED_IXSCANS "; } return ss; |