diff options
author | Misha Ivkov <misha.ivkov@10gen.com> | 2019-07-08 16:04:25 -0400 |
---|---|---|
committer | Misha Ivkov <misha.ivkov@10gen.com> | 2019-07-12 16:23:52 -0400 |
commit | deaf23e643efa664338d602b419589639409b33a (patch) | |
tree | d410b8dc6454a285950e365083c719892f24c88f | |
parent | 49b66d842d58eba32c72bb9ef8f3cd6fd7d0c9ed (diff) | |
download | mongo-deaf23e643efa664338d602b419589639409b33a.tar.gz |
SERVER-42083 refactor SortPattern into its own class
-rw-r--r-- | src/mongo/db/pipeline/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_sort.cpp | 98 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_sort.h | 31 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_d.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/query/SConscript | 12 | ||||
-rw-r--r-- | src/mongo/db/query/sort_pattern.cpp | 108 | ||||
-rw-r--r-- | src/mongo/db/query/sort_pattern.h | 97 |
7 files changed, 238 insertions, 114 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index adca346de94..984bdbdf89c 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -338,6 +338,7 @@ pipelineeEnv.Library( '$BUILD_DIR/mongo/db/pipeline/lite_parsed_document_source', '$BUILD_DIR/mongo/db/query/collation/collator_factory_interface', '$BUILD_DIR/mongo/db/query/collation/collator_interface', + '$BUILD_DIR/mongo/db/query/sort_pattern', '$BUILD_DIR/mongo/db/repl/oplog_entry', '$BUILD_DIR/mongo/db/repl/read_concern_args', '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface', diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index 395d517ce2d..670f6165277 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -33,7 +33,6 @@ #include "mongo/db/jsobj.h" #include "mongo/db/pipeline/document.h" -#include "mongo/db/pipeline/document_path_support.h" #include "mongo/db/pipeline/document_source_skip.h" #include "mongo/db/pipeline/expression.h" #include "mongo/db/pipeline/expression_context.h" @@ -116,8 +115,9 @@ std::string nextFileName() { constexpr StringData DocumentSourceSort::kStageName; -DocumentSourceSort::DocumentSourceSort(const intrusive_ptr<ExpressionContext>& pExpCtx) - : DocumentSource(pExpCtx) {} +DocumentSourceSort::DocumentSourceSort(const intrusive_ptr<ExpressionContext>& pExpCtx, + const BSONObj& sortOrder) + : DocumentSource(pExpCtx), _rawSort(sortOrder), _sortPattern({_rawSort, pExpCtx}) {} REGISTER_DOCUMENT_SOURCE(sort, LiteParsedDocumentSourceDefault::parse, @@ -147,11 +147,12 @@ void DocumentSourceSort::serializeToArray( if (explain) { // always one Value for combined $sort + $limit array.push_back(Value(DOC( kStageName << DOC( - "sortKey" << serializeSortKeyPattern(SortKeySerialization::kForExplain) << "limit" + "sortKey" << _sortPattern.serialize(SortPattern::SortKeySerialization::kForExplain) + << "limit" << (_limitSrc ? Value(_limitSrc->getLimit()) : Value()))))); } else { // one Value for $sort and maybe a Value for $limit MutableDocument inner( - serializeSortKeyPattern(SortKeySerialization::kForPipelineSerialization)); + _sortPattern.serialize(SortPattern::SortKeySerialization::kForPipelineSerialization)); array.push_back(Value(DOC(kStageName << inner.freeze()))); if (_limitSrc) { @@ -168,36 +169,6 @@ long long DocumentSourceSort::getLimit() const { return _limitSrc ? _limitSrc->getLimit() : -1; } -Document DocumentSourceSort::serializeSortKeyPattern(SortKeySerialization serializationMode) const { - MutableDocument keyObj; - const size_t n = _sortPattern.size(); - for (size_t i = 0; i < n; ++i) { - if (_sortPattern[i].fieldPath) { - // Append a named integer based on whether the sort is ascending/descending. - keyObj.setField(_sortPattern[i].fieldPath->fullPath(), - Value(_sortPattern[i].isAscending ? 1 : -1)); - } else { - // Sorting by an expression, use a made up field name. - auto computedFieldName = string(str::stream() << "$computed" << i); - switch (serializationMode) { - case SortKeySerialization::kForExplain: - case SortKeySerialization::kForPipelineSerialization: { - const bool isExplain = (serializationMode == SortKeySerialization::kForExplain); - keyObj[computedFieldName] = _sortPattern[i].expression->serialize(isExplain); - break; - } - case SortKeySerialization::kForSortKeyMerging: { - // We need to be able to tell which direction the sort is. Expression sorts are - // always descending. - keyObj[computedFieldName] = Value(-1); - break; - } - } - } - } - return keyObj.freeze(); -} - Pipeline::SourceContainer::iterator DocumentSourceSort::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); @@ -260,61 +231,18 @@ intrusive_ptr<DocumentSourceSort> DocumentSourceSort::create( BSONObj sortOrder, long long limit, boost::optional<uint64_t> maxMemoryUsageBytes) { - intrusive_ptr<DocumentSourceSort> pSort(new DocumentSourceSort(pExpCtx)); + intrusive_ptr<DocumentSourceSort> pSort(new DocumentSourceSort(pExpCtx, sortOrder.getOwned())); pSort->_maxMemoryUsageBytes = maxMemoryUsageBytes ? *maxMemoryUsageBytes : internalDocumentSourceSortMaxBlockingSortBytes.load(); - pSort->_rawSort = sortOrder.getOwned(); - - for (auto&& keyField : sortOrder) { - auto fieldName = keyField.fieldNameStringData(); - - SortPatternPart patternPart; - - if (keyField.type() == Object) { - BSONObj metaDoc = keyField.Obj(); - // this restriction is due to needing to figure out sort direction - uassert(17312, - "$meta is the only expression supported by $sort right now", - metaDoc.firstElement().fieldNameStringData() == "$meta"); - - uassert(ErrorCodes::FailedToParse, - "Cannot have additional keys in a $meta sort specification", - metaDoc.nFields() == 1); - - VariablesParseState vps = pExpCtx->variablesParseState; - patternPart.expression = ExpressionMeta::parse(pExpCtx, metaDoc.firstElement(), vps); - - // If sorting by textScore, sort highest scores first. If sorting by randVal, order - // doesn't matter, so just always use descending. - patternPart.isAscending = false; - - pSort->_sortPattern.push_back(std::move(patternPart)); - continue; - } - - uassert(15974, - "$sort key ordering must be specified using a number or {$meta: 'textScore'}", - keyField.isNumber()); - - int sortOrder = keyField.numberInt(); - - uassert(15975, - "$sort key ordering must be 1 (for ascending) or -1 (for descending)", - ((sortOrder == 1) || (sortOrder == -1))); - - patternPart.fieldPath = FieldPath{fieldName}; - patternPart.isAscending = (sortOrder > 0); - pSort->_paths.insert(patternPart.fieldPath->fullPath()); - pSort->_sortPattern.push_back(std::move(patternPart)); - } uassert(15976, "$sort stage must have at least one sort key", !pSort->_sortPattern.empty()); pSort->_sortKeyGen = SortKeyGenerator{ // The SortKeyGenerator expects the expressions to be serialized in order to detect a sort // by a metadata field. - pSort->serializeSortKeyPattern(SortKeySerialization::kForPipelineSerialization).toBson(), + pSort->_sortPattern.serialize(SortPattern::SortKeySerialization::kForPipelineSerialization) + .toBson(), pExpCtx->getCollator()}; if (limit > 0) { @@ -410,8 +338,8 @@ Value DocumentSourceSort::getCollationComparisonKey(const Value& val) const { return Value(output.obj().firstElement()); } -StatusWith<Value> DocumentSourceSort::extractKeyPart(const Document& doc, - const SortPatternPart& patternPart) const { +StatusWith<Value> DocumentSourceSort::extractKeyPart( + const Document& doc, const SortPattern::SortPatternPart& patternPart) const { Value plainKey; if (patternPart.fieldPath) { invariant(!patternPart.expression); @@ -459,7 +387,7 @@ BSONObj DocumentSourceSort::extractKeyWithArray(const Document& doc) const { // Convert the Document to a BSONObj, but only do the conversion for the paths we actually need. // Then run the result through the SortKeyGenerator to obtain the final sort key. - auto bsonDoc = document_path_support::documentToBsonWithPaths(doc, _paths); + auto bsonDoc = _sortPattern.documentToBsonWithSortPaths(doc); return uassertStatusOK(_sortKeyGen->getSortKey(std::move(bsonDoc), &metadata)); } @@ -535,7 +463,7 @@ boost::optional<DocumentSource::DistributedPlanLogic> DocumentSourceSort::distri DistributedPlanLogic split; split.shardsStage = this; split.inputSortPattern = - serializeSortKeyPattern(SortKeySerialization::kForSortKeyMerging).toBson(); + _sortPattern.serialize(SortPattern::SortKeySerialization::kForSortKeyMerging).toBson(); if (_limitSrc) { split.mergingStage = DocumentSourceLimit::create(pExpCtx, _limitSrc->getLimit()); } diff --git a/src/mongo/db/pipeline/document_source_sort.h b/src/mongo/db/pipeline/document_source_sort.h index 0e0310c5ca8..519b61a65e8 100644 --- a/src/mongo/db/pipeline/document_source_sort.h +++ b/src/mongo/db/pipeline/document_source_sort.h @@ -34,6 +34,7 @@ #include "mongo/db/pipeline/document_source_limit.h" #include "mongo/db/pipeline/expression.h" #include "mongo/db/query/query_knobs_gen.h" +#include "mongo/db/query/sort_pattern.h" #include "mongo/db/sorter/sorter.h" namespace mongo { @@ -41,22 +42,6 @@ namespace mongo { class DocumentSourceSort final : public DocumentSource { public: static constexpr StringData kStageName = "$sort"_sd; - enum class SortKeySerialization { - kForExplain, - kForPipelineSerialization, - kForSortKeyMerging, - }; - - // Represents one of the components in a compound sort pattern. Each component is either the - // field path by which we are sorting, or an Expression which can be used to retrieve the sort - // value in the case of a $meta-sort (but not both). - struct SortPatternPart { - bool isAscending = true; - boost::optional<FieldPath> fieldPath; - boost::intrusive_ptr<Expression> expression; - }; - - using SortPattern = std::vector<SortPatternPart>; GetNextResult getNext() final; @@ -102,11 +87,6 @@ public: } /** - * Write out a Document whose contents are the sort key pattern. - */ - Document serializeSortKeyPattern(SortKeySerialization) const; - - /** * Parses a $sort stage from the user-supplied BSON. */ static boost::intrusive_ptr<DocumentSource> createFromBson( @@ -182,7 +162,8 @@ private: const DocumentSourceSort& _source; }; - explicit DocumentSourceSort(const boost::intrusive_ptr<ExpressionContext>& pExpCtx); + explicit DocumentSourceSort(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, + const BSONObj& sortOrder); Value serialize(boost::optional<ExplainOptions::Verbosity> explain = boost::none) const final { MONGO_UNREACHABLE; // Should call serializeToArray instead. @@ -221,7 +202,8 @@ private: * Extracts the sort key component described by 'keyPart' from 'doc' and returns it. Returns * ErrorCodes::Internal error if the path for 'keyPart' contains an array in 'doc'. */ - StatusWith<Value> extractKeyPart(const Document& doc, const SortPatternPart& keyPart) const; + StatusWith<Value> extractKeyPart(const Document& doc, + const SortPattern::SortPatternPart& keyPart) const; /** * Returns the sort key for 'doc' based on the SortPattern. Note this is in the BSONObj format - @@ -256,9 +238,6 @@ private: SortPattern _sortPattern; - // The set of paths on which we're sorting. - std::set<std::string> _paths; - boost::intrusive_ptr<DocumentSourceLimit> _limitSrc; uint64_t _maxMemoryUsageBytes; diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 0e659a37da5..dc742e0437c 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -471,9 +471,8 @@ PipelineD::buildInnerQueryExecutorGeneric(Collection* collection, BSONObj sortObj; if (sortStage) { - sortObj = sortStage - ->serializeSortKeyPattern( - DocumentSourceSort::SortKeySerialization::kForPipelineSerialization) + sortObj = sortStage->getSortKeyPattern() + .serialize(SortPattern::SortKeySerialization::kForPipelineSerialization) .toBson(); } diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index 4c844376219..dbba9fe6c18 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -199,6 +199,18 @@ env.Library( ], ) +env.Library( + target="sort_pattern", + source=[ + "sort_pattern.cpp", + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/pipeline/document_value', + '$BUILD_DIR/mongo/db/pipeline/expression', + ], + ) + env.CppUnitTest( target="db_query_test", source=[ diff --git a/src/mongo/db/query/sort_pattern.cpp b/src/mongo/db/query/sort_pattern.cpp new file mode 100644 index 00000000000..506ee856805 --- /dev/null +++ b/src/mongo/db/query/sort_pattern.cpp @@ -0,0 +1,108 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/sort_pattern.h" + +namespace mongo { +SortPattern::SortPattern(const BSONObj& obj, + const boost::intrusive_ptr<ExpressionContext>& pExpCtx) { + for (auto&& keyField : obj) { + auto fieldName = keyField.fieldNameStringData(); + + SortPatternPart patternPart; + + if (keyField.type() == Object) { + BSONObj metaDoc = keyField.Obj(); + // this restriction is due to needing to figure out sort direction + uassert(17312, + "$meta is the only expression supported by $sort right now", + metaDoc.firstElement().fieldNameStringData() == "$meta"); + + uassert(ErrorCodes::FailedToParse, + "Cannot have additional keys in a $meta sort specification", + metaDoc.nFields() == 1); + + VariablesParseState vps = pExpCtx->variablesParseState; + patternPart.expression = ExpressionMeta::parse(pExpCtx, metaDoc.firstElement(), vps); + + // If sorting by textScore, sort highest scores first. If sorting by randVal, order + // doesn't matter, so just always use descending. + patternPart.isAscending = false; + + _sortPattern.push_back(std::move(patternPart)); + continue; + } + + uassert(15974, + "$sort key ordering must be specified using a number or {$meta: 'textScore'}", + keyField.isNumber()); + + int sortOrder = keyField.numberInt(); + + uassert(15975, + "$sort key ordering must be 1 (for ascending) or -1 (for descending)", + ((sortOrder == 1) || (sortOrder == -1))); + + patternPart.fieldPath = FieldPath{fieldName}; + patternPart.isAscending = (sortOrder > 0); + _paths.insert(patternPart.fieldPath->fullPath()); + _sortPattern.push_back(std::move(patternPart)); + } +} + +Document SortPattern::serialize(SortKeySerialization serializationMode) const { + MutableDocument keyObj; + const size_t n = _sortPattern.size(); + for (size_t i = 0; i < n; ++i) { + if (_sortPattern[i].fieldPath) { + // Append a named integer based on whether the sort is ascending/descending. + keyObj.setField(_sortPattern[i].fieldPath->fullPath(), + Value(_sortPattern[i].isAscending ? 1 : -1)); + } else { + // Sorting by an expression, use a made up field name. + auto computedFieldName = std::string(str::stream() << "$computed" << i); + switch (serializationMode) { + case SortKeySerialization::kForExplain: + case SortKeySerialization::kForPipelineSerialization: { + const bool isExplain = (serializationMode == SortKeySerialization::kForExplain); + keyObj[computedFieldName] = _sortPattern[i].expression->serialize(isExplain); + break; + } + case SortKeySerialization::kForSortKeyMerging: { + // We need to be able to tell which direction the sort is. Expression sorts are + // always descending. + keyObj[computedFieldName] = Value(-1); + break; + } + } + } + } + return keyObj.freeze(); +} +} // namespace mongo diff --git a/src/mongo/db/query/sort_pattern.h b/src/mongo/db/query/sort_pattern.h new file mode 100644 index 00000000000..a8d57efd114 --- /dev/null +++ b/src/mongo/db/query/sort_pattern.h @@ -0,0 +1,97 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ +#pragma once + +#include <vector> + +#include "mongo/db/pipeline/document.h" +#include "mongo/db/pipeline/document_path_support.h" +#include "mongo/db/pipeline/expression.h" +#include "mongo/db/pipeline/field_path.h" + +namespace mongo { + +class SortPattern { +public: + enum class SortKeySerialization { + kForExplain, + kForPipelineSerialization, + kForSortKeyMerging, + }; + + // Represents one of the components in a compound sort pattern. Each component is either the + // field path by which we are sorting, or an Expression which can be used to retrieve the sort + // value in the case of a $meta-sort (but not both). + struct SortPatternPart { + bool isAscending = true; + boost::optional<FieldPath> fieldPath; + boost::intrusive_ptr<Expression> expression; + }; + + SortPattern(const BSONObj&, const boost::intrusive_ptr<ExpressionContext>&); + + /** + * Write out a Document whose contents are the sort key pattern. + */ + Document serialize(SortKeySerialization) const; + + /** + * Serializes the document to BSON, only keeping the paths specified in the sort pattern. + */ + BSONObj documentToBsonWithSortPaths(const Document& doc) const { + return document_path_support::documentToBsonWithPaths(doc, _paths); + } + + size_t size() const { + return _sortPattern.size(); + } + + bool empty() const { + return _sortPattern.empty(); + } + + SortPatternPart operator[](int idx) const { + return _sortPattern[idx]; + } + + std::vector<SortPatternPart>::const_iterator begin() const { + return _sortPattern.cbegin(); + } + + std::vector<SortPatternPart>::const_iterator end() const { + return _sortPattern.cend(); + } + +private: + std::vector<SortPatternPart> _sortPattern; + + // The set of paths on which we're sorting. + std::set<std::string> _paths; +}; +} // namespace mongo |