summaryrefslogtreecommitdiff
path: root/src/mongo/db/pipeline/document_source_sort.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/pipeline/document_source_sort.cpp')
-rw-r--r--src/mongo/db/pipeline/document_source_sort.cpp177
1 files changed, 136 insertions, 41 deletions
diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp
index 4d78d08ccaa..fd113a9c386 100644
--- a/src/mongo/db/pipeline/document_source_sort.cpp
+++ b/src/mongo/db/pipeline/document_source_sort.cpp
@@ -47,6 +47,54 @@ using std::make_pair;
using std::string;
using std::vector;
+namespace {
+Value missingToNull(Value maybeMissing) {
+ return maybeMissing.missing() ? Value(BSONNULL) : maybeMissing;
+}
+
+/**
+ * Converts a Value representing an in-memory sort key to a BSONObj representing a serialized sort
+ * key. If 'sortPatternSize' is 1, returns a BSON object with 'value' as it's only value - and an
+ * empty field name. Otherwise asserts that 'value' is an array of length 'sortPatternSize', and
+ * returns a BSONObj with one field for each value in the array, each field using the empty field
+ * name.
+ */
+BSONObj serializeSortKey(size_t sortPatternSize, Value value) {
+ // Missing values don't serialize correctly in this format, so use nulls instead, since they are
+ // considered equivalent with woCompare().
+ if (sortPatternSize == 1) {
+ return BSON("" << missingToNull(value));
+ }
+ invariant(value.isArray());
+ invariant(value.getArrayLength() == sortPatternSize);
+ BSONObjBuilder bb;
+ for (auto&& val : value.getArray()) {
+ bb << "" << missingToNull(val);
+ }
+ return bb.obj();
+}
+
+/**
+ * Converts a BSONObj representing a serialized sort key into a Value, which we use for in-memory
+ * comparisons. BSONObj {'': 1, '': [2, 3]} becomes Value [1, [2, 3]].
+ */
+Value deserializeSortKey(size_t sortPatternSize, BSONObj bsonSortKey) {
+ vector<Value> keys;
+ keys.reserve(sortPatternSize);
+ for (auto&& elt : bsonSortKey) {
+ keys.push_back(Value{elt});
+ }
+ invariant(keys.size() == sortPatternSize);
+ if (sortPatternSize == 1) {
+ // As a special case for a sort on a single field, we do not put the keys into an array.
+ return keys[0];
+ }
+ return Value{std::move(keys)};
+}
+
+} // namespace
+constexpr StringData DocumentSourceSort::kStageName;
+
DocumentSourceSort::DocumentSourceSort(const intrusive_ptr<ExpressionContext>& pExpCtx)
: DocumentSource(pExpCtx), _mergingPresorted(false) {}
@@ -54,10 +102,6 @@ REGISTER_DOCUMENT_SOURCE(sort,
LiteParsedDocumentSourceDefault::parse,
DocumentSourceSort::createFromBson);
-const char* DocumentSourceSort::getSourceName() const {
- return "$sort";
-}
-
DocumentSource::GetNextResult DocumentSourceSort::getNext() {
pExpCtx->checkForInterrupt();
@@ -83,17 +127,18 @@ DocumentSource::GetNextResult DocumentSourceSort::getNext() {
void DocumentSourceSort::serializeToArray(
std::vector<Value>& array, boost::optional<ExplainOptions::Verbosity> explain) const {
if (explain) { // always one Value for combined $sort + $limit
- array.push_back(Value(
- DOC(getSourceName() << DOC(
- "sortKey" << serializeSortKey(static_cast<bool>(explain)) << "mergePresorted"
- << (_mergingPresorted ? Value(true) : Value())
- << "limit"
- << (limitSrc ? Value(limitSrc->getLimit()) : Value())))));
+ array.push_back(Value(DOC(
+ kStageName << DOC("sortKey" << sortKeyPattern(SortKeySerialization::kForExplain)
+ << "mergePresorted"
+ << (_mergingPresorted ? Value(true) : Value())
+ << "limit"
+ << (limitSrc ? Value(limitSrc->getLimit()) : Value())))));
} else { // one Value for $sort and maybe a Value for $limit
- MutableDocument inner(serializeSortKey(static_cast<bool>(explain)));
- if (_mergingPresorted)
+ MutableDocument inner(sortKeyPattern(SortKeySerialization::kForPipelineSerialization));
+ if (_mergingPresorted) {
inner["$mergePresorted"] = Value(true);
- array.push_back(Value(DOC(getSourceName() << inner.freeze())));
+ }
+ array.push_back(Value(DOC(kStageName << inner.freeze())));
if (limitSrc) {
limitSrc->serializeToArray(array);
@@ -109,7 +154,7 @@ long long DocumentSourceSort::getLimit() const {
return limitSrc ? limitSrc->getLimit() : -1;
}
-Document DocumentSourceSort::serializeSortKey(bool explain) const {
+Document DocumentSourceSort::sortKeyPattern(SortKeySerialization serializationMode) const {
MutableDocument keyObj;
const size_t n = _sortPattern.size();
for (size_t i = 0; i < n; ++i) {
@@ -118,9 +163,22 @@ Document DocumentSourceSort::serializeSortKey(bool explain) const {
keyObj.setField(_sortPattern[i].fieldPath->fullPath(),
Value(_sortPattern[i].isAscending ? 1 : -1));
} else {
- // For sorts that are not simply on a field path, use a made-up field name.
- keyObj[string(str::stream() << "$computed" << i)] =
- _sortPattern[i].expression->serialize(explain);
+ // Sorting by an expression, use a made up field name.
+ auto computedFieldName = string(str::stream() << "$computed" << i);
+ switch (serializationMode) {
+ case SortKeySerialization::kForExplain:
+ case SortKeySerialization::kForPipelineSerialization: {
+ const bool isExplain = (serializationMode == SortKeySerialization::kForExplain);
+ keyObj[computedFieldName] = _sortPattern[i].expression->serialize(isExplain);
+ break;
+ }
+ case SortKeySerialization::kForSortKeyMerging: {
+ // We need to be able to tell which direction the sort is. Expression sorts are
+ // always descending.
+ keyObj[computedFieldName] = Value(-1);
+ break;
+ }
+ }
}
}
return keyObj.freeze();
@@ -149,6 +207,10 @@ DocumentSource::GetDepsReturn DocumentSourceSort::getDependencies(DepsTracker* d
deps->fields.insert(keyPart.fieldPath->fullPath());
}
}
+ if (pExpCtx->needsMerge) {
+ // Include the sort key if we will merge several sorted streams later.
+ deps->setNeedSortKey(true);
+ }
return SEE_NEXT;
}
@@ -220,9 +282,11 @@ intrusive_ptr<DocumentSourceSort> DocumentSourceSort::create(
uassert(15976, "$sort stage must have at least one sort key", !pSort->_sortPattern.empty());
- const bool isExplain = false;
- pSort->_sortKeyGen =
- SortKeyGenerator{pSort->serializeSortKey(isExplain).toBson(), pExpCtx->getCollator()};
+ pSort->_sortKeyGen = SortKeyGenerator{
+ // The SortKeyGenerator expects the expressions to be serialized in order to detect a sort
+ // by a metadata field.
+ pSort->sortKeyPattern(SortKeySerialization::kForPipelineSerialization).toBson(),
+ pExpCtx->getCollator()};
if (limit > 0) {
pSort->setLimitSrc(DocumentSourceLimit::create(pExpCtx, limit));
@@ -269,12 +333,19 @@ DocumentSource::GetNextResult DocumentSourceSort::populate() {
}
}
-void DocumentSourceSort::loadDocument(const Document& doc) {
+void DocumentSourceSort::loadDocument(Document&& doc) {
invariant(!_populated);
if (!_sorter) {
_sorter.reset(MySorter::make(makeSortOptions(), Comparator(*this)));
}
- _sorter->add(extractKey(doc), doc);
+
+ Value sortKey;
+ Document docForSorter;
+ // We always need to extract the sort key if we've reached this point. If the query system had
+ // already computed the sort key we'd have split the pipeline there, would be merging presorted
+ // documents, and wouldn't use this method.
+ std::tie(sortKey, docForSorter) = extractSortKey(std::move(doc));
+ _sorter->add(sortKey, docForSorter);
}
void DocumentSourceSort::loadingDone() {
@@ -295,8 +366,18 @@ public:
return _cursor->more();
}
Data next() {
- const Document doc = DocumentSourceMergeCursors::nextSafeFrom(_cursor);
- return make_pair(_sorter->extractKey(doc), doc);
+ auto doc = DocumentSourceMergeCursors::nextSafeFrom(_cursor);
+ if (doc.hasSortKeyMetaField()) {
+ // We set the sort key metadata field during the first half of the sort, so just use
+ // that as the sort key here.
+ return make_pair(
+ deserializeSortKey(_sorter->_sortPattern.size(), doc.getSortKeyMetaField()), doc);
+ } else {
+ // It's possible this result is coming from a shard that is still on an old version. If
+ // that's the case, it won't tell us it's sort key - we'll have to re-compute it
+ // ourselves.
+ return _sorter->extractSortKey(std::move(doc));
+ }
}
private:
@@ -344,7 +425,7 @@ StatusWith<Value> DocumentSourceSort::extractKeyFast(const Document& doc) const
return Value{std::move(keys)};
}
-Value DocumentSourceSort::extractKeyWithArray(const Document& doc) const {
+BSONObj DocumentSourceSort::extractKeyWithArray(const Document& doc) const {
SortKeyGenerator::Metadata metadata;
if (doc.hasTextScore()) {
metadata.textScore = doc.getTextScore();
@@ -356,26 +437,40 @@ Value DocumentSourceSort::extractKeyWithArray(const Document& doc) const {
// Convert the Document to a BSONObj, but only do the conversion for the paths we actually need.
// Then run the result through the SortKeyGenerator to obtain the final sort key.
auto bsonDoc = document_path_support::documentToBsonWithPaths(doc, _paths);
- auto bsonKey = uassertStatusOK(_sortKeyGen->getSortKey(std::move(bsonDoc), &metadata));
-
- // Convert BSON sort key, which is an object with empty field names, into the corresponding
- // array of keys representation as a Value. BSONObj {'': 1, '': [2, 3]} becomes Value [1, [2,
- // 3]].
- vector<Value> keys;
- keys.reserve(_sortPattern.size());
- for (auto&& elt : bsonKey) {
- keys.push_back(Value{elt});
- }
- return Value{std::move(keys)};
+ return uassertStatusOK(_sortKeyGen->getSortKey(std::move(bsonDoc), &metadata));
}
-Value DocumentSourceSort::extractKey(const Document& doc) const {
- auto key = extractKeyFast(doc);
- if (key.isOK()) {
- return key.getValue();
+std::pair<Value, Document> DocumentSourceSort::extractSortKey(Document&& doc) const {
+ boost::optional<BSONObj> serializedSortKey; // Only populated if we need to merge with other
+ // sorted results later. Serialized in the standard
+ // BSON sort key format with empty field names,
+ // e.g. {'': 1, '': [2, 3]}.
+
+ Value inMemorySortKey; // The Value we will use for comparisons within the sorter.
+
+ auto fastKey = extractKeyFast(doc);
+ if (fastKey.isOK()) {
+ inMemorySortKey = std::move(fastKey.getValue());
+ if (pExpCtx->needsMerge) {
+ serializedSortKey = serializeSortKey(_sortPattern.size(), inMemorySortKey);
+ }
+ } else {
+ // We have to do it the slow way - through the sort key generator. This will generate a BSON
+ // sort key, which is an object with empty field names. We then need to convert this BSON
+ // representation into the corresponding array of keys as a Value. BSONObj {'': 1, '': [2,
+ // 3]} becomes Value [1, [2, 3]].
+ serializedSortKey = extractKeyWithArray(doc);
+ inMemorySortKey = deserializeSortKey(_sortPattern.size(), *serializedSortKey);
}
- return extractKeyWithArray(doc);
+ MutableDocument toBeSorted(std::move(doc));
+ if (pExpCtx->needsMerge) {
+ // We need to be merged, so will have to be serialized. Save the sort key here to avoid
+ // re-computing it during the merge.
+ invariant(serializedSortKey);
+ toBeSorted.setSortKeyMetaField(*serializedSortKey);
+ }
+ return {inMemorySortKey, toBeSorted.freeze()};
}
int DocumentSourceSort::compare(const Value& lhs, const Value& rhs) const {