diff options
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/exec/sort_key_generator.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/index/sort_key_generator.cpp | 116 | ||||
-rw-r--r-- | src/mongo/db/index/sort_key_generator.h | 49 | ||||
-rw-r--r-- | src/mongo/db/index/sort_key_generator_test.cpp | 44 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_metadata_fields.cpp | 35 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_metadata_fields.h | 14 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_sort.cpp | 169 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_sort.h | 29 | ||||
-rw-r--r-- | src/mongo/db/query/sort_pattern.h | 10 |
9 files changed, 246 insertions, 222 deletions
diff --git a/src/mongo/db/exec/sort_key_generator.cpp b/src/mongo/db/exec/sort_key_generator.cpp index 90fe3b48fea..98800394762 100644 --- a/src/mongo/db/exec/sort_key_generator.cpp +++ b/src/mongo/db/exec/sort_key_generator.cpp @@ -68,7 +68,7 @@ PlanStage::StageState SortKeyGeneratorStage::doWork(WorkingSetID* out) { if (stageState == PlanStage::ADVANCED) { WorkingSetMember* member = _ws->get(*out); - auto sortKey = _sortKeyGen.getSortKey(*member); + auto sortKey = _sortKeyGen.computeSortKey(*member); if (!sortKey.isOK()) { *out = WorkingSetCommon::allocateStatusMember(_ws, sortKey.getStatus()); return PlanStage::FAILURE; diff --git a/src/mongo/db/index/sort_key_generator.cpp b/src/mongo/db/index/sort_key_generator.cpp index 67f57d58a60..44351d8d664 100644 --- a/src/mongo/db/index/sort_key_generator.cpp +++ b/src/mongo/db/index/sort_key_generator.cpp @@ -76,19 +76,24 @@ SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterf Ordering::make(_sortSpecWithoutMeta)); } -StatusWith<BSONObj> SortKeyGenerator::getSortKey(const WorkingSetMember& wsm) const { +// TODO (SERVER-42836): Once WorkingSetMember objects store a Document (SERVER-42181), this function +// will be able to use the Document overload of computeSortKeyFromDocument, and it will be able to +// store the text score with the Document instead of in a separate SortKeyGenerator::Metadata +// object. +StatusWith<BSONObj> SortKeyGenerator::computeSortKey(const WorkingSetMember& wsm) const { if (wsm.hasObj()) { SortKeyGenerator::Metadata metadata; if (_sortHasMeta && wsm.metadata().hasTextScore()) { metadata.textScore = wsm.metadata().getTextScore(); } - return getSortKeyFromDocument(wsm.obj.value(), &metadata); + return computeSortKeyFromDocument(wsm.obj.value(), &metadata); } - return getSortKeyFromIndexKey(wsm); + return computeSortKeyFromIndexKey(wsm); } -StatusWith<BSONObj> SortKeyGenerator::getSortKeyFromIndexKey(const WorkingSetMember& member) const { +StatusWith<BSONObj> SortKeyGenerator::computeSortKeyFromIndexKey( + const WorkingSetMember& member) const { invariant(member.getState() == WorkingSetMember::RID_AND_IDX); invariant(!_sortHasMeta); @@ -107,13 +112,13 @@ StatusWith<BSONObj> SortKeyGenerator::getSortKeyFromIndexKey(const WorkingSetMem return objBuilder.obj(); } -StatusWith<BSONObj> SortKeyGenerator::getSortKeyFromDocument(const BSONObj& obj, - const Metadata* metadata) const { +StatusWith<BSONObj> SortKeyGenerator::computeSortKeyFromDocument(const BSONObj& obj, + const Metadata* metadata) const { if (_sortHasMeta) { invariant(metadata); } - auto sortKeyNoMetadata = getSortKeyFromDocumentWithoutMetadata(obj); + auto sortKeyNoMetadata = computeSortKeyFromDocumentWithoutMetadata(obj); if (!sortKeyNoMetadata.isOK()) { return sortKeyNoMetadata; } @@ -153,7 +158,7 @@ StatusWith<BSONObj> SortKeyGenerator::getSortKeyFromDocument(const BSONObj& obj, return mergedKeyBob.obj(); } -StatusWith<BSONObj> SortKeyGenerator::getSortKeyFromDocumentWithoutMetadata( +StatusWith<BSONObj> SortKeyGenerator::computeSortKeyFromDocumentWithoutMetadata( const BSONObj& obj) const { // Not sorting by anything in the key, just bail out early. if (_sortSpecWithoutMeta.isEmpty()) { @@ -192,4 +197,99 @@ StatusWith<BSONObj> SortKeyGenerator::getSortKeyFromDocumentWithoutMetadata( return KeyString::toBson(*keys.begin(), Ordering::make(_sortSpecWithoutMeta)); } +Value SortKeyGenerator::getCollationComparisonKey(const Value& val) const { + // If the collation is the simple collation, the value itself is the comparison key. + if (!_collator) { + return val; + } + + // If 'val' is not a collatable type, there's no need to do any work. + if (!CollationIndexKey::isCollatableType(val.getType())) { + return val; + } + + // If 'val' is a string, directly use the collator to obtain a comparison key. + if (val.getType() == BSONType::String) { + auto compKey = _collator->getComparisonKey(val.getString()); + return Value(compKey.getKeyData()); + } + + // Otherwise, for non-string collatable types, take the slow path and round-trip the value + // through BSON. + BSONObjBuilder input; + val.addToBsonObj(&input, ""_sd); + + BSONObjBuilder output; + CollationIndexKey::collationAwareIndexKeyAppend(input.obj().firstElement(), _collator, &output); + return Value(output.obj().firstElement()); +} + +StatusWith<Value> SortKeyGenerator::extractKeyPart( + const Document& doc, const SortPattern::SortPatternPart& patternPart) const { + Value plainKey; + if (patternPart.fieldPath) { + invariant(!patternPart.expression); + auto key = + document_path_support::extractElementAlongNonArrayPath(doc, *patternPart.fieldPath); + if (!key.isOK()) { + return key; + } + plainKey = key.getValue(); + } else { + invariant(patternPart.expression); + // ExpressionMeta does not use Variables. + plainKey = patternPart.expression->evaluate(doc, nullptr /* variables */); + } + + return getCollationComparisonKey(plainKey); +} + +StatusWith<Value> SortKeyGenerator::extractKeyFast(const Document& doc) const { + if (_sortPattern.isSingleElementKey()) { + return extractKeyPart(doc, _sortPattern[0]); + } + + std::vector<Value> keys; + keys.reserve(_sortPattern.size()); + for (auto&& keyPart : _sortPattern) { + auto extractedKey = extractKeyPart(doc, keyPart); + if (!extractedKey.isOK()) { + // We can't use the fast path, so bail out. + return extractedKey; + } + + keys.push_back(std::move(extractedKey.getValue())); + } + return Value{std::move(keys)}; +} + +BSONObj SortKeyGenerator::extractKeyWithArray(const Document& doc) const { + SortKeyGenerator::Metadata metadata; + if (doc.metadata().hasTextScore()) { + metadata.textScore = doc.metadata().getTextScore(); + } + if (doc.metadata().hasRandVal()) { + metadata.randVal = doc.metadata().getRandVal(); + } + + // Convert the Document to a BSONObj, but only do the conversion for the paths we actually need. + // Then run the result through the SortKeyGenerator to obtain the final sort key. + auto bsonDoc = _sortPattern.documentToBsonWithSortPaths(doc); + return uassertStatusOK(computeSortKeyFromDocument(bsonDoc, &metadata)); +} + +Value SortKeyGenerator::computeSortKeyFromDocument(const Document& doc) const { + // This fast pass directly generates a Value. + auto fastKey = extractKeyFast(doc); + if (fastKey.isOK()) { + return std::move(fastKey.getValue()); + } + + // Compute the key through the slow path, which generates a serialized BSON sort key (taking a + // form like BSONObj {'': 1, '': [2, 3]}) and converts it to a Value (Value [1, [2, 3]] in the + // earlier example). + return DocumentMetadataFields::deserializeSortKey(_sortPattern.isSingleElementKey(), + extractKeyWithArray(doc)); +} + } // namespace mongo diff --git a/src/mongo/db/index/sort_key_generator.h b/src/mongo/db/index/sort_key_generator.h index 2ddc186f340..fcdfe3309e5 100644 --- a/src/mongo/db/index/sort_key_generator.h +++ b/src/mongo/db/index/sort_key_generator.h @@ -65,7 +65,7 @@ public: * If the sort pattern contains a $meta sort (e.g. sort by "textScore" or "randVal"), then the * necessary metadata is obtained from the WorkingSetMember. */ - StatusWith<BSONObj> getSortKey(const WorkingSetMember&) const; + StatusWith<BSONObj> computeSortKey(const WorkingSetMember&) const; /** * Returns the key which should be used to sort 'obj', or a non-OK status if no key could be @@ -75,18 +75,59 @@ public: * a $meta sort (i.e. if sortHasMeta() is true). These values are filled in at the corresponding * positions in the sort key. */ - StatusWith<BSONObj> getSortKeyFromDocument(const BSONObj& obj, const Metadata*) const; + StatusWith<BSONObj> computeSortKeyFromDocument(const BSONObj& obj, const Metadata*) const; + + /** + * Returns the sort key for the input 'doc' as a Value. When the sort pattern has multiple + * components, the resulting sort key is a an Array-typed Value with one element for each + * component. For sort pattern with just one component, the sort key is a Value that represents + * the single element to sort on (which may or may not itself be an array). + * + * The sort key is computed based on the sort pattern, the contents of the document, and if + * required by $meta sort specifiers, metadata in the Document. This function throws if it + * cannot compute the sort pattern. + */ + Value computeSortKeyFromDocument(const Document& doc) const; + + bool isSingleElementKey() const { + return _sortPattern.isSingleElementKey(); + } private: // Extracts the sort key from a WorkingSetMember which represents an index key. It is illegal to // call this if the working set member is not in RID_AND_IDX state. It is also illegal to call // this if the sort pattern has any $meta components. - StatusWith<BSONObj> getSortKeyFromIndexKey(const WorkingSetMember& member) const; + StatusWith<BSONObj> computeSortKeyFromIndexKey(const WorkingSetMember& member) const; // Extracts the sort key from 'obj', using '_sortSpecWithoutMeta' and thus ignoring any $meta // sort components of the sort pattern. The caller is responsible for augmenting this key with // the appropriate metadata if '_sortHasMeta' is true. - StatusWith<BSONObj> getSortKeyFromDocumentWithoutMetadata(const BSONObj& obj) const; + StatusWith<BSONObj> computeSortKeyFromDocumentWithoutMetadata(const BSONObj& obj) const; + + /** + * Returns the sort key for 'doc' based on the SortPattern, or ErrorCodes::InternalError if an + * array is encountered during sort key generation. + */ + StatusWith<Value> extractKeyFast(const Document& doc) const; + + /** + * Extracts the sort key component described by 'keyPart' from 'doc' and returns it. Returns + * ErrorCodes::InternalError if the path for 'keyPart' contains an array in 'doc'. + */ + StatusWith<Value> extractKeyPart(const Document& doc, + const SortPattern::SortPatternPart& keyPart) const; + + /** + * Returns the sort key for 'doc' based on the SortPattern. Note this is in the BSONObj format - + * with empty field names. + */ + BSONObj extractKeyWithArray(const Document& doc) const; + + /** + * Returns the comparison key used to sort 'val' with collation. Note that these comparison keys + * should always be sorted with the simple (i.e. binary) collation. + */ + Value getCollationComparisonKey(const Value& val) const; const CollatorInterface* _collator = nullptr; diff --git a/src/mongo/db/index/sort_key_generator_test.cpp b/src/mongo/db/index/sort_key_generator_test.cpp index 300c5971f68..c2bc61e4eb4 100644 --- a/src/mongo/db/index/sort_key_generator_test.cpp +++ b/src/mongo/db/index/sort_key_generator_test.cpp @@ -52,7 +52,7 @@ std::unique_ptr<SortKeyGenerator> makeSortKeyGen(const BSONObj& sortSpec, TEST(SortKeyGeneratorTest, ExtractNumberKeyForNonCompoundSortNonNested) { auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), nullptr); - auto sortKey = sortKeyGen->getSortKeyFromDocument(fromjson("{_id: 0, a: 5}"), nullptr); + auto sortKey = sortKeyGen->computeSortKeyFromDocument(fromjson("{_id: 0, a: 5}"), nullptr); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 5)); } @@ -60,14 +60,14 @@ TEST(SortKeyGeneratorTest, ExtractNumberKeyForNonCompoundSortNonNested) { TEST(SortKeyGeneratorTest, ExtractNumberKeyFromDocWithSeveralFields) { auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), nullptr); auto sortKey = - sortKeyGen->getSortKeyFromDocument(fromjson("{_id: 0, z: 10, a: 6, b: 16}"), nullptr); + sortKeyGen->computeSortKeyFromDocument(fromjson("{_id: 0, z: 10, a: 6, b: 16}"), nullptr); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 6)); } TEST(SortKeyGeneratorTest, ExtractStringKeyNonCompoundNonNested) { auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), nullptr); - auto sortKey = sortKeyGen->getSortKeyFromDocument( + auto sortKey = sortKeyGen->computeSortKeyFromDocument( fromjson("{_id: 0, z: 'thing1', a: 'thing2', b: 16}"), nullptr); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), @@ -77,7 +77,7 @@ TEST(SortKeyGeneratorTest, ExtractStringKeyNonCompoundNonNested) { TEST(SortKeyGeneratorTest, CompoundSortPattern) { auto sortKeyGen = makeSortKeyGen(BSON("a" << 1 << "b" << 1), nullptr); - auto sortKey = sortKeyGen->getSortKeyFromDocument( + auto sortKey = sortKeyGen->computeSortKeyFromDocument( fromjson("{_id: 0, z: 'thing1', a: 99, c: {a: 4}, b: 16}"), nullptr); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 99 << "" << 16)); @@ -85,7 +85,7 @@ TEST(SortKeyGeneratorTest, CompoundSortPattern) { TEST(SortKeyGeneratorTest, CompoundSortPatternWithDottedPath) { auto sortKeyGen = makeSortKeyGen(BSON("c.a" << 1 << "b" << 1), nullptr); - auto sortKey = sortKeyGen->getSortKeyFromDocument( + auto sortKey = sortKeyGen->computeSortKeyFromDocument( fromjson("{_id: 0, z: 'thing1', a: 99, c: {a: 4}, b: 16}"), nullptr); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 4 << "" << 16)); @@ -93,7 +93,7 @@ TEST(SortKeyGeneratorTest, CompoundSortPatternWithDottedPath) { TEST(SortKeyGeneratorTest, CompoundPatternLeadingFieldIsArray) { auto sortKeyGen = makeSortKeyGen(BSON("c" << 1 << "b" << 1), nullptr); - auto sortKey = sortKeyGen->getSortKeyFromDocument( + auto sortKey = sortKeyGen->computeSortKeyFromDocument( fromjson("{_id: 0, z: 'thing1', a: 99, c: [2, 4, 1], b: 16}"), nullptr); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 1 << "" << 16)); @@ -102,7 +102,7 @@ TEST(SortKeyGeneratorTest, CompoundPatternLeadingFieldIsArray) { TEST(SortKeyGeneratorTest, ExtractStringSortKeyWithCollatorUsesComparisonKey) { CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), &collator); - auto sortKey = sortKeyGen->getSortKeyFromDocument( + auto sortKey = sortKeyGen->computeSortKeyFromDocument( fromjson("{_id: 0, z: 'thing1', a: 'thing2', b: 16}"), nullptr); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), @@ -114,7 +114,7 @@ TEST(SortKeyGeneratorTest, CollatorHasNoEffectWhenExtractingNonStringSortKey) { CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), &collator); auto sortKey = - sortKeyGen->getSortKeyFromDocument(fromjson("{_id: 0, z: 10, a: 6, b: 16}"), nullptr); + sortKeyGen->computeSortKeyFromDocument(fromjson("{_id: 0, z: 10, a: 6, b: 16}"), nullptr); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 6)); } @@ -122,7 +122,7 @@ TEST(SortKeyGeneratorTest, CollatorHasNoEffectWhenExtractingNonStringSortKey) { TEST(SortKeyGeneratorTest, SortKeyGenerationForArraysChoosesCorrectKey) { auto sortKeyGen = makeSortKeyGen(BSON("a" << -1), nullptr); auto sortKey = - sortKeyGen->getSortKeyFromDocument(fromjson("{_id: 0, a: [1, 2, 3, 4]}"), nullptr); + sortKeyGen->computeSortKeyFromDocument(fromjson("{_id: 0, a: [1, 2, 3, 4]}"), nullptr); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 4)); } @@ -130,7 +130,7 @@ TEST(SortKeyGeneratorTest, SortKeyGenerationForArraysChoosesCorrectKey) { TEST(SortKeyGeneratorTest, EnsureSortKeyGenerationForArraysRespectsCollation) { CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), &collator); - auto sortKey = sortKeyGen->getSortKeyFromDocument( + auto sortKey = sortKeyGen->computeSortKeyFromDocument( fromjson("{_id: 0, a: ['aaz', 'zza', 'yya', 'zzb']}"), nullptr); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), @@ -140,7 +140,7 @@ TEST(SortKeyGeneratorTest, EnsureSortKeyGenerationForArraysRespectsCollation) { TEST(SortKeyGeneratorTest, SortKeyGenerationForArraysRespectsCompoundOrdering) { auto sortKeyGen = makeSortKeyGen(BSON("a.b" << 1 << "a.c" << -1), nullptr); - auto sortKey = sortKeyGen->getSortKeyFromDocument( + auto sortKey = sortKeyGen->computeSortKeyFromDocument( fromjson("{_id: 0, a: [{b: 1, c: 0}, {b: 0, c: 3}, {b: 0, c: 1}]}"), nullptr); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 0 << "" << 3)); @@ -214,7 +214,7 @@ DEATH_TEST(SortKeyGeneratorTest, auto sortKeyGen = makeSortKeyGen(BSON("a" << BSON("$meta" << "textScore")), nullptr); - uassertStatusOK(sortKeyGen->getSortKeyFromDocument(BSONObj{}, nullptr).getStatus()); + uassertStatusOK(sortKeyGen->computeSortKeyFromDocument(BSONObj{}, nullptr).getStatus()); } DEATH_TEST(SortKeyGeneratorTest, @@ -223,7 +223,7 @@ DEATH_TEST(SortKeyGeneratorTest, auto sortKeyGen = makeSortKeyGen(BSON("a" << BSON("$meta" << "randVal")), nullptr); - uassertStatusOK(sortKeyGen->getSortKeyFromDocument(BSONObj{}, nullptr).getStatus()); + uassertStatusOK(sortKeyGen->computeSortKeyFromDocument(BSONObj{}, nullptr).getStatus()); } TEST(SortKeyGeneratorTest, CanGenerateKeysForTextScoreMetaSort) { @@ -232,7 +232,7 @@ TEST(SortKeyGeneratorTest, CanGenerateKeysForTextScoreMetaSort) { nullptr); SortKeyGenerator::Metadata metadata; metadata.textScore = 1.5; - auto sortKey = sortKeyGen->getSortKeyFromDocument(BSONObj{}, &metadata); + auto sortKey = sortKeyGen->computeSortKeyFromDocument(BSONObj{}, &metadata); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 1.5)); } @@ -243,7 +243,7 @@ TEST(SortKeyGeneratorTest, CanGenerateKeysForRandValMetaSort) { nullptr); SortKeyGenerator::Metadata metadata; metadata.randVal = 0.3; - auto sortKey = sortKeyGen->getSortKeyFromDocument(BSONObj{}, &metadata); + auto sortKey = sortKeyGen->computeSortKeyFromDocument(BSONObj{}, &metadata); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 0.3)); } @@ -255,7 +255,7 @@ TEST(SortKeyGeneratorTest, CanGenerateKeysForCompoundMetaSort) { SortKeyGenerator::Metadata metadata; metadata.randVal = 0.3; metadata.textScore = 1.5; - auto sortKey = sortKeyGen->getSortKeyFromDocument(BSON("a" << 4 << "d" << 5), &metadata); + auto sortKey = sortKeyGen->computeSortKeyFromDocument(BSON("a" << 4 << "d" << 5), &metadata); ASSERT_OK(sortKey.getStatus()); ASSERT_BSONOBJ_EQ(sortKey.getValue(), BSON("" << 4 << "" << 0.3 << "" << 1.5 << "" << 5 << "" << 1.5)); @@ -296,7 +296,7 @@ private: TEST_F(SortKeyGeneratorWorkingSetTest, CanGetSortKeyFromWorkingSetMemberWithObj) { auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), nullptr); setRecordIdAndObj(BSON("x" << 1 << "a" << 2 << "y" << 3)); - auto sortKey = sortKeyGen->getSortKey(member()); + auto sortKey = sortKeyGen->computeSortKey(member()); ASSERT_OK(sortKey); ASSERT_BSONOBJ_EQ(BSON("" << 2), sortKey.getValue()); } @@ -304,7 +304,7 @@ TEST_F(SortKeyGeneratorWorkingSetTest, CanGetSortKeyFromWorkingSetMemberWithObj) TEST_F(SortKeyGeneratorWorkingSetTest, CanGetSortKeyFromWorkingSetMemberWithOwnedObj) { auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), nullptr); setOwnedObj(BSON("x" << 1 << "a" << 2 << "y" << 3)); - auto sortKey = sortKeyGen->getSortKey(member()); + auto sortKey = sortKeyGen->computeSortKey(member()); ASSERT_OK(sortKey); ASSERT_BSONOBJ_EQ(BSON("" << 2), sortKey.getValue()); } @@ -314,7 +314,7 @@ TEST_F(SortKeyGeneratorWorkingSetTest, CanGenerateKeyFromWSMForTextScoreMetaSort auto sortKeyGen = makeSortKeyGen(pattern, nullptr); setOwnedObj(BSON("x" << 1 << "a" << 2 << "y" << 3 << "c" << BSON_ARRAY(4 << 5 << 6))); member().metadata().setTextScore(9.9); - auto sortKey = sortKeyGen->getSortKey(member()); + auto sortKey = sortKeyGen->computeSortKey(member()); ASSERT_OK(sortKey); ASSERT_BSONOBJ_EQ(BSON("" << 2 << "" << 9.9 << "" << 6), sortKey.getValue()); } @@ -322,7 +322,7 @@ TEST_F(SortKeyGeneratorWorkingSetTest, CanGenerateKeyFromWSMForTextScoreMetaSort TEST_F(SortKeyGeneratorWorkingSetTest, CanGenerateSortKeyFromWSMInIndexKeyState) { auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), nullptr); setRecordIdAndIdx(BSON("a" << 1 << "b" << 1), BSON("" << 2 << "" << 3)); - auto sortKey = sortKeyGen->getSortKey(member()); + auto sortKey = sortKeyGen->computeSortKey(member()); ASSERT_OK(sortKey); ASSERT_BSONOBJ_EQ(BSON("" << 2), sortKey.getValue()); } @@ -335,7 +335,7 @@ TEST_F(SortKeyGeneratorWorkingSetTest, CanGenerateSortKeyFromWSMInIndexKeyStateW << "string1" << "" << "string2")); - auto sortKey = sortKeyGen->getSortKey(member()); + auto sortKey = sortKeyGen->computeSortKey(member()); ASSERT_OK(sortKey); ASSERT_BSONOBJ_EQ(BSON("" << "1gnirts"), @@ -349,7 +349,7 @@ DEATH_TEST_F(SortKeyGeneratorWorkingSetTest, auto sortKeyGen = makeSortKeyGen(pattern, nullptr); setRecordIdAndIdx(BSON("a" << 1 << "b" << 1), BSON("" << 2 << "" << 3)); member().metadata().setTextScore(9.9); - MONGO_COMPILER_VARIABLE_UNUSED auto ignored = sortKeyGen->getSortKey(member()); + MONGO_COMPILER_VARIABLE_UNUSED auto ignored = sortKeyGen->computeSortKey(member()); } } // namespace diff --git a/src/mongo/db/pipeline/document_metadata_fields.cpp b/src/mongo/db/pipeline/document_metadata_fields.cpp index 3116695af3a..73120cd68ed 100644 --- a/src/mongo/db/pipeline/document_metadata_fields.cpp +++ b/src/mongo/db/pipeline/document_metadata_fields.cpp @@ -31,8 +31,16 @@ #include "mongo/db/pipeline/document_metadata_fields.h" +#include "mongo/bson/bsonobjbuilder.h" + namespace mongo { +namespace { +Value missingToNull(Value maybeMissing) { + return maybeMissing.missing() ? Value(BSONNULL) : maybeMissing; +} +} // namespace + DocumentMetadataFields::DocumentMetadataFields(const DocumentMetadataFields& other) : _holder(other._holder ? std::make_unique<MetadataHolder>(*other._holder) : nullptr) {} @@ -197,4 +205,31 @@ void DocumentMetadataFields::deserializeForSorter(BufReader& buf, DocumentMetada } } +BSONObj DocumentMetadataFields::serializeSortKey(bool isSingleElementKey, const Value& value) { + // Missing values don't serialize correctly in this format, so use nulls instead, since they are + // considered equivalent with woCompare(). + if (isSingleElementKey) { + return BSON("" << missingToNull(value)); + } + invariant(value.isArray()); + BSONObjBuilder bb; + for (auto&& val : value.getArray()) { + bb << "" << missingToNull(val); + } + return bb.obj(); +} + +Value DocumentMetadataFields::deserializeSortKey(bool isSingleElementKey, + const BSONObj& bsonSortKey) { + std::vector<Value> keys; + for (auto&& elt : bsonSortKey) { + keys.push_back(Value{elt}); + } + if (isSingleElementKey) { + // As a special case for a sort on a single field, we do not put the keys into an array. + return keys[0]; + } + return Value{std::move(keys)}; +} + } // namespace mongo diff --git a/src/mongo/db/pipeline/document_metadata_fields.h b/src/mongo/db/pipeline/document_metadata_fields.h index a759d77406b..609aa8c4c5c 100644 --- a/src/mongo/db/pipeline/document_metadata_fields.h +++ b/src/mongo/db/pipeline/document_metadata_fields.h @@ -77,6 +77,20 @@ public: static void deserializeForSorter(BufReader& buf, DocumentMetadataFields* out); /** + * Converts a Value representing an in-memory sort key to a BSONObj representing a serialized + * sort key. If 'isSingleElementKey' is true, returns a BSON object with 'value' as its only + * value - and an empty field name. Otherwise returns a BSONObj with one field for each value in + * the array, each field using the empty string as the key name. + */ + static BSONObj serializeSortKey(bool isSingleElementKey, const Value& value); + + /** + * Converts a BSONObj representing a serialized sort key into a Value, which we use for + * in-memory comparisons. BSONObj {'': 1, '': [2, 3]} becomes Value [1, [2, 3]]. + */ + static Value deserializeSortKey(bool isSingleElementKey, const BSONObj& bsonSortKey); + + /** * Constructs a new DocumentMetadataFields in an uninitialized state. */ DocumentMetadataFields() = default; diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index 30d71711823..2fac75da162 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -50,54 +50,6 @@ using std::string; using std::unique_ptr; using std::vector; -namespace { - -Value missingToNull(Value maybeMissing) { - return maybeMissing.missing() ? Value(BSONNULL) : maybeMissing; -} - -/** - * Converts a Value representing an in-memory sort key to a BSONObj representing a serialized sort - * key. If 'sortPatternSize' is 1, returns a BSON object with 'value' as it's only value - and an - * empty field name. Otherwise asserts that 'value' is an array of length 'sortPatternSize', and - * returns a BSONObj with one field for each value in the array, each field using the empty field - * name. - */ -BSONObj serializeSortKey(size_t sortPatternSize, Value value) { - // Missing values don't serialize correctly in this format, so use nulls instead, since they are - // considered equivalent with woCompare(). - if (sortPatternSize == 1) { - return BSON("" << missingToNull(value)); - } - invariant(value.isArray()); - invariant(value.getArrayLength() == sortPatternSize); - BSONObjBuilder bb; - for (auto&& val : value.getArray()) { - bb << "" << missingToNull(val); - } - return bb.obj(); -} - -/** - * Converts a BSONObj representing a serialized sort key into a Value, which we use for in-memory - * comparisons. BSONObj {'': 1, '': [2, 3]} becomes Value [1, [2, 3]]. - */ -Value deserializeSortKey(size_t sortPatternSize, BSONObj bsonSortKey) { - vector<Value> keys; - keys.reserve(sortPatternSize); - for (auto&& elt : bsonSortKey) { - keys.push_back(Value{elt}); - } - invariant(keys.size() == sortPatternSize); - if (sortPatternSize == 1) { - // As a special case for a sort on a single field, we do not put the keys into an array. - return keys[0]; - } - return Value{std::move(keys)}; -} - -} // namespace - constexpr StringData DocumentSourceSort::kStageName; DocumentSourceSort::DocumentSourceSort(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, @@ -263,121 +215,22 @@ bool DocumentSourceSort::usedDisk() { return _sortExecutor->wasDiskUsed(); } -Value DocumentSourceSort::getCollationComparisonKey(const Value& val) const { - const auto collator = pExpCtx->getCollator(); - - // If the collation is the simple collation, the value itself is the comparison key. - if (!collator) { - return val; - } - - // If 'val' is not a collatable type, there's no need to do any work. - if (!CollationIndexKey::isCollatableType(val.getType())) { - return val; - } - - // If 'val' is a string, directly use the collator to obtain a comparison key. - if (val.getType() == BSONType::String) { - auto compKey = collator->getComparisonKey(val.getString()); - return Value(compKey.getKeyData()); - } - - // Otherwise, for non-string collatable types, take the slow path and round-trip the value - // through BSON. - BSONObjBuilder input; - val.addToBsonObj(&input, ""_sd); - - BSONObjBuilder output; - CollationIndexKey::collationAwareIndexKeyAppend(input.obj().firstElement(), collator, &output); - return Value(output.obj().firstElement()); -} - -StatusWith<Value> DocumentSourceSort::extractKeyPart( - const Document& doc, const SortPattern::SortPatternPart& patternPart) const { - Value plainKey; - if (patternPart.fieldPath) { - invariant(!patternPart.expression); - auto key = - document_path_support::extractElementAlongNonArrayPath(doc, *patternPart.fieldPath); - if (!key.isOK()) { - return key; - } - plainKey = key.getValue(); - } else { - invariant(patternPart.expression); - plainKey = patternPart.expression->evaluate(doc, &pExpCtx->variables); - } - - return getCollationComparisonKey(plainKey); -} - -StatusWith<Value> DocumentSourceSort::extractKeyFast(const Document& doc) const { - if (_sortExecutor->sortPattern().size() == 1u) { - return extractKeyPart(doc, _sortExecutor->sortPattern()[0]); - } - - vector<Value> keys; - keys.reserve(_sortExecutor->sortPattern().size()); - for (auto&& keyPart : _sortExecutor->sortPattern()) { - auto extractedKey = extractKeyPart(doc, keyPart); - if (!extractedKey.isOK()) { - // We can't use the fast path, so bail out. - return extractedKey; - } - - keys.push_back(std::move(extractedKey.getValue())); - } - return Value{std::move(keys)}; -} +std::pair<Value, Document> DocumentSourceSort::extractSortKey(Document&& doc) const { + Value inMemorySortKey = _sortKeyGen->computeSortKeyFromDocument(doc); -BSONObj DocumentSourceSort::extractKeyWithArray(const Document& doc) const { - SortKeyGenerator::Metadata metadata; - if (doc.metadata().hasTextScore()) { - metadata.textScore = doc.metadata().getTextScore(); - } - if (doc.metadata().hasRandVal()) { - metadata.randVal = doc.metadata().getRandVal(); - } + if (pExpCtx->needsMerge) { + // If this sort stage is part of a merged pipeline, make sure that each Document's sort key + // gets saved with its metadata. + auto serializedSortKey = DocumentMetadataFields::serializeSortKey( + _sortKeyGen->isSingleElementKey(), inMemorySortKey); - // Convert the Document to a BSONObj, but only do the conversion for the paths we actually need. - // Then run the result through the SortKeyGenerator to obtain the final sort key. - auto bsonDoc = _sortExecutor->sortPattern().documentToBsonWithSortPaths(doc); - return uassertStatusOK(_sortKeyGen->getSortKeyFromDocument(bsonDoc, &metadata)); -} + MutableDocument toBeSorted(std::move(doc)); + toBeSorted.metadata().setSortKey(serializedSortKey); -std::pair<Value, Document> DocumentSourceSort::extractSortKey(Document&& doc) const { - boost::optional<BSONObj> serializedSortKey; // Only populated if we need to merge with other - // sorted results later. Serialized in the standard - // BSON sort key format with empty field names, - // e.g. {'': 1, '': [2, 3]}. - - Value inMemorySortKey; // The Value we will use for comparisons within the sorter. - - auto fastKey = extractKeyFast(doc); - if (fastKey.isOK()) { - inMemorySortKey = std::move(fastKey.getValue()); - if (pExpCtx->needsMerge) { - serializedSortKey = - serializeSortKey(_sortExecutor->sortPattern().size(), inMemorySortKey); - } + return std::make_pair(std::move(inMemorySortKey), toBeSorted.freeze()); } else { - // We have to do it the slow way - through the sort key generator. This will generate a BSON - // sort key, which is an object with empty field names. We then need to convert this BSON - // representation into the corresponding array of keys as a Value. BSONObj {'': 1, '': [2, - // 3]} becomes Value [1, [2, 3]]. - serializedSortKey = extractKeyWithArray(doc); - inMemorySortKey = - deserializeSortKey(_sortExecutor->sortPattern().size(), *serializedSortKey); - } - - MutableDocument toBeSorted(std::move(doc)); - if (pExpCtx->needsMerge) { - // We need to be merged, so will have to be serialized. Save the sort key here to avoid - // re-computing it during the merge. - invariant(serializedSortKey); - toBeSorted.metadata().setSortKey(*serializedSortKey); + return std::make_pair(std::move(inMemorySortKey), std::move(doc)); } - return {inMemorySortKey, toBeSorted.freeze()}; } boost::optional<DocumentSource::DistributedPlanLogic> DocumentSourceSort::distributedPlanLogic() { diff --git a/src/mongo/db/pipeline/document_source_sort.h b/src/mongo/db/pipeline/document_source_sort.h index 48b7fbc1095..cac2c6b106e 100644 --- a/src/mongo/db/pipeline/document_source_sort.h +++ b/src/mongo/db/pipeline/document_source_sort.h @@ -165,38 +165,9 @@ private: * sorter to eventually be returned. If we will need to later merge the sorted results with * other results, this method adds the sort key as metadata onto 'doc' to speed up the merge * later. - * - * Attempts to generate the key using a fast path that does not handle arrays. If an array is - * encountered, falls back on extractKeyWithArray(). */ std::pair<Value, Document> extractSortKey(Document&& doc) const; - /** - * Returns the sort key for 'doc' based on the SortPattern, or ErrorCodes::InternalError if an - * array is encountered during sort key generation. - */ - StatusWith<Value> extractKeyFast(const Document& doc) const; - - /** - * Extracts the sort key component described by 'keyPart' from 'doc' and returns it. Returns - * ErrorCodes::Internal error if the path for 'keyPart' contains an array in 'doc'. - */ - StatusWith<Value> extractKeyPart(const Document& doc, - const SortPattern::SortPatternPart& keyPart) const; - - /** - * Returns the sort key for 'doc' based on the SortPattern. Note this is in the BSONObj format - - * with empty field names. - */ - BSONObj extractKeyWithArray(const Document& doc) const; - - /** - * Returns the comparison key used to sort 'val' with the collation of the ExpressionContext. - * Note that these comparison keys should always be sorted with the simple (i.e. binary) - * collation. - */ - Value getCollationComparisonKey(const Value& val) const; - bool _populated = false; boost::optional<SortExecutor> _sortExecutor; diff --git a/src/mongo/db/query/sort_pattern.h b/src/mongo/db/query/sort_pattern.h index d818bd82f38..aa5c36d166d 100644 --- a/src/mongo/db/query/sort_pattern.h +++ b/src/mongo/db/query/sort_pattern.h @@ -74,6 +74,16 @@ public: return _sortPattern.empty(); } + /** + * Singleton sort patterns are a special case. In memory, sort keys for singleton patterns get + * stored as a single Value, corresponding to the single component of the sort pattern. By + * contrast, sort patterns for "compound" sort keys get stored as a Value that is an a array, + * with one element for each component of the sort. + */ + bool isSingleElementKey() const { + return _sortPattern.size() == 1; + } + const SortPatternPart& operator[](int idx) const { return _sortPattern[idx]; } |