From 72d8bff5f619ebdcb9904f5978ac14b2f53ea686 Mon Sep 17 00:00:00 2001 From: Justin Seyster Date: Tue, 8 Oct 2019 21:05:28 +0000 Subject: Revert "SERVER-42836 Fast path for sort key generation of WorkingSetMembers" Reverting due to failure in external_sort_find.js. This reverts commit d22968677a25ccd587fdadc7106b181780393736. --- .../group_conversion_to_distinct_scan.js | 6 +- jstests/sharding/fts_score_sort_sharded.js | 42 ++------- src/mongo/db/exec/sort_key_generator.cpp | 14 +-- src/mongo/db/index/sort_key_generator.cpp | 99 +++++++++++--------- src/mongo/db/index/sort_key_generator.h | 102 ++++++++++----------- src/mongo/db/index/sort_key_generator_test.cpp | 15 ++- 6 files changed, 134 insertions(+), 144 deletions(-) diff --git a/jstests/aggregation/group_conversion_to_distinct_scan.js b/jstests/aggregation/group_conversion_to_distinct_scan.js index 226dea6f584..90b25268a7c 100644 --- a/jstests/aggregation/group_conversion_to_distinct_scan.js +++ b/jstests/aggregation/group_conversion_to_distinct_scan.js @@ -44,7 +44,7 @@ assert.commandWorked(coll.insert([ {_id: 3, a: 1, b: 3, c: 2}, {_id: 4, a: 2, b: 2, c: 2}, {_id: 5, b: 1, c: 1}, - {_id: 6, a: null, b: 1, c: 1.5}, + {_id: 6, a: null, b: 1, c: 1}, {_id: 7, aa: 1, mkB: 2, bb: 2}, {_id: 8, aa: 1, mkB: [1, 3], bb: 1}, @@ -224,9 +224,9 @@ assert.eq(null, getAggPlanStage(explain, "SORT"), explain); // Verify that a $sort-$group pipeline can use DISTINCT_SCAN when a $first accumulator needs the // entire document. // -pipeline = [{$sort: {a: -1, b: -1, c: -1}}, {$group: {_id: "$a", accum: {$first: "$$ROOT"}}}]; +pipeline = [{$sort: {a: -1, b: -1}}, {$group: {_id: "$a", accum: {$first: "$$ROOT"}}}]; assertResultsMatchWithAndWithoutHintandIndexes(pipeline, [ - {_id: null, accum: {_id: 6, a: null, b: 1, c: 1.5}}, + {_id: null, accum: {_id: 6, a: null, b: 1, c: 1}}, {_id: 1, accum: {_id: 3, a: 1, b: 3, c: 2}}, {_id: 2, accum: {_id: 4, a: 2, b: 2, c: 2}} ]); diff --git a/jstests/sharding/fts_score_sort_sharded.js b/jstests/sharding/fts_score_sort_sharded.js index a6447cb4b38..e49703821f3 100644 --- a/jstests/sharding/fts_score_sort_sharded.js +++ b/jstests/sharding/fts_score_sort_sharded.js @@ -34,11 +34,11 @@ assert.commandWorked(coll.ensureIndex({a: "text"})); var results = coll.find({$text: {$search: "pizza"}}, {s: {$meta: "textScore"}}) .sort({s: {$meta: "textScore"}}) .toArray(); -assert.eq(results.length, 4, results); -assert.eq(results[0]._id, -2, results); -assert.eq(results[1]._id, 2, results); -assert.eq(results[2]._id, -1, results); -assert.eq(results[3]._id, 1, results); +assert.eq(results.length, 4); +assert.eq(results[0]._id, -2); +assert.eq(results[1]._id, 2); +assert.eq(results[2]._id, -1); +assert.eq(results[3]._id, 1); // // Verify that mongos requires the text metadata sort to be specified in the projection. @@ -68,36 +68,6 @@ assert.throws(function() { cursor.next(); }); -// -// Execute query with a compound sort that includes the text score along with a multikey field. -// - -coll.drop(); -assert.commandWorked(coll.insert({_id: 0, a: "pizza", b: [1, 4]})); -assert.commandWorked(coll.insert({_id: 1, a: "pizza pizza", b: [6, 7]})); -assert.commandWorked(coll.insert({_id: 2, a: "pizza", b: [2, 3]})); -assert.commandWorked(coll.insert({_id: 3, a: "pizza pizza", b: [5, 8]})); -assert.commandWorked(coll.ensureIndex({a: "text"})); - -results = coll.find({$text: {$search: "pizza"}}, {s: {$meta: "textScore"}}) - .sort({s: {$meta: "textScore"}, b: 1}) - .toArray(); -assert.eq(results.length, 4, results); -assert.eq(results[0]._id, 3, results); -assert.eq(results[1]._id, 1, results); -assert.eq(results[2]._id, 0, results); -assert.eq(results[3]._id, 2, results); - -// -// Repeat the query with an aggregation pipeline and verify that the result is the same. -// - -var aggResults = coll.aggregate([ - {$match: {$text: {$search: "pizza"}}}, - {$addFields: {s: {$meta: "textScore"}}}, - {$sort: {s: {$meta: "textScore"}, b: 1}} - ]) - .toArray(); -assert.eq(results, aggResults); +// TODO Test sort on compound key. st.stop(); diff --git a/src/mongo/db/exec/sort_key_generator.cpp b/src/mongo/db/exec/sort_key_generator.cpp index 0d18dc37d7b..f8c81395b8a 100644 --- a/src/mongo/db/exec/sort_key_generator.cpp +++ b/src/mongo/db/exec/sort_key_generator.cpp @@ -68,16 +68,16 @@ PlanStage::StageState SortKeyGeneratorStage::doWork(WorkingSetID* out) { if (stageState == PlanStage::ADVANCED) { WorkingSetMember* member = _ws->get(*out); - try { - auto sortKey = _sortKeyGen.computeSortKey(*member); - - // Add the sort key to the WSM as metadata. - member->metadata().setSortKey(std::move(sortKey), _sortKeyGen.isSingleElementKey()); - } catch (const DBException& computeSortKeyException) { - *out = WorkingSetCommon::allocateStatusMember(_ws, computeSortKeyException.toStatus()); + auto sortKey = _sortKeyGen.computeSortKey(*member); + if (!sortKey.isOK()) { + *out = WorkingSetCommon::allocateStatusMember(_ws, sortKey.getStatus()); return PlanStage::FAILURE; } + // Add the sort key to the WSM as metadata. + member->metadata().setSortKey(std::move(sortKey.getValue()), + _sortKeyGen.isSingleElementKey()); + return PlanStage::ADVANCED; } diff --git a/src/mongo/db/index/sort_key_generator.cpp b/src/mongo/db/index/sort_key_generator.cpp index a09622bb316..97abfe097a7 100644 --- a/src/mongo/db/index/sort_key_generator.cpp +++ b/src/mongo/db/index/sort_key_generator.cpp @@ -76,15 +76,30 @@ SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterf Ordering::make(_sortSpecWithoutMeta)); } -Value SortKeyGenerator::computeSortKey(const WorkingSetMember& wsm) const { +// TODO (SERVER-42836): Once WorkingSetMember objects store a Document (SERVER-42181), this function +// will be able to use the Document overload of computeSortKeyFromDocument, and it will be able to +// store the text score with the Document instead of in a separate SortKeyGenerator::Metadata +// object. +StatusWith SortKeyGenerator::computeSortKey(const WorkingSetMember& wsm) const { if (wsm.hasObj()) { - return computeSortKeyFromDocument(wsm.doc.value(), wsm.metadata()); + SortKeyGenerator::Metadata metadata; + if (_sortHasMeta && wsm.metadata().hasTextScore()) { + metadata.textScore = wsm.metadata().getTextScore(); + } + auto statusWithSortKeyObj = computeSortKeyFromDocument(wsm.doc.value().toBson(), &metadata); + if (!statusWithSortKeyObj.isOK()) { + return statusWithSortKeyObj.getStatus(); + } + + return DocumentMetadataFields::deserializeSortKey(isSingleElementKey(), + statusWithSortKeyObj.getValue()); } return computeSortKeyFromIndexKey(wsm); } -Value SortKeyGenerator::computeSortKeyFromIndexKey(const WorkingSetMember& member) const { +StatusWith SortKeyGenerator::computeSortKeyFromIndexKey( + const WorkingSetMember& member) const { invariant(member.getState() == WorkingSetMember::RID_AND_IDX); invariant(!_sortHasMeta); @@ -103,9 +118,16 @@ Value SortKeyGenerator::computeSortKeyFromIndexKey(const WorkingSetMember& membe return DocumentMetadataFields::deserializeSortKey(isSingleElementKey(), objBuilder.obj()); } -BSONObj SortKeyGenerator::computeSortKeyFromDocument(const BSONObj& obj, - const DocumentMetadataFields& metadata) const { - auto sortKeyNoMetadata = uassertStatusOK(computeSortKeyFromDocumentWithoutMetadata(obj)); +StatusWith SortKeyGenerator::computeSortKeyFromDocument(const BSONObj& obj, + const Metadata* metadata) const { + if (_sortHasMeta) { + invariant(metadata); + } + + auto sortKeyNoMetadata = computeSortKeyFromDocumentWithoutMetadata(obj); + if (!sortKeyNoMetadata.isOK()) { + return sortKeyNoMetadata; + } if (!_sortHasMeta) { // We don't have to worry about $meta sort, so the index key becomes the sort key. @@ -115,27 +137,24 @@ BSONObj SortKeyGenerator::computeSortKeyFromDocument(const BSONObj& obj, BSONObjBuilder mergedKeyBob; // Merge metadata into the key. - BSONObjIterator sortKeyIt(sortKeyNoMetadata); + BSONObjIterator sortKeyIt(sortKeyNoMetadata.getValue()); for (auto& part : _sortPattern) { if (part.fieldPath) { invariant(sortKeyIt.more()); mergedKeyBob.append(sortKeyIt.next()); continue; } - - // Create a Document that represents the input object and its metadata together, so we can - // use it to evaluate the ExpressionMeta for this part of the sort pattern. This operation - // copies the data in 'metadata' but not any of the data in the 'obj' BSON. - MutableDocument documentWithMetdata(Document{obj}); - documentWithMetdata.setMetadata(DocumentMetadataFields(metadata)); - invariant(part.expression); - auto value = - part.expression->evaluate(documentWithMetdata.freeze(), nullptr /* variables */); - if (!value.missing()) { - value.addToBsonObj(&mergedKeyBob, ""_sd); - } else { - mergedKeyBob.appendNull(""); + switch (part.expression->getMetaType()) { + case DocumentMetadataFields::MetaType::kTextScore: { + mergedKeyBob.append("", metadata->textScore); + continue; + } + case DocumentMetadataFields::MetaType::kRandVal: { + mergedKeyBob.append("", metadata->randVal); + continue; + } + default: { MONGO_UNREACHABLE; } } } @@ -212,9 +231,7 @@ Value SortKeyGenerator::getCollationComparisonKey(const Value& val) const { } StatusWith SortKeyGenerator::extractKeyPart( - const Document& doc, - const DocumentMetadataFields& metadata, - const SortPattern::SortPatternPart& patternPart) const { + const Document& doc, const SortPattern::SortPatternPart& patternPart) const { Value plainKey; if (patternPart.fieldPath) { invariant(!patternPart.expression); @@ -226,28 +243,22 @@ StatusWith SortKeyGenerator::extractKeyPart( plainKey = key.getValue(); } else { invariant(patternPart.expression); - // ExpressionMeta expects metadata to be attached to the document. - MutableDocument documentWithMetadata(doc); - documentWithMetadata.setMetadata(DocumentMetadataFields(metadata)); - // ExpressionMeta does not use Variables. - plainKey = patternPart.expression->evaluate(documentWithMetadata.freeze(), - nullptr /* variables */); + plainKey = patternPart.expression->evaluate(doc, nullptr /* variables */); } - return plainKey.missing() ? Value{BSONNULL} : getCollationComparisonKey(plainKey); + return getCollationComparisonKey(plainKey); } -StatusWith SortKeyGenerator::extractKeyFast(const Document& doc, - const DocumentMetadataFields& metadata) const { +StatusWith SortKeyGenerator::extractKeyFast(const Document& doc) const { if (_sortPattern.isSingleElementKey()) { - return extractKeyPart(doc, metadata, _sortPattern[0]); + return extractKeyPart(doc, _sortPattern[0]); } std::vector keys; keys.reserve(_sortPattern.size()); for (auto&& keyPart : _sortPattern) { - auto extractedKey = extractKeyPart(doc, metadata, keyPart); + auto extractedKey = extractKeyPart(doc, keyPart); if (!extractedKey.isOK()) { // We can't use the fast path, so bail out. return extractedKey; @@ -258,18 +269,24 @@ StatusWith SortKeyGenerator::extractKeyFast(const Document& doc, return Value{std::move(keys)}; } -BSONObj SortKeyGenerator::extractKeyWithArray(const Document& doc, - const DocumentMetadataFields& metadata) const { +BSONObj SortKeyGenerator::extractKeyWithArray(const Document& doc) const { + SortKeyGenerator::Metadata metadata; + if (doc.metadata().hasTextScore()) { + metadata.textScore = doc.metadata().getTextScore(); + } + if (doc.metadata().hasRandVal()) { + metadata.randVal = doc.metadata().getRandVal(); + } + // Convert the Document to a BSONObj, but only do the conversion for the paths we actually need. // Then run the result through the SortKeyGenerator to obtain the final sort key. auto bsonDoc = _sortPattern.documentToBsonWithSortPaths(doc); - return computeSortKeyFromDocument(bsonDoc, metadata); + return uassertStatusOK(computeSortKeyFromDocument(bsonDoc, &metadata)); } -Value SortKeyGenerator::computeSortKeyFromDocument(const Document& doc, - const DocumentMetadataFields& metadata) const { +Value SortKeyGenerator::computeSortKeyFromDocument(const Document& doc) const { // This fast pass directly generates a Value. - auto fastKey = extractKeyFast(doc, metadata); + auto fastKey = extractKeyFast(doc); if (fastKey.isOK()) { return std::move(fastKey.getValue()); } @@ -278,7 +295,7 @@ Value SortKeyGenerator::computeSortKeyFromDocument(const Document& doc, // form like BSONObj {'': 1, '': [2, 3]}) and converts it to a Value (Value [1, [2, 3]] in the // earlier example). return DocumentMetadataFields::deserializeSortKey(_sortPattern.isSingleElementKey(), - extractKeyWithArray(doc, metadata)); + extractKeyWithArray(doc)); } } // namespace mongo diff --git a/src/mongo/db/index/sort_key_generator.h b/src/mongo/db/index/sort_key_generator.h index dd4b5031d05..b6ea016139a 100644 --- a/src/mongo/db/index/sort_key_generator.h +++ b/src/mongo/db/index/sort_key_generator.h @@ -40,6 +40,16 @@ namespace mongo { class SortKeyGenerator { public: + /** + * Metadata about a document which is needed to produce keys for $meta sort. The client of the + * SortKeyGenerator must provide this metadata in order to correctly obtain sort keys for sort + * patterns with $meta. + */ + struct Metadata { + double textScore = 0.0; + double randVal = 0.0; + }; + /** * Constructs a sort key generator which will generate keys for sort pattern 'sortPattern'. The * keys will incorporate the collation given by 'collator', and thus when actually compared to @@ -48,87 +58,75 @@ public: SortKeyGenerator(SortPattern sortPattern, const CollatorInterface* collator); /** - * Returns the key which should be used to sort the WorkingSetMember or throws if no key could - * be generated. The WorkingSetMember may represent either an index key or a document (owned or - * unowned) that has been fetched from the collection. + * Returns the key which should be used to sort the WorkingSetMember, or a non-OK status if no + * key could be generated. The WorkingSetMember may represent either an index key, or a document + * (owned or unowned) that has been fetched from the collection. * * If the sort pattern contains a $meta sort (e.g. sort by "textScore" or "randVal"), then the * necessary metadata is obtained from the WorkingSetMember. */ - Value computeSortKey(const WorkingSetMember&) const; + StatusWith computeSortKey(const WorkingSetMember&) const; /** - * Returns the sort key for the input 'doc' as a Value or throws if no key could be generated. - * When the sort pattern has multiple components, the resulting sort key is an Array-typed Value - * with one element for each component. For sort patterns with just one component, the sort key - * is a Value that represents the single element to sort on (which may or may not itself be an - * array). + * Returns the sort key for the input 'doc' as a Value. When the sort pattern has multiple + * components, the resulting sort key is a an Array-typed Value with one element for each + * component. For sort pattern with just one component, the sort key is a Value that represents + * the single element to sort on (which may or may not itself be an array). * * The sort key is computed based on the sort pattern, the contents of the document, and if - * required by $meta sort specifiers, metadata in the Document. + * required by $meta sort specifiers, metadata in the Document. This function throws if it + * cannot compute the sort pattern. */ - Value computeSortKeyFromDocument(const Document& doc) const { - return computeSortKeyFromDocument(doc, doc.metadata()); - } + Value computeSortKeyFromDocument(const Document& doc) const; bool isSingleElementKey() const { return _sortPattern.isSingleElementKey(); } private: - // Returns the sort key for the input 'doc' as a Value. - // - // Note that this function will ignore any metadata (e.g., textScore, randVal), in 'doc' but - // will instead read from the 'metadata' variable. When the metadata is contained in the 'doc' - // input, callers can use the public overload of this function. - Value computeSortKeyFromDocument(const Document& doc, - const DocumentMetadataFields& metadata) const; - - // Returns the key which should be used to sort 'obj' or throws an exception if no key could be - // generated. - // - // The caller must supply the appropriate 'metadata' in the case that the sort pattern includes - // a $meta sort (i.e. if sortHasMeta() is true). These values are filled in at the corresponding - // positions in the sort key. - BSONObj computeSortKeyFromDocument(const BSONObj& obj, - const DocumentMetadataFields& metadata) const; + /** + * Returns the key which should be used to sort 'obj', or a non-OK status if no key could be + * generated. + * + * The caller must supply the appropriate 'metadata' in the case that the sort pattern includes + * a $meta sort (i.e. if sortHasMeta() is true). These values are filled in at the corresponding + * positions in the sort key. + */ + StatusWith computeSortKeyFromDocument(const BSONObj& obj, const Metadata*) const; // Extracts the sort key from a WorkingSetMember which represents an index key. It is illegal to // call this if the working set member is not in RID_AND_IDX state. It is also illegal to call // this if the sort pattern has any $meta components. - Value computeSortKeyFromIndexKey(const WorkingSetMember& member) const; + StatusWith computeSortKeyFromIndexKey(const WorkingSetMember& member) const; // Extracts the sort key from 'obj', using '_sortSpecWithoutMeta' and thus ignoring any $meta // sort components of the sort pattern. The caller is responsible for augmenting this key with // the appropriate metadata if '_sortHasMeta' is true. StatusWith computeSortKeyFromDocumentWithoutMetadata(const BSONObj& obj) const; - // Returns the sort key for 'doc' based on the SortPattern, or ErrorCodes::InternalError if an - // array is encountered during sort key generation. - // - // Note that this function will ignore any metadata (e.g., textScore, randVal), in 'doc' but - // will instead read from the 'metadata' variable. - StatusWith extractKeyFast(const Document& doc, - const DocumentMetadataFields& metadata) const; - - // Extracts the sort key component described by 'keyPart' from 'doc' and returns it. Returns - // ErrorCodes::InternalError if the path for 'keyPart' contains an array in 'doc'. - // - // Note that this function will ignore any metadata (e.g., textScore, randVal), in 'doc' but - // will instead read from the 'metadata' variable. + /** + * Returns the sort key for 'doc' based on the SortPattern, or ErrorCodes::InternalError if an + * array is encountered during sort key generation. + */ + StatusWith extractKeyFast(const Document& doc) const; + + /** + * Extracts the sort key component described by 'keyPart' from 'doc' and returns it. Returns + * ErrorCodes::InternalError if the path for 'keyPart' contains an array in 'doc'. + */ StatusWith extractKeyPart(const Document& doc, - const DocumentMetadataFields& metadata, const SortPattern::SortPatternPart& keyPart) const; - // Returns the sort key for 'doc' based on the SortPattern. Note this is in the BSONObj format - - // with empty field names. - // - // Note that this function will ignore any metadata (e.g., textScore, randVal), in 'doc' but - // will instead read from the 'metadata' variable. - BSONObj extractKeyWithArray(const Document& doc, const DocumentMetadataFields& metadata) const; + /** + * Returns the sort key for 'doc' based on the SortPattern. Note this is in the BSONObj format - + * with empty field names. + */ + BSONObj extractKeyWithArray(const Document& doc) const; - // Returns the comparison key used to sort 'val' with collation. Note that these comparison keys - // should always be sorted with the simple (i.e. binary) collation. + /** + * Returns the comparison key used to sort 'val' with collation. Note that these comparison keys + * should always be sorted with the simple (i.e. binary) collation. + */ Value getCollationComparisonKey(const Value& val) const; const CollatorInterface* _collator = nullptr; diff --git a/src/mongo/db/index/sort_key_generator_test.cpp b/src/mongo/db/index/sort_key_generator_test.cpp index 29ca2039153..0e6681926dc 100644 --- a/src/mongo/db/index/sort_key_generator_test.cpp +++ b/src/mongo/db/index/sort_key_generator_test.cpp @@ -266,14 +266,16 @@ TEST_F(SortKeyGeneratorWorkingSetTest, CanGetSortKeyFromWorkingSetMemberWithObj) auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), nullptr); setRecordIdAndObj(BSON("x" << 1 << "a" << 2 << "y" << 3)); auto sortKey = sortKeyGen->computeSortKey(member()); - ASSERT_VALUE_EQ(Value(2), sortKey); + ASSERT_OK(sortKey); + ASSERT_VALUE_EQ(Value(2), sortKey.getValue()); } TEST_F(SortKeyGeneratorWorkingSetTest, CanGetSortKeyFromWorkingSetMemberWithOwnedObj) { auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), nullptr); setOwnedObj(BSON("x" << 1 << "a" << 2 << "y" << 3)); auto sortKey = sortKeyGen->computeSortKey(member()); - ASSERT_VALUE_EQ(Value(2), sortKey); + ASSERT_OK(sortKey); + ASSERT_VALUE_EQ(Value(2), sortKey.getValue()); } TEST_F(SortKeyGeneratorWorkingSetTest, CanGenerateKeyFromWSMForTextScoreMetaSort) { @@ -282,14 +284,16 @@ TEST_F(SortKeyGeneratorWorkingSetTest, CanGenerateKeyFromWSMForTextScoreMetaSort setOwnedObj(BSON("x" << 1 << "a" << 2 << "y" << 3 << "c" << BSON_ARRAY(4 << 5 << 6))); member().metadata().setTextScore(9.9); auto sortKey = sortKeyGen->computeSortKey(member()); - ASSERT_VALUE_EQ(Value({Value(2), Value(9.9), Value(6)}), sortKey); + ASSERT_OK(sortKey); + ASSERT_VALUE_EQ(Value({Value(2), Value(9.9), Value(6)}), sortKey.getValue()); } TEST_F(SortKeyGeneratorWorkingSetTest, CanGenerateSortKeyFromWSMInIndexKeyState) { auto sortKeyGen = makeSortKeyGen(BSON("a" << 1), nullptr); setRecordIdAndIdx(BSON("a" << 1 << "b" << 1), BSON("" << 2 << "" << 3)); auto sortKey = sortKeyGen->computeSortKey(member()); - ASSERT_VALUE_EQ(Value(2), sortKey); + ASSERT_OK(sortKey); + ASSERT_VALUE_EQ(Value(2), sortKey.getValue()); } TEST_F(SortKeyGeneratorWorkingSetTest, CanGenerateSortKeyFromWSMInIndexKeyStateWithCollator) { @@ -301,7 +305,8 @@ TEST_F(SortKeyGeneratorWorkingSetTest, CanGenerateSortKeyFromWSMInIndexKeyStateW << "" << "string2")); auto sortKey = sortKeyGen->computeSortKey(member()); - ASSERT_VALUE_EQ(Value("1gnirts"_sd), sortKey); + ASSERT_OK(sortKey); + ASSERT_VALUE_EQ(Value("1gnirts"_sd), sortKey.getValue()); } DEATH_TEST_F(SortKeyGeneratorWorkingSetTest, -- cgit v1.2.1