diff options
Diffstat (limited to 'src/mongo')
21 files changed, 337 insertions, 49 deletions
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript index 62da7a2a7ea..06fdd5c3238 100644 --- a/src/mongo/db/exec/SConscript +++ b/src/mongo/db/exec/SConscript @@ -41,6 +41,7 @@ sortExecutorEnv.Library( '$BUILD_DIR/mongo/db/storage/storage_options', '$BUILD_DIR/mongo/s/is_mongos', '$BUILD_DIR/third_party/shim_snappy', + 'working_set', ], ) diff --git a/src/mongo/db/exec/count_scan.cpp b/src/mongo/db/exec/count_scan.cpp index c24f3ef1343..be7914299d9 100644 --- a/src/mongo/db/exec/count_scan.cpp +++ b/src/mongo/db/exec/count_scan.cpp @@ -74,7 +74,7 @@ const char* CountScan::kStageType = "COUNT_SCAN"; // the CountScanParams rather than resolving them via the IndexDescriptor, since these may differ // from the descriptor's contents. CountScan::CountScan(OperationContext* opCtx, CountScanParams params, WorkingSet* workingSet) - : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor), + : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor, workingSet), _workingSet(workingSet), _keyPattern(std::move(params.keyPattern)), _shouldDedup(params.isMultiKey), diff --git a/src/mongo/db/exec/distinct_scan.cpp b/src/mongo/db/exec/distinct_scan.cpp index 3f8c3342e80..24e45ee62d2 100644 --- a/src/mongo/db/exec/distinct_scan.cpp +++ b/src/mongo/db/exec/distinct_scan.cpp @@ -47,7 +47,7 @@ using std::vector; const char* DistinctScan::kStageType = "DISTINCT_SCAN"; DistinctScan::DistinctScan(OperationContext* opCtx, DistinctParams params, WorkingSet* workingSet) - : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor), + : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor, workingSet), _workingSet(workingSet), _keyPattern(std::move(params.keyPattern)), _scanDirection(params.scanDirection), @@ -122,7 +122,7 @@ PlanStage::StageState DistinctScan::doWork(WorkingSetID* out) { WorkingSetID id = _workingSet->allocate(); WorkingSetMember* member = _workingSet->get(id); member->recordId = kv->loc; - member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, indexAccessMethod())); + member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, workingSetIndexId())); _workingSet->transitionToRecordIdAndIdx(id); *out = id; diff --git a/src/mongo/db/exec/idhack.cpp b/src/mongo/db/exec/idhack.cpp index 02e347dda4d..64848cb50e6 100644 --- a/src/mongo/db/exec/idhack.cpp +++ b/src/mongo/db/exec/idhack.cpp @@ -53,7 +53,7 @@ IDHackStage::IDHackStage(OperationContext* opCtx, CanonicalQuery* query, WorkingSet* ws, const IndexDescriptor* descriptor) - : RequiresIndexStage(kStageType, opCtx, descriptor), + : RequiresIndexStage(kStageType, opCtx, descriptor, ws), _workingSet(ws), _key(query->getQueryObj()["_id"].wrap()) { _specificStats.indexName = descriptor->indexName(); @@ -64,7 +64,7 @@ IDHackStage::IDHackStage(OperationContext* opCtx, const BSONObj& key, WorkingSet* ws, const IndexDescriptor* descriptor) - : RequiresIndexStage(kStageType, opCtx, descriptor), _workingSet(ws), _key(key) { + : RequiresIndexStage(kStageType, opCtx, descriptor, ws), _workingSet(ws), _key(key) { _specificStats.indexName = descriptor->indexName(); } diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp index cd32deb7c0d..0e5d940e2f0 100644 --- a/src/mongo/db/exec/index_scan.cpp +++ b/src/mongo/db/exec/index_scan.cpp @@ -64,7 +64,7 @@ IndexScan::IndexScan(OperationContext* opCtx, IndexScanParams params, WorkingSet* workingSet, const MatchExpression* filter) - : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor), + : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor, workingSet), _workingSet(workingSet), _keyPattern(params.keyPattern.getOwned()), _bounds(std::move(params.bounds)), @@ -233,7 +233,7 @@ PlanStage::StageState IndexScan::doWork(WorkingSetID* out) { WorkingSetID id = _workingSet->allocate(); WorkingSetMember* member = _workingSet->get(id); member->recordId = kv->loc; - member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, indexAccessMethod())); + member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, workingSetIndexId())); _workingSet->transitionToRecordIdAndIdx(id); if (_addKeyMetadata) { diff --git a/src/mongo/db/exec/near.cpp b/src/mongo/db/exec/near.cpp index 91a3a60317a..ac16083b0a5 100644 --- a/src/mongo/db/exec/near.cpp +++ b/src/mongo/db/exec/near.cpp @@ -47,7 +47,7 @@ NearStage::NearStage(OperationContext* opCtx, StageType type, WorkingSet* workingSet, const IndexDescriptor* indexDescriptor) - : RequiresIndexStage(typeName, opCtx, indexDescriptor), + : RequiresIndexStage(typeName, opCtx, indexDescriptor, workingSet), _workingSet(workingSet), _searchState(SearchState_Initializing), _nextIntervalStats(nullptr), diff --git a/src/mongo/db/exec/projection_exec_test.cpp b/src/mongo/db/exec/projection_exec_test.cpp index 33e14d01406..684d3a07f35 100644 --- a/src/mongo/db/exec/projection_exec_test.cpp +++ b/src/mongo/db/exec/projection_exec_test.cpp @@ -193,7 +193,7 @@ TEST(ProjectionExecTest, TransformCoveredDottedProjection) { ASSERT_EQ(boost::make_optional("{ b: { c: 2, d: 3, f: { g: 4, h: 5 } } }"s), project("{'b.c': 1, 'b.d': 1, 'b.f.g': 1, 'b.f.h': 1}", "{}", - IndexKeyDatum(keyPattern, keyData, nullptr))); + IndexKeyDatum(keyPattern, keyData, 0))); } TEST(ProjectionExecTest, TransformNonCoveredDottedProjection) { @@ -249,7 +249,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredNormal) { ASSERT_EQ(boost::make_optional("{ a: 5, b: { : 5 } }"s), project("{_id: 0, a: 1, b: {$meta: 'sortKey'}}", "{}", - IndexKeyDatum(BSON("a" << 1), BSON("" << 5), nullptr), + IndexKeyDatum(BSON("a" << 1), BSON("" << 5), 0), boost::none, // collator BSON("" << 5))); // sortKey } @@ -258,7 +258,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredOverwrite) { ASSERT_EQ(boost::make_optional("{ a: { : 5 } }"s), project("{_id: 0, a: 1, a: {$meta: 'sortKey'}}", "{}", - IndexKeyDatum(BSON("a" << 1), BSON("" << 5), nullptr), + IndexKeyDatum(BSON("a" << 1), BSON("" << 5), 0), boost::none, // collator BSON("" << 5))); // sortKey } @@ -267,7 +267,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredAdditionalData) { ASSERT_EQ(boost::make_optional("{ a: 5, c: 6, b: { : 5 } }"s), project("{_id: 0, a: 1, b: {$meta: 'sortKey'}, c: 1}", "{}", - IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr), + IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0), boost::none, // collator BSON("" << 5))); // sortKey } @@ -276,7 +276,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredCompound) { ASSERT_EQ(boost::make_optional("{ a: 5, b: { : 5, : 6 } }"s), project("{_id: 0, a: 1, b: {$meta: 'sortKey'}}", "{}", - IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr), + IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0), boost::none, // collator BSON("" << 5 << "" << 6))); // sortKey } @@ -287,7 +287,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredCompound2) { "{}", IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1), BSON("" << 5 << "" << 6 << "" << 4), - nullptr), + 0), boost::none, // collator BSON("" << 5 << "" << 6))); // sortKey } @@ -298,7 +298,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredCompound3) { "{}", IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1 << "d" << 1), BSON("" << 5 << "" << 6 << "" << 4 << "" << 9000), - nullptr), + 0), boost::none, // collator BSON("" << 6 << "" << 4))); // sortKey } diff --git a/src/mongo/db/exec/requires_index_stage.cpp b/src/mongo/db/exec/requires_index_stage.cpp index 307de4b7409..2ff0f373480 100644 --- a/src/mongo/db/exec/requires_index_stage.cpp +++ b/src/mongo/db/exec/requires_index_stage.cpp @@ -35,7 +35,8 @@ namespace mongo { RequiresIndexStage::RequiresIndexStage(const char* stageType, OperationContext* opCtx, - const IndexDescriptor* indexDescriptor) + const IndexDescriptor* indexDescriptor, + WorkingSet* workingSet) : RequiresCollectionStage(stageType, opCtx, indexDescriptor->getCollection()), _weakIndexCatalogEntry(collection()->getIndexCatalog()->getEntryShared(indexDescriptor)) { auto indexCatalogEntry = _weakIndexCatalogEntry.lock(); @@ -44,6 +45,7 @@ RequiresIndexStage::RequiresIndexStage(const char* stageType, invariant(_indexDescriptor); invariant(_indexAccessMethod); _indexName = _indexDescriptor->indexName(); + _workingSetIndexId = workingSet->registerIndexAccessMethod(_indexAccessMethod); } void RequiresIndexStage::doSaveStateRequiresCollection() { diff --git a/src/mongo/db/exec/requires_index_stage.h b/src/mongo/db/exec/requires_index_stage.h index 030aa369306..fca94bf0de0 100644 --- a/src/mongo/db/exec/requires_index_stage.h +++ b/src/mongo/db/exec/requires_index_stage.h @@ -30,6 +30,7 @@ #pragma once #include "mongo/db/exec/requires_collection_stage.h" +#include "mongo/db/exec/working_set.h" #include "mongo/db/index/index_descriptor.h" namespace mongo { @@ -48,7 +49,8 @@ class RequiresIndexStage : public RequiresCollectionStage { public: RequiresIndexStage(const char* stageType, OperationContext* opCtx, - const IndexDescriptor* indexDescriptor); + const IndexDescriptor* indexDescriptor, + WorkingSet* workingSet); virtual ~RequiresIndexStage() = default; @@ -75,6 +77,10 @@ protected: return _indexAccessMethod; } + WorkingSetRegisteredIndexId workingSetIndexId() const { + return _workingSetIndexId; + } + private: // We keep a weak_ptr to the index catalog entry in order to detect when the underlying catalog // object has been destroyed, e.g. due to an index drop. In this scenario, the @@ -93,6 +99,10 @@ private: const IndexAccessMethod* _indexAccessMethod; std::string _indexName; + + // An indentifier for the index required by this stage. Any working set member allocated to + // represent an index key from this index must include this id. + WorkingSetRegisteredIndexId _workingSetIndexId; }; } // namespace mongo diff --git a/src/mongo/db/exec/sort_executor.cpp b/src/mongo/db/exec/sort_executor.cpp index 8fd20cfda46..cbcfaa78b1c 100644 --- a/src/mongo/db/exec/sort_executor.cpp +++ b/src/mongo/db/exec/sort_executor.cpp @@ -98,7 +98,25 @@ int SortExecutor::Comparator::operator()(const DocumentSorter::Data& lhs, return 0; } -boost::optional<Document> SortExecutor::getNext() { +boost::optional<Document> SortExecutor::getNextDoc() { + auto wsm = getNextWsm(); + if (!wsm) { + return boost::none; + } + + // Ensure that this WorkingSetMember only houses a Document. This guarantees that we are not + // discarding data inside the working set member (e.g. the RecordId) when returning the Document + // to the caller. + invariant(wsm->hasOwnedObj()); + + // Transfer metadata from the WorkingSetMember to the Document. + MutableDocument mutableDoc{std::move(wsm->doc.value())}; + mutableDoc.setMetadata(wsm->releaseMetadata()); + + return mutableDoc.freeze(); +} + +boost::optional<WorkingSetMember> SortExecutor::getNextWsm() { if (_isEOF) { return boost::none; } @@ -113,6 +131,22 @@ boost::optional<Document> SortExecutor::getNext() { } void SortExecutor::add(Value sortKey, Document data) { + invariant(data.isOwned()); + WorkingSetMember wsm; + + // Transfer metadata from the Document to the WorkingSetMember. + MutableDocument mutableDoc{std::move(data)}; + wsm.setMetadata(mutableDoc.releaseMetadata()); + + wsm.doc.setValue(mutableDoc.freeze()); + wsm.transitionToOwnedObj(); + this->add(std::move(sortKey), std::move(wsm)); +} + +void SortExecutor::add(Value sortKey, WorkingSetMember data) { + // Metadata should be attached directly to the WSM rather than inside the Document. + invariant(!data.doc.value().metadata()); + if (!_sorter) { _sorter.reset(DocumentSorter::make(makeSortOptions(), Comparator(_sortPattern))); } diff --git a/src/mongo/db/exec/sort_executor.h b/src/mongo/db/exec/sort_executor.h index 95741f41256..86ce3fc2687 100644 --- a/src/mongo/db/exec/sort_executor.h +++ b/src/mongo/db/exec/sort_executor.h @@ -29,6 +29,7 @@ #pragma once +#include "mongo/db/exec/working_set.h" #include "mongo/db/pipeline/document.h" #include "mongo/db/pipeline/expression.h" #include "mongo/db/query/sort_pattern.h" @@ -52,7 +53,9 @@ public: std::string tempDir, bool allowDiskUse); - boost::optional<Document> getNext(); + boost::optional<Document> getNextDoc(); + + boost::optional<WorkingSetMember> getNextWsm(); const SortPattern& sortPattern() const { return _sortPattern; @@ -89,8 +92,13 @@ public: */ void add(Value, Document); + /** + * Add a WorkingSetMember with sort key specified by Value to the DocumentSorter. + */ + void add(Value, WorkingSetMember); + private: - using DocumentSorter = Sorter<Value, Document>; + using DocumentSorter = Sorter<Value, WorkingSetMember>; class Comparator { public: Comparator(const SortPattern& sortPattern) : _sort(sortPattern) {} diff --git a/src/mongo/db/exec/working_set.cpp b/src/mongo/db/exec/working_set.cpp index 1c565cca8cd..2aacba154ab 100644 --- a/src/mongo/db/exec/working_set.cpp +++ b/src/mongo/db/exec/working_set.cpp @@ -88,7 +88,7 @@ void WorkingSet::transitionToRecordIdAndIdx(WorkingSetID id) { void WorkingSet::transitionToRecordIdAndObj(WorkingSetID id) { WorkingSetMember* member = get(id); - member->_state = WorkingSetMember::RID_AND_OBJ; + member->transitionToRecordIdAndObj(); } void WorkingSet::transitionToOwnedObj(WorkingSetID id) { @@ -123,6 +123,9 @@ void WorkingSetMember::transitionToOwnedObj() { _state = OWNED_OBJ; } +void WorkingSetMember::transitionToRecordIdAndObj() { + _state = WorkingSetMember::RID_AND_OBJ; +} bool WorkingSetMember::hasRecordId() const { return _state == RID_AND_IDX || _state == RID_AND_OBJ; @@ -187,4 +190,89 @@ void WorkingSetMember::resetDocument(SnapshotId snapshot, const BSONObj& obj) { md.reset(obj, false); doc.value() = md.freeze(); } + +void WorkingSetMember::serializeForSorter(BufBuilder& buf) const { + // It is not legal to serialize a Document which has metadata attached to it. Any metadata must + // reside directly in the WorkingSetMember. + invariant(!doc.value().metadata()); + + buf.appendChar(static_cast<char>(_state)); + + if (hasObj()) { + doc.value().serializeForSorter(buf); + buf.appendNum(static_cast<unsigned long long>(doc.snapshotId().toNumber())); + } + + if (_state == RID_AND_IDX) { + // First append the number of index keys, and then encode them in series. + buf.appendNum(static_cast<char>(keyData.size())); + for (auto&& indexKeyDatum : keyData) { + indexKeyDatum.indexKeyPattern.serializeForSorter(buf); + indexKeyDatum.keyData.serializeForSorter(buf); + buf.appendNum(indexKeyDatum.indexId); + } + } + + if (hasRecordId()) { + buf.appendNum(recordId.repr()); + } + + _metadata.serializeForSorter(buf); +} + +WorkingSetMember WorkingSetMember::deserializeForSorter(BufReader& buf, + const SorterDeserializeSettings&) { + WorkingSetMember wsm; + + // First decode the state, which instructs us on how to interpret the rest of the buffer. + wsm._state = static_cast<MemberState>(buf.read<char>()); + + if (wsm.hasObj()) { + wsm.doc.setValue( + Document::deserializeForSorter(buf, Document::SorterDeserializeSettings{})); + auto snapshotIdRepr = buf.read<LittleEndian<uint64_t>>(); + auto snapshotId = snapshotIdRepr ? SnapshotId{snapshotIdRepr} : SnapshotId{}; + wsm.doc.setSnapshotId(snapshotId); + } + + if (wsm.getState() == WorkingSetMember::RID_AND_IDX) { + auto numKeys = buf.read<char>(); + wsm.keyData.reserve(numKeys); + for (auto i = 0; i < numKeys; ++i) { + auto indexKeyPattern = + BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings{}).getOwned(); + auto indexKey = + BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings{}).getOwned(); + auto indexId = buf.read<LittleEndian<unsigned int>>(); + wsm.keyData.push_back( + IndexKeyDatum{std::move(indexKeyPattern), std::move(indexKey), indexId}); + } + + // Mark any working set member representing an index key as suspicious on deserialization. + // This is needed because the member may have survived a yield while absent from the working + // set. + wsm.isSuspicious = true; + } + + if (wsm.hasRecordId()) { + wsm.recordId = RecordId{buf.read<LittleEndian<int64_t>>()}; + } + + DocumentMetadataFields::deserializeForSorter(buf, &wsm._metadata); + + return wsm; +} + +WorkingSetRegisteredIndexId WorkingSet::registerIndexAccessMethod( + const IndexAccessMethod* indexAccess) { + for (WorkingSetRegisteredIndexId i = 0; i < _registeredIndexes.size(); ++i) { + if (_registeredIndexes[i] == indexAccess) { + return i; + } + } + + _registeredIndexes.push_back(indexAccess); + return _registeredIndexes.size() - 1; +} + } // namespace mongo diff --git a/src/mongo/db/exec/working_set.h b/src/mongo/db/exec/working_set.h index 6398214ce87..7c5063f0785 100644 --- a/src/mongo/db/exec/working_set.h +++ b/src/mongo/db/exec/working_set.h @@ -47,13 +47,21 @@ class WorkingSetMember; typedef size_t WorkingSetID; /** + * A type used to identify indexes that have been registered with the WorkingSet. A WorkingSetMember + * can be associated with a particular index via this id. + */ +using WorkingSetRegisteredIndexId = unsigned int; + +/** * The key data extracted from an index. Keeps track of both the key (currently a BSONObj) and * the index that provided the key. The index key pattern is required to correctly interpret * the key. */ struct IndexKeyDatum { - IndexKeyDatum(const BSONObj& keyPattern, const BSONObj& key, const IndexAccessMethod* index) - : indexKeyPattern(keyPattern), keyData(key), index(index) {} + IndexKeyDatum(const BSONObj& keyPattern, + const BSONObj& key, + WorkingSetRegisteredIndexId indexId) + : indexKeyPattern(keyPattern), keyData(key), indexId(indexId) {} /** * getFieldDotted produces the field with the provided name based on index keyData. The return @@ -84,7 +92,9 @@ struct IndexKeyDatum { // This is the BSONObj for the key that we put into the index. Owned by us. BSONObj keyData; - const IndexAccessMethod* index; + // Associates this index key with an index that has been registered with the WorkingSet. Can be + // used to recover pointers to catalog objects for this index from the WorkingSet. + WorkingSetRegisteredIndexId indexId; }; /** @@ -98,11 +108,6 @@ struct IndexKeyDatum { */ class WorkingSetMember { public: - /** - * Reset to an "empty" state. - */ - void clear(); - enum MemberState { // Initial state. INVALID, @@ -120,12 +125,23 @@ public: OWNED_OBJ, }; + struct SorterDeserializeSettings {}; + + static WorkingSetMember deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&); + + /** + * Reset to an "empty" state. + */ + void clear(); + // // Member state and state transitions // MemberState getState() const; + void transitionToRecordIdAndObj(); + void transitionToOwnedObj(); // @@ -208,6 +224,12 @@ public: */ void resetDocument(SnapshotId snapshot, const BSONObj& obj); + void serializeForSorter(BufBuilder& buf) const; + + int memUsageForSorter() const { + return getMemUsage(); + } + private: friend class WorkingSet; @@ -293,6 +315,20 @@ public: */ std::vector<WorkingSetID> getAndClearYieldSensitiveIds(); + /** + * Registers an IndexAccessMethod pointer with the WorkingSet, and returns a handle that can be + * used to recover the IndexAccessMethod. + */ + WorkingSetRegisteredIndexId registerIndexAccessMethod(const IndexAccessMethod* indexAccess); + + /** + * Returns the IndexAccessMethod for an index that has previously been registered with the + * WorkingSet using 'registerIndexAccessMethod()'. + */ + const IndexAccessMethod* retrieveIndexAccessMethod(WorkingSetRegisteredIndexId indexId) const { + return _registeredIndexes[indexId]; + } + private: struct MemberHolder { // Free list link if freed. Points to self if in use. @@ -312,6 +348,10 @@ private: // Contains ids of WSMs that may need to be adjusted when we next yield. std::vector<WorkingSetID> _yieldSensitiveIds; + + // Holds IndexAccessMethods that have been registered with 'registerIndexAccessMethod()`. The + // WorkingSetRegisteredIndexId is the offset into the vector. + std::vector<const IndexAccessMethod*> _registeredIndexes; }; } // namespace mongo diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp index 66f2da690d5..e59d7c92d17 100644 --- a/src/mongo/db/exec/working_set_common.cpp +++ b/src/mongo/db/exec/working_set_common.cpp @@ -72,10 +72,11 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx, member->resetDocument(opCtx->recoveryUnit()->getSnapshotId(), record->data.releaseToBson()); if (member->isSuspicious) { - // Make sure that all of the keyData is still valid for this copy of the document. - // This ensures both that index-provided filters and sort orders still hold. - // TODO provide a way for the query planner to opt out of this checking if it is - // unneeded due to the structure of the plan. + // Make sure that all of the keyData is still valid for this copy of the document. This + // ensures both that index-provided filters and sort orders still hold. + // + // TODO provide a way for the query planner to opt out of this checking if it is unneeded + // due to the structure of the plan. invariant(!member->keyData.empty()); for (size_t i = 0; i < member->keyData.size(); i++) { KeyStringSet keys; @@ -83,7 +84,8 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx, // be multikey when ensuring the keyData is still valid. KeyStringSet* multikeyMetadataKeys = nullptr; MultikeyPaths* multikeyPaths = nullptr; - auto* iam = member->keyData[i].index; + auto indexId = member->keyData[i].indexId; + auto* iam = workingSet->retrieveIndexAccessMethod(indexId); iam->getKeys(member->doc.value().toBson(), IndexAccessMethod::GetKeysMode::kEnforceConstraints, &keys, diff --git a/src/mongo/db/exec/working_set_test.cpp b/src/mongo/db/exec/working_set_test.cpp index 13015f2fcf9..67513a75a27 100644 --- a/src/mongo/db/exec/working_set_test.cpp +++ b/src/mongo/db/exec/working_set_test.cpp @@ -33,7 +33,9 @@ #include "mongo/db/jsobj.h" #include "mongo/db/json.h" #include "mongo/db/pipeline/document.h" +#include "mongo/db/pipeline/document_value_test_util.h" #include "mongo/db/storage/snapshot.h" +#include "mongo/unittest/bson_test_util.h" #include "mongo/unittest/unittest.h" #include "mongo/util/assert_util.h" @@ -119,7 +121,7 @@ TEST_F(WorkingSetFixture, getFieldFromIndex) { string secondName = "y"; int secondValue = 10; - member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), nullptr)); + member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), 0)); // Also a minor lie as RecordId is bogus. ws->transitionToRecordIdAndIdx(id); BSONElement elt; @@ -129,8 +131,7 @@ TEST_F(WorkingSetFixture, getFieldFromIndex) { ASSERT_FALSE(member->getFieldDotted("foo", &elt)); // Add another index datum. - member->keyData.push_back( - IndexKeyDatum(BSON(secondName << 1), BSON("" << secondValue), nullptr)); + member->keyData.push_back(IndexKeyDatum(BSON(secondName << 1), BSON("" << secondValue), 0)); ASSERT_TRUE(member->getFieldDotted(secondName, &elt)); ASSERT_EQUALS(elt.numberInt(), secondValue); ASSERT_TRUE(member->getFieldDotted(firstName, &elt)); @@ -143,7 +144,7 @@ TEST_F(WorkingSetFixture, getDottedFieldFromIndex) { string firstName = "x.y"; int firstValue = 5; - member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), nullptr)); + member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), 0)); ws->transitionToRecordIdAndIdx(id); BSONElement elt; ASSERT_TRUE(member->getFieldDotted(firstName, &elt)); @@ -190,4 +191,93 @@ TEST_F(WorkingSetFixture, MetadataCanBeCorrectlyTransferredBackAndForthFromDocum ASSERT_TRUE(member->metadata().hasSearchScore()); } +namespace { +// Serializes the given working set member to a buffer, then returns a working set member resulting +// from deserializing this buffer. +WorkingSetMember roundtripWsmThroughSerialization(const WorkingSetMember& wsm) { + BufBuilder builder{}; + wsm.serializeForSorter(builder); + BufReader reader{builder.buf(), static_cast<unsigned>(builder.len())}; + return WorkingSetMember::deserializeForSorter(reader, + WorkingSetMember::SorterDeserializeSettings{}); +} +} // namespace + +TEST_F(WorkingSetFixture, RecordIdAndObjStateCanRoundtripThroughSerialization) { + Document doc{{"foo", Value{"bar"_sd}}}; + member->doc.setValue(doc); + member->doc.setSnapshotId(SnapshotId{42u}); + member->recordId = RecordId{43}; + ws->transitionToRecordIdAndObj(id); + auto roundtripped = roundtripWsmThroughSerialization(*member); + ASSERT_EQ(WorkingSetMember::RID_AND_OBJ, roundtripped.getState()); + ASSERT_DOCUMENT_EQ(roundtripped.doc.value(), doc); + ASSERT_EQ(roundtripped.doc.snapshotId().toNumber(), 42u); + ASSERT_EQ(roundtripped.recordId.repr(), 43); + ASSERT_FALSE(roundtripped.isSuspicious); + ASSERT_FALSE(roundtripped.metadata()); +} + +TEST_F(WorkingSetFixture, OwnedObjStateCanRoundtripThroughSerialization) { + Document doc{{"foo", Value{"bar"_sd}}}; + member->doc.setValue(doc); + member->doc.setSnapshotId(SnapshotId{42u}); + ws->transitionToOwnedObj(id); + auto roundtripped = roundtripWsmThroughSerialization(*member); + ASSERT_EQ(WorkingSetMember::OWNED_OBJ, roundtripped.getState()); + ASSERT_DOCUMENT_EQ(roundtripped.doc.value(), doc); + ASSERT_EQ(roundtripped.doc.snapshotId().toNumber(), 42u); + ASSERT(roundtripped.recordId.isNull()); + ASSERT_FALSE(roundtripped.isSuspicious); + ASSERT_FALSE(roundtripped.metadata()); +} + +TEST_F(WorkingSetFixture, RecordIdAndIdxStateCanRoundtripThroughSerialization) { + member->recordId = RecordId{43}; + member->keyData.emplace_back(BSON("a" << 1 << "b" << 1), BSON("" << 3 << "" << 4), 8u); + member->keyData.emplace_back(BSON("c" << -1), BSON("" << 5), 9u); + ws->transitionToRecordIdAndIdx(id); + ASSERT_FALSE(member->isSuspicious); + + auto roundtripped = roundtripWsmThroughSerialization(*member); + ASSERT_EQ(WorkingSetMember::RID_AND_IDX, roundtripped.getState()); + ASSERT_EQ(roundtripped.recordId.repr(), 43); + ASSERT_EQ(roundtripped.keyData.size(), 2u); + + ASSERT_BSONOBJ_EQ(roundtripped.keyData[0].indexKeyPattern, BSON("a" << 1 << "b" << 1)); + ASSERT_BSONOBJ_EQ(roundtripped.keyData[0].keyData, BSON("" << 3 << "" << 4)); + ASSERT_EQ(roundtripped.keyData[0].indexId, 8u); + + ASSERT_BSONOBJ_EQ(roundtripped.keyData[1].indexKeyPattern, BSON("c" << -1)); + ASSERT_BSONOBJ_EQ(roundtripped.keyData[1].keyData, BSON("" << 5)); + ASSERT_EQ(roundtripped.keyData[1].indexId, 9u); + + ASSERT_TRUE(roundtripped.isSuspicious); + ASSERT_FALSE(roundtripped.metadata()); +} + +TEST_F(WorkingSetFixture, WsmWithMetadataCanRoundtripThroughSerialization) { + Document doc{{"foo", Value{"bar"_sd}}}; + member->doc.setValue(doc); + member->metadata().setTextScore(42.0); + member->metadata().setSearchScore(43.0); + ws->transitionToRecordIdAndObj(id); + auto roundtripped = roundtripWsmThroughSerialization(*member); + + ASSERT_EQ(WorkingSetMember::RID_AND_OBJ, roundtripped.getState()); + ASSERT_DOCUMENT_EQ(roundtripped.doc.value(), doc); + ASSERT_FALSE(roundtripped.doc.value().metadata()); + ASSERT_TRUE(roundtripped.doc.snapshotId().isNull()); + ASSERT_TRUE(roundtripped.recordId.isNull()); + ASSERT_FALSE(roundtripped.isSuspicious); + + ASSERT_TRUE(roundtripped.metadata()); + ASSERT_TRUE(roundtripped.metadata().hasTextScore()); + ASSERT_EQ(roundtripped.metadata().getTextScore(), 42.0); + ASSERT_TRUE(roundtripped.metadata().hasSearchScore()); + ASSERT_EQ(roundtripped.metadata().getSearchScore(), 43.0); + ASSERT_FALSE(roundtripped.metadata().hasGeoNearPoint()); + ASSERT_FALSE(roundtripped.metadata().hasGeoNearDistance()); +} + } // namespace mongo diff --git a/src/mongo/db/index/sort_key_generator_test.cpp b/src/mongo/db/index/sort_key_generator_test.cpp index 169d0cb8369..2b65febd058 100644 --- a/src/mongo/db/index/sort_key_generator_test.cpp +++ b/src/mongo/db/index/sort_key_generator_test.cpp @@ -279,7 +279,7 @@ public: } void setRecordIdAndIdx(BSONObj keyPattern, BSONObj key) { - _member->keyData.push_back(IndexKeyDatum(std::move(keyPattern), std::move(key), nullptr)); + _member->keyData.push_back(IndexKeyDatum(std::move(keyPattern), std::move(key), 0)); _workingSet.transitionToRecordIdAndIdx(_wsid); } diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp index adbc3692f5b..cf7c58a819e 100644 --- a/src/mongo/db/pipeline/document.cpp +++ b/src/mongo/db/pipeline/document.cpp @@ -323,6 +323,7 @@ intrusive_ptr<DocumentStorage> DocumentStorage::clone() const { dassert(out->_numFields == _numFields); } + out->_haveLazyLoadedMetadata = _haveLazyLoadedMetadata; out->_metadataFields = _metadataFields; return out; @@ -361,7 +362,7 @@ void DocumentStorage::reset(const BSONObj& bson, bool stripMetadata) { } void DocumentStorage::loadLazyMetadata() const { - if (_metadataFields) { + if (_haveLazyLoadedMetadata) { return; } @@ -397,6 +398,8 @@ void DocumentStorage::loadLazyMetadata() const { } } } + + _haveLazyLoadedMetadata = true; } Document::Document(const BSONObj& bson) { diff --git a/src/mongo/db/pipeline/document_internal.h b/src/mongo/db/pipeline/document_internal.h index 28417a6a46a..d521fa5aec7 100644 --- a/src/mongo/db/pipeline/document_internal.h +++ b/src/mongo/db/pipeline/document_internal.h @@ -422,6 +422,7 @@ public: } DocumentMetadataFields releaseMetadata() { + loadLazyMetadata(); return std::move(_metadataFields); } @@ -533,6 +534,11 @@ private: BSONObj _bson; mutable BSONObjIterator _bsonIt; + // If '_stripMetadata' is true, tracks whether or not the metadata has been lazy-loaded from the + // backing '_bson' object. If so, then no attempt will be made to load the metadata again, even + // if the metadata has been released by a call to 'releaseMetadata()'. + mutable bool _haveLazyLoadedMetadata = false; + mutable DocumentMetadataFields _metadataFields; // The storage constructed from a BSON value may contain metadata. When we process the BSON we diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index 6c77a8fa013..027d88037f8 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -83,7 +83,7 @@ DocumentSource::GetNextResult DocumentSourceSort::doGetNext() { invariant(populationResult.isEOF()); } - auto result = _sortExecutor->getNext(); + auto result = _sortExecutor->getNextDoc(); if (!result) return GetNextResult::makeEOF(); return GetNextResult(std::move(*result)); @@ -202,7 +202,7 @@ void DocumentSourceSort::loadDocument(Document&& doc) { // already computed the sort key we'd have split the pipeline there, would be merging presorted // documents, and wouldn't use this method. std::tie(sortKey, docForSorter) = extractSortKey(std::move(doc)); - _sortExecutor->add(sortKey, docForSorter); + _sortExecutor->add(sortKey, std::move(docForSorter)); } void DocumentSourceSort::loadingDone() { diff --git a/src/mongo/db/storage/snapshot.h b/src/mongo/db/storage/snapshot.h index 371e4cec29d..75870a7791f 100644 --- a/src/mongo/db/storage/snapshot.h +++ b/src/mongo/db/storage/snapshot.h @@ -60,6 +60,10 @@ public: return std::to_string(_id); } + uint64_t toNumber() const { + return _id; + } + private: uint64_t _id; }; diff --git a/src/mongo/dbtests/query_stage_sort_key_generator.cpp b/src/mongo/dbtests/query_stage_sort_key_generator.cpp index 51fd476078c..bc85c3dfab2 100644 --- a/src/mongo/dbtests/query_stage_sort_key_generator.cpp +++ b/src/mongo/dbtests/query_stage_sort_key_generator.cpp @@ -154,8 +154,8 @@ TEST(SortKeyGeneratorStageTest, SortKeyArray) { TEST(SortKeyGeneratorStageTest, SortKeyCoveredNormal) { CollatorInterface* collator = nullptr; - BSONObj actualOut = extractSortKeyCovered( - "{a: 1}", IndexKeyDatum(BSON("a" << 1), BSON("" << 5), nullptr), collator); + BSONObj actualOut = + extractSortKeyCovered("{a: 1}", IndexKeyDatum(BSON("a" << 1), BSON("" << 5), 0), collator); BSONObj expectedOut = BSON("" << 5); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); } @@ -164,7 +164,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredEmbedded) { CollatorInterface* collator = nullptr; BSONObj actualOut = extractSortKeyCovered( "{'a.c': 1}", - IndexKeyDatum(BSON("a.c" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr), + IndexKeyDatum(BSON("a.c" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0), collator); BSONObj expectedOut = BSON("" << 5); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); @@ -174,7 +174,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound) { CollatorInterface* collator = nullptr; BSONObj actualOut = extractSortKeyCovered( "{a: 1, c: 1}", - IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr), + IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0), collator); BSONObj expectedOut = BSON("" << 5 << "" << 6); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); @@ -185,7 +185,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound2) { BSONObj actualOut = extractSortKeyCovered("{a: 1, b: 1}", IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1), BSON("" << 5 << "" << 6 << "" << 4), - nullptr), + 0), collator); BSONObj expectedOut = BSON("" << 5 << "" << 6); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); @@ -197,7 +197,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound3) { extractSortKeyCovered("{b: 1, c: 1}", IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1 << "d" << 1), BSON("" << 5 << "" << 6 << "" << 4 << "" << 9000), - nullptr), + 0), collator); BSONObj expectedOut = BSON("" << 6 << "" << 4); ASSERT_BSONOBJ_EQ(actualOut, expectedOut); @@ -225,7 +225,7 @@ TEST(SortKeyGeneratorStageTest, CollatorAppliesWhenExtractingCoveredSortKeyStrin IndexKeyDatum(BSON("a" << 1 << "b" << 1), BSON("" << 4 << "" << "foo"), - nullptr), + 0), &collator); BSONObj expectedOut = BSON("" << "oof"); |