summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mongo/db/exec/SConscript1
-rw-r--r--src/mongo/db/exec/count_scan.cpp2
-rw-r--r--src/mongo/db/exec/distinct_scan.cpp4
-rw-r--r--src/mongo/db/exec/idhack.cpp4
-rw-r--r--src/mongo/db/exec/index_scan.cpp4
-rw-r--r--src/mongo/db/exec/near.cpp2
-rw-r--r--src/mongo/db/exec/projection_exec_test.cpp14
-rw-r--r--src/mongo/db/exec/requires_index_stage.cpp4
-rw-r--r--src/mongo/db/exec/requires_index_stage.h12
-rw-r--r--src/mongo/db/exec/sort_executor.cpp36
-rw-r--r--src/mongo/db/exec/sort_executor.h12
-rw-r--r--src/mongo/db/exec/working_set.cpp90
-rw-r--r--src/mongo/db/exec/working_set.h56
-rw-r--r--src/mongo/db/exec/working_set_common.cpp12
-rw-r--r--src/mongo/db/exec/working_set_test.cpp98
-rw-r--r--src/mongo/db/index/sort_key_generator_test.cpp2
-rw-r--r--src/mongo/db/pipeline/document.cpp5
-rw-r--r--src/mongo/db/pipeline/document_internal.h6
-rw-r--r--src/mongo/db/pipeline/document_source_sort.cpp4
-rw-r--r--src/mongo/db/storage/snapshot.h4
-rw-r--r--src/mongo/dbtests/query_stage_sort_key_generator.cpp14
21 files changed, 337 insertions, 49 deletions
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript
index 62da7a2a7ea..06fdd5c3238 100644
--- a/src/mongo/db/exec/SConscript
+++ b/src/mongo/db/exec/SConscript
@@ -41,6 +41,7 @@ sortExecutorEnv.Library(
'$BUILD_DIR/mongo/db/storage/storage_options',
'$BUILD_DIR/mongo/s/is_mongos',
'$BUILD_DIR/third_party/shim_snappy',
+ 'working_set',
],
)
diff --git a/src/mongo/db/exec/count_scan.cpp b/src/mongo/db/exec/count_scan.cpp
index c24f3ef1343..be7914299d9 100644
--- a/src/mongo/db/exec/count_scan.cpp
+++ b/src/mongo/db/exec/count_scan.cpp
@@ -74,7 +74,7 @@ const char* CountScan::kStageType = "COUNT_SCAN";
// the CountScanParams rather than resolving them via the IndexDescriptor, since these may differ
// from the descriptor's contents.
CountScan::CountScan(OperationContext* opCtx, CountScanParams params, WorkingSet* workingSet)
- : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor),
+ : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor, workingSet),
_workingSet(workingSet),
_keyPattern(std::move(params.keyPattern)),
_shouldDedup(params.isMultiKey),
diff --git a/src/mongo/db/exec/distinct_scan.cpp b/src/mongo/db/exec/distinct_scan.cpp
index 3f8c3342e80..24e45ee62d2 100644
--- a/src/mongo/db/exec/distinct_scan.cpp
+++ b/src/mongo/db/exec/distinct_scan.cpp
@@ -47,7 +47,7 @@ using std::vector;
const char* DistinctScan::kStageType = "DISTINCT_SCAN";
DistinctScan::DistinctScan(OperationContext* opCtx, DistinctParams params, WorkingSet* workingSet)
- : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor),
+ : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor, workingSet),
_workingSet(workingSet),
_keyPattern(std::move(params.keyPattern)),
_scanDirection(params.scanDirection),
@@ -122,7 +122,7 @@ PlanStage::StageState DistinctScan::doWork(WorkingSetID* out) {
WorkingSetID id = _workingSet->allocate();
WorkingSetMember* member = _workingSet->get(id);
member->recordId = kv->loc;
- member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, indexAccessMethod()));
+ member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, workingSetIndexId()));
_workingSet->transitionToRecordIdAndIdx(id);
*out = id;
diff --git a/src/mongo/db/exec/idhack.cpp b/src/mongo/db/exec/idhack.cpp
index 02e347dda4d..64848cb50e6 100644
--- a/src/mongo/db/exec/idhack.cpp
+++ b/src/mongo/db/exec/idhack.cpp
@@ -53,7 +53,7 @@ IDHackStage::IDHackStage(OperationContext* opCtx,
CanonicalQuery* query,
WorkingSet* ws,
const IndexDescriptor* descriptor)
- : RequiresIndexStage(kStageType, opCtx, descriptor),
+ : RequiresIndexStage(kStageType, opCtx, descriptor, ws),
_workingSet(ws),
_key(query->getQueryObj()["_id"].wrap()) {
_specificStats.indexName = descriptor->indexName();
@@ -64,7 +64,7 @@ IDHackStage::IDHackStage(OperationContext* opCtx,
const BSONObj& key,
WorkingSet* ws,
const IndexDescriptor* descriptor)
- : RequiresIndexStage(kStageType, opCtx, descriptor), _workingSet(ws), _key(key) {
+ : RequiresIndexStage(kStageType, opCtx, descriptor, ws), _workingSet(ws), _key(key) {
_specificStats.indexName = descriptor->indexName();
}
diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp
index cd32deb7c0d..0e5d940e2f0 100644
--- a/src/mongo/db/exec/index_scan.cpp
+++ b/src/mongo/db/exec/index_scan.cpp
@@ -64,7 +64,7 @@ IndexScan::IndexScan(OperationContext* opCtx,
IndexScanParams params,
WorkingSet* workingSet,
const MatchExpression* filter)
- : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor),
+ : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor, workingSet),
_workingSet(workingSet),
_keyPattern(params.keyPattern.getOwned()),
_bounds(std::move(params.bounds)),
@@ -233,7 +233,7 @@ PlanStage::StageState IndexScan::doWork(WorkingSetID* out) {
WorkingSetID id = _workingSet->allocate();
WorkingSetMember* member = _workingSet->get(id);
member->recordId = kv->loc;
- member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, indexAccessMethod()));
+ member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, workingSetIndexId()));
_workingSet->transitionToRecordIdAndIdx(id);
if (_addKeyMetadata) {
diff --git a/src/mongo/db/exec/near.cpp b/src/mongo/db/exec/near.cpp
index 91a3a60317a..ac16083b0a5 100644
--- a/src/mongo/db/exec/near.cpp
+++ b/src/mongo/db/exec/near.cpp
@@ -47,7 +47,7 @@ NearStage::NearStage(OperationContext* opCtx,
StageType type,
WorkingSet* workingSet,
const IndexDescriptor* indexDescriptor)
- : RequiresIndexStage(typeName, opCtx, indexDescriptor),
+ : RequiresIndexStage(typeName, opCtx, indexDescriptor, workingSet),
_workingSet(workingSet),
_searchState(SearchState_Initializing),
_nextIntervalStats(nullptr),
diff --git a/src/mongo/db/exec/projection_exec_test.cpp b/src/mongo/db/exec/projection_exec_test.cpp
index 33e14d01406..684d3a07f35 100644
--- a/src/mongo/db/exec/projection_exec_test.cpp
+++ b/src/mongo/db/exec/projection_exec_test.cpp
@@ -193,7 +193,7 @@ TEST(ProjectionExecTest, TransformCoveredDottedProjection) {
ASSERT_EQ(boost::make_optional("{ b: { c: 2, d: 3, f: { g: 4, h: 5 } } }"s),
project("{'b.c': 1, 'b.d': 1, 'b.f.g': 1, 'b.f.h': 1}",
"{}",
- IndexKeyDatum(keyPattern, keyData, nullptr)));
+ IndexKeyDatum(keyPattern, keyData, 0)));
}
TEST(ProjectionExecTest, TransformNonCoveredDottedProjection) {
@@ -249,7 +249,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredNormal) {
ASSERT_EQ(boost::make_optional("{ a: 5, b: { : 5 } }"s),
project("{_id: 0, a: 1, b: {$meta: 'sortKey'}}",
"{}",
- IndexKeyDatum(BSON("a" << 1), BSON("" << 5), nullptr),
+ IndexKeyDatum(BSON("a" << 1), BSON("" << 5), 0),
boost::none, // collator
BSON("" << 5))); // sortKey
}
@@ -258,7 +258,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredOverwrite) {
ASSERT_EQ(boost::make_optional("{ a: { : 5 } }"s),
project("{_id: 0, a: 1, a: {$meta: 'sortKey'}}",
"{}",
- IndexKeyDatum(BSON("a" << 1), BSON("" << 5), nullptr),
+ IndexKeyDatum(BSON("a" << 1), BSON("" << 5), 0),
boost::none, // collator
BSON("" << 5))); // sortKey
}
@@ -267,7 +267,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredAdditionalData) {
ASSERT_EQ(boost::make_optional("{ a: 5, c: 6, b: { : 5 } }"s),
project("{_id: 0, a: 1, b: {$meta: 'sortKey'}, c: 1}",
"{}",
- IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr),
+ IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0),
boost::none, // collator
BSON("" << 5))); // sortKey
}
@@ -276,7 +276,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredCompound) {
ASSERT_EQ(boost::make_optional("{ a: 5, b: { : 5, : 6 } }"s),
project("{_id: 0, a: 1, b: {$meta: 'sortKey'}}",
"{}",
- IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr),
+ IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0),
boost::none, // collator
BSON("" << 5 << "" << 6))); // sortKey
}
@@ -287,7 +287,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredCompound2) {
"{}",
IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1),
BSON("" << 5 << "" << 6 << "" << 4),
- nullptr),
+ 0),
boost::none, // collator
BSON("" << 5 << "" << 6))); // sortKey
}
@@ -298,7 +298,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredCompound3) {
"{}",
IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1 << "d" << 1),
BSON("" << 5 << "" << 6 << "" << 4 << "" << 9000),
- nullptr),
+ 0),
boost::none, // collator
BSON("" << 6 << "" << 4))); // sortKey
}
diff --git a/src/mongo/db/exec/requires_index_stage.cpp b/src/mongo/db/exec/requires_index_stage.cpp
index 307de4b7409..2ff0f373480 100644
--- a/src/mongo/db/exec/requires_index_stage.cpp
+++ b/src/mongo/db/exec/requires_index_stage.cpp
@@ -35,7 +35,8 @@ namespace mongo {
RequiresIndexStage::RequiresIndexStage(const char* stageType,
OperationContext* opCtx,
- const IndexDescriptor* indexDescriptor)
+ const IndexDescriptor* indexDescriptor,
+ WorkingSet* workingSet)
: RequiresCollectionStage(stageType, opCtx, indexDescriptor->getCollection()),
_weakIndexCatalogEntry(collection()->getIndexCatalog()->getEntryShared(indexDescriptor)) {
auto indexCatalogEntry = _weakIndexCatalogEntry.lock();
@@ -44,6 +45,7 @@ RequiresIndexStage::RequiresIndexStage(const char* stageType,
invariant(_indexDescriptor);
invariant(_indexAccessMethod);
_indexName = _indexDescriptor->indexName();
+ _workingSetIndexId = workingSet->registerIndexAccessMethod(_indexAccessMethod);
}
void RequiresIndexStage::doSaveStateRequiresCollection() {
diff --git a/src/mongo/db/exec/requires_index_stage.h b/src/mongo/db/exec/requires_index_stage.h
index 030aa369306..fca94bf0de0 100644
--- a/src/mongo/db/exec/requires_index_stage.h
+++ b/src/mongo/db/exec/requires_index_stage.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/db/exec/requires_collection_stage.h"
+#include "mongo/db/exec/working_set.h"
#include "mongo/db/index/index_descriptor.h"
namespace mongo {
@@ -48,7 +49,8 @@ class RequiresIndexStage : public RequiresCollectionStage {
public:
RequiresIndexStage(const char* stageType,
OperationContext* opCtx,
- const IndexDescriptor* indexDescriptor);
+ const IndexDescriptor* indexDescriptor,
+ WorkingSet* workingSet);
virtual ~RequiresIndexStage() = default;
@@ -75,6 +77,10 @@ protected:
return _indexAccessMethod;
}
+ WorkingSetRegisteredIndexId workingSetIndexId() const {
+ return _workingSetIndexId;
+ }
+
private:
// We keep a weak_ptr to the index catalog entry in order to detect when the underlying catalog
// object has been destroyed, e.g. due to an index drop. In this scenario, the
@@ -93,6 +99,10 @@ private:
const IndexAccessMethod* _indexAccessMethod;
std::string _indexName;
+
+ // An indentifier for the index required by this stage. Any working set member allocated to
+ // represent an index key from this index must include this id.
+ WorkingSetRegisteredIndexId _workingSetIndexId;
};
} // namespace mongo
diff --git a/src/mongo/db/exec/sort_executor.cpp b/src/mongo/db/exec/sort_executor.cpp
index 8fd20cfda46..cbcfaa78b1c 100644
--- a/src/mongo/db/exec/sort_executor.cpp
+++ b/src/mongo/db/exec/sort_executor.cpp
@@ -98,7 +98,25 @@ int SortExecutor::Comparator::operator()(const DocumentSorter::Data& lhs,
return 0;
}
-boost::optional<Document> SortExecutor::getNext() {
+boost::optional<Document> SortExecutor::getNextDoc() {
+ auto wsm = getNextWsm();
+ if (!wsm) {
+ return boost::none;
+ }
+
+ // Ensure that this WorkingSetMember only houses a Document. This guarantees that we are not
+ // discarding data inside the working set member (e.g. the RecordId) when returning the Document
+ // to the caller.
+ invariant(wsm->hasOwnedObj());
+
+ // Transfer metadata from the WorkingSetMember to the Document.
+ MutableDocument mutableDoc{std::move(wsm->doc.value())};
+ mutableDoc.setMetadata(wsm->releaseMetadata());
+
+ return mutableDoc.freeze();
+}
+
+boost::optional<WorkingSetMember> SortExecutor::getNextWsm() {
if (_isEOF) {
return boost::none;
}
@@ -113,6 +131,22 @@ boost::optional<Document> SortExecutor::getNext() {
}
void SortExecutor::add(Value sortKey, Document data) {
+ invariant(data.isOwned());
+ WorkingSetMember wsm;
+
+ // Transfer metadata from the Document to the WorkingSetMember.
+ MutableDocument mutableDoc{std::move(data)};
+ wsm.setMetadata(mutableDoc.releaseMetadata());
+
+ wsm.doc.setValue(mutableDoc.freeze());
+ wsm.transitionToOwnedObj();
+ this->add(std::move(sortKey), std::move(wsm));
+}
+
+void SortExecutor::add(Value sortKey, WorkingSetMember data) {
+ // Metadata should be attached directly to the WSM rather than inside the Document.
+ invariant(!data.doc.value().metadata());
+
if (!_sorter) {
_sorter.reset(DocumentSorter::make(makeSortOptions(), Comparator(_sortPattern)));
}
diff --git a/src/mongo/db/exec/sort_executor.h b/src/mongo/db/exec/sort_executor.h
index 95741f41256..86ce3fc2687 100644
--- a/src/mongo/db/exec/sort_executor.h
+++ b/src/mongo/db/exec/sort_executor.h
@@ -29,6 +29,7 @@
#pragma once
+#include "mongo/db/exec/working_set.h"
#include "mongo/db/pipeline/document.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/query/sort_pattern.h"
@@ -52,7 +53,9 @@ public:
std::string tempDir,
bool allowDiskUse);
- boost::optional<Document> getNext();
+ boost::optional<Document> getNextDoc();
+
+ boost::optional<WorkingSetMember> getNextWsm();
const SortPattern& sortPattern() const {
return _sortPattern;
@@ -89,8 +92,13 @@ public:
*/
void add(Value, Document);
+ /**
+ * Add a WorkingSetMember with sort key specified by Value to the DocumentSorter.
+ */
+ void add(Value, WorkingSetMember);
+
private:
- using DocumentSorter = Sorter<Value, Document>;
+ using DocumentSorter = Sorter<Value, WorkingSetMember>;
class Comparator {
public:
Comparator(const SortPattern& sortPattern) : _sort(sortPattern) {}
diff --git a/src/mongo/db/exec/working_set.cpp b/src/mongo/db/exec/working_set.cpp
index 1c565cca8cd..2aacba154ab 100644
--- a/src/mongo/db/exec/working_set.cpp
+++ b/src/mongo/db/exec/working_set.cpp
@@ -88,7 +88,7 @@ void WorkingSet::transitionToRecordIdAndIdx(WorkingSetID id) {
void WorkingSet::transitionToRecordIdAndObj(WorkingSetID id) {
WorkingSetMember* member = get(id);
- member->_state = WorkingSetMember::RID_AND_OBJ;
+ member->transitionToRecordIdAndObj();
}
void WorkingSet::transitionToOwnedObj(WorkingSetID id) {
@@ -123,6 +123,9 @@ void WorkingSetMember::transitionToOwnedObj() {
_state = OWNED_OBJ;
}
+void WorkingSetMember::transitionToRecordIdAndObj() {
+ _state = WorkingSetMember::RID_AND_OBJ;
+}
bool WorkingSetMember::hasRecordId() const {
return _state == RID_AND_IDX || _state == RID_AND_OBJ;
@@ -187,4 +190,89 @@ void WorkingSetMember::resetDocument(SnapshotId snapshot, const BSONObj& obj) {
md.reset(obj, false);
doc.value() = md.freeze();
}
+
+void WorkingSetMember::serializeForSorter(BufBuilder& buf) const {
+ // It is not legal to serialize a Document which has metadata attached to it. Any metadata must
+ // reside directly in the WorkingSetMember.
+ invariant(!doc.value().metadata());
+
+ buf.appendChar(static_cast<char>(_state));
+
+ if (hasObj()) {
+ doc.value().serializeForSorter(buf);
+ buf.appendNum(static_cast<unsigned long long>(doc.snapshotId().toNumber()));
+ }
+
+ if (_state == RID_AND_IDX) {
+ // First append the number of index keys, and then encode them in series.
+ buf.appendNum(static_cast<char>(keyData.size()));
+ for (auto&& indexKeyDatum : keyData) {
+ indexKeyDatum.indexKeyPattern.serializeForSorter(buf);
+ indexKeyDatum.keyData.serializeForSorter(buf);
+ buf.appendNum(indexKeyDatum.indexId);
+ }
+ }
+
+ if (hasRecordId()) {
+ buf.appendNum(recordId.repr());
+ }
+
+ _metadata.serializeForSorter(buf);
+}
+
+WorkingSetMember WorkingSetMember::deserializeForSorter(BufReader& buf,
+ const SorterDeserializeSettings&) {
+ WorkingSetMember wsm;
+
+ // First decode the state, which instructs us on how to interpret the rest of the buffer.
+ wsm._state = static_cast<MemberState>(buf.read<char>());
+
+ if (wsm.hasObj()) {
+ wsm.doc.setValue(
+ Document::deserializeForSorter(buf, Document::SorterDeserializeSettings{}));
+ auto snapshotIdRepr = buf.read<LittleEndian<uint64_t>>();
+ auto snapshotId = snapshotIdRepr ? SnapshotId{snapshotIdRepr} : SnapshotId{};
+ wsm.doc.setSnapshotId(snapshotId);
+ }
+
+ if (wsm.getState() == WorkingSetMember::RID_AND_IDX) {
+ auto numKeys = buf.read<char>();
+ wsm.keyData.reserve(numKeys);
+ for (auto i = 0; i < numKeys; ++i) {
+ auto indexKeyPattern =
+ BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings{}).getOwned();
+ auto indexKey =
+ BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings{}).getOwned();
+ auto indexId = buf.read<LittleEndian<unsigned int>>();
+ wsm.keyData.push_back(
+ IndexKeyDatum{std::move(indexKeyPattern), std::move(indexKey), indexId});
+ }
+
+ // Mark any working set member representing an index key as suspicious on deserialization.
+ // This is needed because the member may have survived a yield while absent from the working
+ // set.
+ wsm.isSuspicious = true;
+ }
+
+ if (wsm.hasRecordId()) {
+ wsm.recordId = RecordId{buf.read<LittleEndian<int64_t>>()};
+ }
+
+ DocumentMetadataFields::deserializeForSorter(buf, &wsm._metadata);
+
+ return wsm;
+}
+
+WorkingSetRegisteredIndexId WorkingSet::registerIndexAccessMethod(
+ const IndexAccessMethod* indexAccess) {
+ for (WorkingSetRegisteredIndexId i = 0; i < _registeredIndexes.size(); ++i) {
+ if (_registeredIndexes[i] == indexAccess) {
+ return i;
+ }
+ }
+
+ _registeredIndexes.push_back(indexAccess);
+ return _registeredIndexes.size() - 1;
+}
+
} // namespace mongo
diff --git a/src/mongo/db/exec/working_set.h b/src/mongo/db/exec/working_set.h
index 6398214ce87..7c5063f0785 100644
--- a/src/mongo/db/exec/working_set.h
+++ b/src/mongo/db/exec/working_set.h
@@ -47,13 +47,21 @@ class WorkingSetMember;
typedef size_t WorkingSetID;
/**
+ * A type used to identify indexes that have been registered with the WorkingSet. A WorkingSetMember
+ * can be associated with a particular index via this id.
+ */
+using WorkingSetRegisteredIndexId = unsigned int;
+
+/**
* The key data extracted from an index. Keeps track of both the key (currently a BSONObj) and
* the index that provided the key. The index key pattern is required to correctly interpret
* the key.
*/
struct IndexKeyDatum {
- IndexKeyDatum(const BSONObj& keyPattern, const BSONObj& key, const IndexAccessMethod* index)
- : indexKeyPattern(keyPattern), keyData(key), index(index) {}
+ IndexKeyDatum(const BSONObj& keyPattern,
+ const BSONObj& key,
+ WorkingSetRegisteredIndexId indexId)
+ : indexKeyPattern(keyPattern), keyData(key), indexId(indexId) {}
/**
* getFieldDotted produces the field with the provided name based on index keyData. The return
@@ -84,7 +92,9 @@ struct IndexKeyDatum {
// This is the BSONObj for the key that we put into the index. Owned by us.
BSONObj keyData;
- const IndexAccessMethod* index;
+ // Associates this index key with an index that has been registered with the WorkingSet. Can be
+ // used to recover pointers to catalog objects for this index from the WorkingSet.
+ WorkingSetRegisteredIndexId indexId;
};
/**
@@ -98,11 +108,6 @@ struct IndexKeyDatum {
*/
class WorkingSetMember {
public:
- /**
- * Reset to an "empty" state.
- */
- void clear();
-
enum MemberState {
// Initial state.
INVALID,
@@ -120,12 +125,23 @@ public:
OWNED_OBJ,
};
+ struct SorterDeserializeSettings {};
+
+ static WorkingSetMember deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&);
+
+ /**
+ * Reset to an "empty" state.
+ */
+ void clear();
+
//
// Member state and state transitions
//
MemberState getState() const;
+ void transitionToRecordIdAndObj();
+
void transitionToOwnedObj();
//
@@ -208,6 +224,12 @@ public:
*/
void resetDocument(SnapshotId snapshot, const BSONObj& obj);
+ void serializeForSorter(BufBuilder& buf) const;
+
+ int memUsageForSorter() const {
+ return getMemUsage();
+ }
+
private:
friend class WorkingSet;
@@ -293,6 +315,20 @@ public:
*/
std::vector<WorkingSetID> getAndClearYieldSensitiveIds();
+ /**
+ * Registers an IndexAccessMethod pointer with the WorkingSet, and returns a handle that can be
+ * used to recover the IndexAccessMethod.
+ */
+ WorkingSetRegisteredIndexId registerIndexAccessMethod(const IndexAccessMethod* indexAccess);
+
+ /**
+ * Returns the IndexAccessMethod for an index that has previously been registered with the
+ * WorkingSet using 'registerIndexAccessMethod()'.
+ */
+ const IndexAccessMethod* retrieveIndexAccessMethod(WorkingSetRegisteredIndexId indexId) const {
+ return _registeredIndexes[indexId];
+ }
+
private:
struct MemberHolder {
// Free list link if freed. Points to self if in use.
@@ -312,6 +348,10 @@ private:
// Contains ids of WSMs that may need to be adjusted when we next yield.
std::vector<WorkingSetID> _yieldSensitiveIds;
+
+ // Holds IndexAccessMethods that have been registered with 'registerIndexAccessMethod()`. The
+ // WorkingSetRegisteredIndexId is the offset into the vector.
+ std::vector<const IndexAccessMethod*> _registeredIndexes;
};
} // namespace mongo
diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp
index 66f2da690d5..e59d7c92d17 100644
--- a/src/mongo/db/exec/working_set_common.cpp
+++ b/src/mongo/db/exec/working_set_common.cpp
@@ -72,10 +72,11 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx,
member->resetDocument(opCtx->recoveryUnit()->getSnapshotId(), record->data.releaseToBson());
if (member->isSuspicious) {
- // Make sure that all of the keyData is still valid for this copy of the document.
- // This ensures both that index-provided filters and sort orders still hold.
- // TODO provide a way for the query planner to opt out of this checking if it is
- // unneeded due to the structure of the plan.
+ // Make sure that all of the keyData is still valid for this copy of the document. This
+ // ensures both that index-provided filters and sort orders still hold.
+ //
+ // TODO provide a way for the query planner to opt out of this checking if it is unneeded
+ // due to the structure of the plan.
invariant(!member->keyData.empty());
for (size_t i = 0; i < member->keyData.size(); i++) {
KeyStringSet keys;
@@ -83,7 +84,8 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx,
// be multikey when ensuring the keyData is still valid.
KeyStringSet* multikeyMetadataKeys = nullptr;
MultikeyPaths* multikeyPaths = nullptr;
- auto* iam = member->keyData[i].index;
+ auto indexId = member->keyData[i].indexId;
+ auto* iam = workingSet->retrieveIndexAccessMethod(indexId);
iam->getKeys(member->doc.value().toBson(),
IndexAccessMethod::GetKeysMode::kEnforceConstraints,
&keys,
diff --git a/src/mongo/db/exec/working_set_test.cpp b/src/mongo/db/exec/working_set_test.cpp
index 13015f2fcf9..67513a75a27 100644
--- a/src/mongo/db/exec/working_set_test.cpp
+++ b/src/mongo/db/exec/working_set_test.cpp
@@ -33,7 +33,9 @@
#include "mongo/db/jsobj.h"
#include "mongo/db/json.h"
#include "mongo/db/pipeline/document.h"
+#include "mongo/db/pipeline/document_value_test_util.h"
#include "mongo/db/storage/snapshot.h"
+#include "mongo/unittest/bson_test_util.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/assert_util.h"
@@ -119,7 +121,7 @@ TEST_F(WorkingSetFixture, getFieldFromIndex) {
string secondName = "y";
int secondValue = 10;
- member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), nullptr));
+ member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), 0));
// Also a minor lie as RecordId is bogus.
ws->transitionToRecordIdAndIdx(id);
BSONElement elt;
@@ -129,8 +131,7 @@ TEST_F(WorkingSetFixture, getFieldFromIndex) {
ASSERT_FALSE(member->getFieldDotted("foo", &elt));
// Add another index datum.
- member->keyData.push_back(
- IndexKeyDatum(BSON(secondName << 1), BSON("" << secondValue), nullptr));
+ member->keyData.push_back(IndexKeyDatum(BSON(secondName << 1), BSON("" << secondValue), 0));
ASSERT_TRUE(member->getFieldDotted(secondName, &elt));
ASSERT_EQUALS(elt.numberInt(), secondValue);
ASSERT_TRUE(member->getFieldDotted(firstName, &elt));
@@ -143,7 +144,7 @@ TEST_F(WorkingSetFixture, getDottedFieldFromIndex) {
string firstName = "x.y";
int firstValue = 5;
- member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), nullptr));
+ member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), 0));
ws->transitionToRecordIdAndIdx(id);
BSONElement elt;
ASSERT_TRUE(member->getFieldDotted(firstName, &elt));
@@ -190,4 +191,93 @@ TEST_F(WorkingSetFixture, MetadataCanBeCorrectlyTransferredBackAndForthFromDocum
ASSERT_TRUE(member->metadata().hasSearchScore());
}
+namespace {
+// Serializes the given working set member to a buffer, then returns a working set member resulting
+// from deserializing this buffer.
+WorkingSetMember roundtripWsmThroughSerialization(const WorkingSetMember& wsm) {
+ BufBuilder builder{};
+ wsm.serializeForSorter(builder);
+ BufReader reader{builder.buf(), static_cast<unsigned>(builder.len())};
+ return WorkingSetMember::deserializeForSorter(reader,
+ WorkingSetMember::SorterDeserializeSettings{});
+}
+} // namespace
+
+TEST_F(WorkingSetFixture, RecordIdAndObjStateCanRoundtripThroughSerialization) {
+ Document doc{{"foo", Value{"bar"_sd}}};
+ member->doc.setValue(doc);
+ member->doc.setSnapshotId(SnapshotId{42u});
+ member->recordId = RecordId{43};
+ ws->transitionToRecordIdAndObj(id);
+ auto roundtripped = roundtripWsmThroughSerialization(*member);
+ ASSERT_EQ(WorkingSetMember::RID_AND_OBJ, roundtripped.getState());
+ ASSERT_DOCUMENT_EQ(roundtripped.doc.value(), doc);
+ ASSERT_EQ(roundtripped.doc.snapshotId().toNumber(), 42u);
+ ASSERT_EQ(roundtripped.recordId.repr(), 43);
+ ASSERT_FALSE(roundtripped.isSuspicious);
+ ASSERT_FALSE(roundtripped.metadata());
+}
+
+TEST_F(WorkingSetFixture, OwnedObjStateCanRoundtripThroughSerialization) {
+ Document doc{{"foo", Value{"bar"_sd}}};
+ member->doc.setValue(doc);
+ member->doc.setSnapshotId(SnapshotId{42u});
+ ws->transitionToOwnedObj(id);
+ auto roundtripped = roundtripWsmThroughSerialization(*member);
+ ASSERT_EQ(WorkingSetMember::OWNED_OBJ, roundtripped.getState());
+ ASSERT_DOCUMENT_EQ(roundtripped.doc.value(), doc);
+ ASSERT_EQ(roundtripped.doc.snapshotId().toNumber(), 42u);
+ ASSERT(roundtripped.recordId.isNull());
+ ASSERT_FALSE(roundtripped.isSuspicious);
+ ASSERT_FALSE(roundtripped.metadata());
+}
+
+TEST_F(WorkingSetFixture, RecordIdAndIdxStateCanRoundtripThroughSerialization) {
+ member->recordId = RecordId{43};
+ member->keyData.emplace_back(BSON("a" << 1 << "b" << 1), BSON("" << 3 << "" << 4), 8u);
+ member->keyData.emplace_back(BSON("c" << -1), BSON("" << 5), 9u);
+ ws->transitionToRecordIdAndIdx(id);
+ ASSERT_FALSE(member->isSuspicious);
+
+ auto roundtripped = roundtripWsmThroughSerialization(*member);
+ ASSERT_EQ(WorkingSetMember::RID_AND_IDX, roundtripped.getState());
+ ASSERT_EQ(roundtripped.recordId.repr(), 43);
+ ASSERT_EQ(roundtripped.keyData.size(), 2u);
+
+ ASSERT_BSONOBJ_EQ(roundtripped.keyData[0].indexKeyPattern, BSON("a" << 1 << "b" << 1));
+ ASSERT_BSONOBJ_EQ(roundtripped.keyData[0].keyData, BSON("" << 3 << "" << 4));
+ ASSERT_EQ(roundtripped.keyData[0].indexId, 8u);
+
+ ASSERT_BSONOBJ_EQ(roundtripped.keyData[1].indexKeyPattern, BSON("c" << -1));
+ ASSERT_BSONOBJ_EQ(roundtripped.keyData[1].keyData, BSON("" << 5));
+ ASSERT_EQ(roundtripped.keyData[1].indexId, 9u);
+
+ ASSERT_TRUE(roundtripped.isSuspicious);
+ ASSERT_FALSE(roundtripped.metadata());
+}
+
+TEST_F(WorkingSetFixture, WsmWithMetadataCanRoundtripThroughSerialization) {
+ Document doc{{"foo", Value{"bar"_sd}}};
+ member->doc.setValue(doc);
+ member->metadata().setTextScore(42.0);
+ member->metadata().setSearchScore(43.0);
+ ws->transitionToRecordIdAndObj(id);
+ auto roundtripped = roundtripWsmThroughSerialization(*member);
+
+ ASSERT_EQ(WorkingSetMember::RID_AND_OBJ, roundtripped.getState());
+ ASSERT_DOCUMENT_EQ(roundtripped.doc.value(), doc);
+ ASSERT_FALSE(roundtripped.doc.value().metadata());
+ ASSERT_TRUE(roundtripped.doc.snapshotId().isNull());
+ ASSERT_TRUE(roundtripped.recordId.isNull());
+ ASSERT_FALSE(roundtripped.isSuspicious);
+
+ ASSERT_TRUE(roundtripped.metadata());
+ ASSERT_TRUE(roundtripped.metadata().hasTextScore());
+ ASSERT_EQ(roundtripped.metadata().getTextScore(), 42.0);
+ ASSERT_TRUE(roundtripped.metadata().hasSearchScore());
+ ASSERT_EQ(roundtripped.metadata().getSearchScore(), 43.0);
+ ASSERT_FALSE(roundtripped.metadata().hasGeoNearPoint());
+ ASSERT_FALSE(roundtripped.metadata().hasGeoNearDistance());
+}
+
} // namespace mongo
diff --git a/src/mongo/db/index/sort_key_generator_test.cpp b/src/mongo/db/index/sort_key_generator_test.cpp
index 169d0cb8369..2b65febd058 100644
--- a/src/mongo/db/index/sort_key_generator_test.cpp
+++ b/src/mongo/db/index/sort_key_generator_test.cpp
@@ -279,7 +279,7 @@ public:
}
void setRecordIdAndIdx(BSONObj keyPattern, BSONObj key) {
- _member->keyData.push_back(IndexKeyDatum(std::move(keyPattern), std::move(key), nullptr));
+ _member->keyData.push_back(IndexKeyDatum(std::move(keyPattern), std::move(key), 0));
_workingSet.transitionToRecordIdAndIdx(_wsid);
}
diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp
index adbc3692f5b..cf7c58a819e 100644
--- a/src/mongo/db/pipeline/document.cpp
+++ b/src/mongo/db/pipeline/document.cpp
@@ -323,6 +323,7 @@ intrusive_ptr<DocumentStorage> DocumentStorage::clone() const {
dassert(out->_numFields == _numFields);
}
+ out->_haveLazyLoadedMetadata = _haveLazyLoadedMetadata;
out->_metadataFields = _metadataFields;
return out;
@@ -361,7 +362,7 @@ void DocumentStorage::reset(const BSONObj& bson, bool stripMetadata) {
}
void DocumentStorage::loadLazyMetadata() const {
- if (_metadataFields) {
+ if (_haveLazyLoadedMetadata) {
return;
}
@@ -397,6 +398,8 @@ void DocumentStorage::loadLazyMetadata() const {
}
}
}
+
+ _haveLazyLoadedMetadata = true;
}
Document::Document(const BSONObj& bson) {
diff --git a/src/mongo/db/pipeline/document_internal.h b/src/mongo/db/pipeline/document_internal.h
index 28417a6a46a..d521fa5aec7 100644
--- a/src/mongo/db/pipeline/document_internal.h
+++ b/src/mongo/db/pipeline/document_internal.h
@@ -422,6 +422,7 @@ public:
}
DocumentMetadataFields releaseMetadata() {
+ loadLazyMetadata();
return std::move(_metadataFields);
}
@@ -533,6 +534,11 @@ private:
BSONObj _bson;
mutable BSONObjIterator _bsonIt;
+ // If '_stripMetadata' is true, tracks whether or not the metadata has been lazy-loaded from the
+ // backing '_bson' object. If so, then no attempt will be made to load the metadata again, even
+ // if the metadata has been released by a call to 'releaseMetadata()'.
+ mutable bool _haveLazyLoadedMetadata = false;
+
mutable DocumentMetadataFields _metadataFields;
// The storage constructed from a BSON value may contain metadata. When we process the BSON we
diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp
index 6c77a8fa013..027d88037f8 100644
--- a/src/mongo/db/pipeline/document_source_sort.cpp
+++ b/src/mongo/db/pipeline/document_source_sort.cpp
@@ -83,7 +83,7 @@ DocumentSource::GetNextResult DocumentSourceSort::doGetNext() {
invariant(populationResult.isEOF());
}
- auto result = _sortExecutor->getNext();
+ auto result = _sortExecutor->getNextDoc();
if (!result)
return GetNextResult::makeEOF();
return GetNextResult(std::move(*result));
@@ -202,7 +202,7 @@ void DocumentSourceSort::loadDocument(Document&& doc) {
// already computed the sort key we'd have split the pipeline there, would be merging presorted
// documents, and wouldn't use this method.
std::tie(sortKey, docForSorter) = extractSortKey(std::move(doc));
- _sortExecutor->add(sortKey, docForSorter);
+ _sortExecutor->add(sortKey, std::move(docForSorter));
}
void DocumentSourceSort::loadingDone() {
diff --git a/src/mongo/db/storage/snapshot.h b/src/mongo/db/storage/snapshot.h
index 371e4cec29d..75870a7791f 100644
--- a/src/mongo/db/storage/snapshot.h
+++ b/src/mongo/db/storage/snapshot.h
@@ -60,6 +60,10 @@ public:
return std::to_string(_id);
}
+ uint64_t toNumber() const {
+ return _id;
+ }
+
private:
uint64_t _id;
};
diff --git a/src/mongo/dbtests/query_stage_sort_key_generator.cpp b/src/mongo/dbtests/query_stage_sort_key_generator.cpp
index 51fd476078c..bc85c3dfab2 100644
--- a/src/mongo/dbtests/query_stage_sort_key_generator.cpp
+++ b/src/mongo/dbtests/query_stage_sort_key_generator.cpp
@@ -154,8 +154,8 @@ TEST(SortKeyGeneratorStageTest, SortKeyArray) {
TEST(SortKeyGeneratorStageTest, SortKeyCoveredNormal) {
CollatorInterface* collator = nullptr;
- BSONObj actualOut = extractSortKeyCovered(
- "{a: 1}", IndexKeyDatum(BSON("a" << 1), BSON("" << 5), nullptr), collator);
+ BSONObj actualOut =
+ extractSortKeyCovered("{a: 1}", IndexKeyDatum(BSON("a" << 1), BSON("" << 5), 0), collator);
BSONObj expectedOut = BSON("" << 5);
ASSERT_BSONOBJ_EQ(actualOut, expectedOut);
}
@@ -164,7 +164,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredEmbedded) {
CollatorInterface* collator = nullptr;
BSONObj actualOut = extractSortKeyCovered(
"{'a.c': 1}",
- IndexKeyDatum(BSON("a.c" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr),
+ IndexKeyDatum(BSON("a.c" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0),
collator);
BSONObj expectedOut = BSON("" << 5);
ASSERT_BSONOBJ_EQ(actualOut, expectedOut);
@@ -174,7 +174,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound) {
CollatorInterface* collator = nullptr;
BSONObj actualOut = extractSortKeyCovered(
"{a: 1, c: 1}",
- IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr),
+ IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0),
collator);
BSONObj expectedOut = BSON("" << 5 << "" << 6);
ASSERT_BSONOBJ_EQ(actualOut, expectedOut);
@@ -185,7 +185,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound2) {
BSONObj actualOut = extractSortKeyCovered("{a: 1, b: 1}",
IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1),
BSON("" << 5 << "" << 6 << "" << 4),
- nullptr),
+ 0),
collator);
BSONObj expectedOut = BSON("" << 5 << "" << 6);
ASSERT_BSONOBJ_EQ(actualOut, expectedOut);
@@ -197,7 +197,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound3) {
extractSortKeyCovered("{b: 1, c: 1}",
IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1 << "d" << 1),
BSON("" << 5 << "" << 6 << "" << 4 << "" << 9000),
- nullptr),
+ 0),
collator);
BSONObj expectedOut = BSON("" << 6 << "" << 4);
ASSERT_BSONOBJ_EQ(actualOut, expectedOut);
@@ -225,7 +225,7 @@ TEST(SortKeyGeneratorStageTest, CollatorAppliesWhenExtractingCoveredSortKeyStrin
IndexKeyDatum(BSON("a" << 1 << "b" << 1),
BSON("" << 4 << ""
<< "foo"),
- nullptr),
+ 0),
&collator);
BSONObj expectedOut = BSON(""
<< "oof");