summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDavid Storch <david.storch@mongodb.com>2019-09-12 21:31:40 +0000
committerevergreen <evergreen@mongodb.com>2019-09-12 21:31:40 +0000
commit9e98a74e5f206bf28f598c7e94a4e3901c7a1bd7 (patch)
tree8de7ef2fcb2eca439a49af570998f637761bebe8 /src
parente3a8d37b8775d90f85cb64f3630cb547a886baa1 (diff)
downloadmongo-9e98a74e5f206bf28f598c7e94a4e3901c7a1bd7.tar.gz
SERVER-42981 Make WorkingSetMember compatible for use with the Sorter.
The bulk of this change is to implement serialization and deserialization routines for WorkingSetMember, so that the Sorter can spill WorkingSetMembers to disk. In addition, this changes the SortExecutor to sort WorkingSetMembers internally as opposed to sorting Documents.
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/exec/SConscript1
-rw-r--r--src/mongo/db/exec/count_scan.cpp2
-rw-r--r--src/mongo/db/exec/distinct_scan.cpp4
-rw-r--r--src/mongo/db/exec/idhack.cpp4
-rw-r--r--src/mongo/db/exec/index_scan.cpp4
-rw-r--r--src/mongo/db/exec/near.cpp2
-rw-r--r--src/mongo/db/exec/projection_exec_test.cpp14
-rw-r--r--src/mongo/db/exec/requires_index_stage.cpp4
-rw-r--r--src/mongo/db/exec/requires_index_stage.h12
-rw-r--r--src/mongo/db/exec/sort_executor.cpp36
-rw-r--r--src/mongo/db/exec/sort_executor.h12
-rw-r--r--src/mongo/db/exec/working_set.cpp90
-rw-r--r--src/mongo/db/exec/working_set.h56
-rw-r--r--src/mongo/db/exec/working_set_common.cpp12
-rw-r--r--src/mongo/db/exec/working_set_test.cpp98
-rw-r--r--src/mongo/db/index/sort_key_generator_test.cpp2
-rw-r--r--src/mongo/db/pipeline/document.cpp5
-rw-r--r--src/mongo/db/pipeline/document_internal.h6
-rw-r--r--src/mongo/db/pipeline/document_source_sort.cpp4
-rw-r--r--src/mongo/db/storage/snapshot.h4
-rw-r--r--src/mongo/dbtests/query_stage_sort_key_generator.cpp14
21 files changed, 337 insertions, 49 deletions
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript
index 62da7a2a7ea..06fdd5c3238 100644
--- a/src/mongo/db/exec/SConscript
+++ b/src/mongo/db/exec/SConscript
@@ -41,6 +41,7 @@ sortExecutorEnv.Library(
'$BUILD_DIR/mongo/db/storage/storage_options',
'$BUILD_DIR/mongo/s/is_mongos',
'$BUILD_DIR/third_party/shim_snappy',
+ 'working_set',
],
)
diff --git a/src/mongo/db/exec/count_scan.cpp b/src/mongo/db/exec/count_scan.cpp
index c24f3ef1343..be7914299d9 100644
--- a/src/mongo/db/exec/count_scan.cpp
+++ b/src/mongo/db/exec/count_scan.cpp
@@ -74,7 +74,7 @@ const char* CountScan::kStageType = "COUNT_SCAN";
// the CountScanParams rather than resolving them via the IndexDescriptor, since these may differ
// from the descriptor's contents.
CountScan::CountScan(OperationContext* opCtx, CountScanParams params, WorkingSet* workingSet)
- : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor),
+ : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor, workingSet),
_workingSet(workingSet),
_keyPattern(std::move(params.keyPattern)),
_shouldDedup(params.isMultiKey),
diff --git a/src/mongo/db/exec/distinct_scan.cpp b/src/mongo/db/exec/distinct_scan.cpp
index 3f8c3342e80..24e45ee62d2 100644
--- a/src/mongo/db/exec/distinct_scan.cpp
+++ b/src/mongo/db/exec/distinct_scan.cpp
@@ -47,7 +47,7 @@ using std::vector;
const char* DistinctScan::kStageType = "DISTINCT_SCAN";
DistinctScan::DistinctScan(OperationContext* opCtx, DistinctParams params, WorkingSet* workingSet)
- : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor),
+ : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor, workingSet),
_workingSet(workingSet),
_keyPattern(std::move(params.keyPattern)),
_scanDirection(params.scanDirection),
@@ -122,7 +122,7 @@ PlanStage::StageState DistinctScan::doWork(WorkingSetID* out) {
WorkingSetID id = _workingSet->allocate();
WorkingSetMember* member = _workingSet->get(id);
member->recordId = kv->loc;
- member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, indexAccessMethod()));
+ member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, workingSetIndexId()));
_workingSet->transitionToRecordIdAndIdx(id);
*out = id;
diff --git a/src/mongo/db/exec/idhack.cpp b/src/mongo/db/exec/idhack.cpp
index 02e347dda4d..64848cb50e6 100644
--- a/src/mongo/db/exec/idhack.cpp
+++ b/src/mongo/db/exec/idhack.cpp
@@ -53,7 +53,7 @@ IDHackStage::IDHackStage(OperationContext* opCtx,
CanonicalQuery* query,
WorkingSet* ws,
const IndexDescriptor* descriptor)
- : RequiresIndexStage(kStageType, opCtx, descriptor),
+ : RequiresIndexStage(kStageType, opCtx, descriptor, ws),
_workingSet(ws),
_key(query->getQueryObj()["_id"].wrap()) {
_specificStats.indexName = descriptor->indexName();
@@ -64,7 +64,7 @@ IDHackStage::IDHackStage(OperationContext* opCtx,
const BSONObj& key,
WorkingSet* ws,
const IndexDescriptor* descriptor)
- : RequiresIndexStage(kStageType, opCtx, descriptor), _workingSet(ws), _key(key) {
+ : RequiresIndexStage(kStageType, opCtx, descriptor, ws), _workingSet(ws), _key(key) {
_specificStats.indexName = descriptor->indexName();
}
diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp
index cd32deb7c0d..0e5d940e2f0 100644
--- a/src/mongo/db/exec/index_scan.cpp
+++ b/src/mongo/db/exec/index_scan.cpp
@@ -64,7 +64,7 @@ IndexScan::IndexScan(OperationContext* opCtx,
IndexScanParams params,
WorkingSet* workingSet,
const MatchExpression* filter)
- : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor),
+ : RequiresIndexStage(kStageType, opCtx, params.indexDescriptor, workingSet),
_workingSet(workingSet),
_keyPattern(params.keyPattern.getOwned()),
_bounds(std::move(params.bounds)),
@@ -233,7 +233,7 @@ PlanStage::StageState IndexScan::doWork(WorkingSetID* out) {
WorkingSetID id = _workingSet->allocate();
WorkingSetMember* member = _workingSet->get(id);
member->recordId = kv->loc;
- member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, indexAccessMethod()));
+ member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, workingSetIndexId()));
_workingSet->transitionToRecordIdAndIdx(id);
if (_addKeyMetadata) {
diff --git a/src/mongo/db/exec/near.cpp b/src/mongo/db/exec/near.cpp
index 91a3a60317a..ac16083b0a5 100644
--- a/src/mongo/db/exec/near.cpp
+++ b/src/mongo/db/exec/near.cpp
@@ -47,7 +47,7 @@ NearStage::NearStage(OperationContext* opCtx,
StageType type,
WorkingSet* workingSet,
const IndexDescriptor* indexDescriptor)
- : RequiresIndexStage(typeName, opCtx, indexDescriptor),
+ : RequiresIndexStage(typeName, opCtx, indexDescriptor, workingSet),
_workingSet(workingSet),
_searchState(SearchState_Initializing),
_nextIntervalStats(nullptr),
diff --git a/src/mongo/db/exec/projection_exec_test.cpp b/src/mongo/db/exec/projection_exec_test.cpp
index 33e14d01406..684d3a07f35 100644
--- a/src/mongo/db/exec/projection_exec_test.cpp
+++ b/src/mongo/db/exec/projection_exec_test.cpp
@@ -193,7 +193,7 @@ TEST(ProjectionExecTest, TransformCoveredDottedProjection) {
ASSERT_EQ(boost::make_optional("{ b: { c: 2, d: 3, f: { g: 4, h: 5 } } }"s),
project("{'b.c': 1, 'b.d': 1, 'b.f.g': 1, 'b.f.h': 1}",
"{}",
- IndexKeyDatum(keyPattern, keyData, nullptr)));
+ IndexKeyDatum(keyPattern, keyData, 0)));
}
TEST(ProjectionExecTest, TransformNonCoveredDottedProjection) {
@@ -249,7 +249,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredNormal) {
ASSERT_EQ(boost::make_optional("{ a: 5, b: { : 5 } }"s),
project("{_id: 0, a: 1, b: {$meta: 'sortKey'}}",
"{}",
- IndexKeyDatum(BSON("a" << 1), BSON("" << 5), nullptr),
+ IndexKeyDatum(BSON("a" << 1), BSON("" << 5), 0),
boost::none, // collator
BSON("" << 5))); // sortKey
}
@@ -258,7 +258,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredOverwrite) {
ASSERT_EQ(boost::make_optional("{ a: { : 5 } }"s),
project("{_id: 0, a: 1, a: {$meta: 'sortKey'}}",
"{}",
- IndexKeyDatum(BSON("a" << 1), BSON("" << 5), nullptr),
+ IndexKeyDatum(BSON("a" << 1), BSON("" << 5), 0),
boost::none, // collator
BSON("" << 5))); // sortKey
}
@@ -267,7 +267,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredAdditionalData) {
ASSERT_EQ(boost::make_optional("{ a: 5, c: 6, b: { : 5 } }"s),
project("{_id: 0, a: 1, b: {$meta: 'sortKey'}, c: 1}",
"{}",
- IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr),
+ IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0),
boost::none, // collator
BSON("" << 5))); // sortKey
}
@@ -276,7 +276,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredCompound) {
ASSERT_EQ(boost::make_optional("{ a: 5, b: { : 5, : 6 } }"s),
project("{_id: 0, a: 1, b: {$meta: 'sortKey'}}",
"{}",
- IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr),
+ IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0),
boost::none, // collator
BSON("" << 5 << "" << 6))); // sortKey
}
@@ -287,7 +287,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredCompound2) {
"{}",
IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1),
BSON("" << 5 << "" << 6 << "" << 4),
- nullptr),
+ 0),
boost::none, // collator
BSON("" << 5 << "" << 6))); // sortKey
}
@@ -298,7 +298,7 @@ TEST(ProjectionExecTest, TransformMetaSortKeyCoveredCompound3) {
"{}",
IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1 << "d" << 1),
BSON("" << 5 << "" << 6 << "" << 4 << "" << 9000),
- nullptr),
+ 0),
boost::none, // collator
BSON("" << 6 << "" << 4))); // sortKey
}
diff --git a/src/mongo/db/exec/requires_index_stage.cpp b/src/mongo/db/exec/requires_index_stage.cpp
index 307de4b7409..2ff0f373480 100644
--- a/src/mongo/db/exec/requires_index_stage.cpp
+++ b/src/mongo/db/exec/requires_index_stage.cpp
@@ -35,7 +35,8 @@ namespace mongo {
RequiresIndexStage::RequiresIndexStage(const char* stageType,
OperationContext* opCtx,
- const IndexDescriptor* indexDescriptor)
+ const IndexDescriptor* indexDescriptor,
+ WorkingSet* workingSet)
: RequiresCollectionStage(stageType, opCtx, indexDescriptor->getCollection()),
_weakIndexCatalogEntry(collection()->getIndexCatalog()->getEntryShared(indexDescriptor)) {
auto indexCatalogEntry = _weakIndexCatalogEntry.lock();
@@ -44,6 +45,7 @@ RequiresIndexStage::RequiresIndexStage(const char* stageType,
invariant(_indexDescriptor);
invariant(_indexAccessMethod);
_indexName = _indexDescriptor->indexName();
+ _workingSetIndexId = workingSet->registerIndexAccessMethod(_indexAccessMethod);
}
void RequiresIndexStage::doSaveStateRequiresCollection() {
diff --git a/src/mongo/db/exec/requires_index_stage.h b/src/mongo/db/exec/requires_index_stage.h
index 030aa369306..fca94bf0de0 100644
--- a/src/mongo/db/exec/requires_index_stage.h
+++ b/src/mongo/db/exec/requires_index_stage.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/db/exec/requires_collection_stage.h"
+#include "mongo/db/exec/working_set.h"
#include "mongo/db/index/index_descriptor.h"
namespace mongo {
@@ -48,7 +49,8 @@ class RequiresIndexStage : public RequiresCollectionStage {
public:
RequiresIndexStage(const char* stageType,
OperationContext* opCtx,
- const IndexDescriptor* indexDescriptor);
+ const IndexDescriptor* indexDescriptor,
+ WorkingSet* workingSet);
virtual ~RequiresIndexStage() = default;
@@ -75,6 +77,10 @@ protected:
return _indexAccessMethod;
}
+ WorkingSetRegisteredIndexId workingSetIndexId() const {
+ return _workingSetIndexId;
+ }
+
private:
// We keep a weak_ptr to the index catalog entry in order to detect when the underlying catalog
// object has been destroyed, e.g. due to an index drop. In this scenario, the
@@ -93,6 +99,10 @@ private:
const IndexAccessMethod* _indexAccessMethod;
std::string _indexName;
+
+ // An indentifier for the index required by this stage. Any working set member allocated to
+ // represent an index key from this index must include this id.
+ WorkingSetRegisteredIndexId _workingSetIndexId;
};
} // namespace mongo
diff --git a/src/mongo/db/exec/sort_executor.cpp b/src/mongo/db/exec/sort_executor.cpp
index 8fd20cfda46..cbcfaa78b1c 100644
--- a/src/mongo/db/exec/sort_executor.cpp
+++ b/src/mongo/db/exec/sort_executor.cpp
@@ -98,7 +98,25 @@ int SortExecutor::Comparator::operator()(const DocumentSorter::Data& lhs,
return 0;
}
-boost::optional<Document> SortExecutor::getNext() {
+boost::optional<Document> SortExecutor::getNextDoc() {
+ auto wsm = getNextWsm();
+ if (!wsm) {
+ return boost::none;
+ }
+
+ // Ensure that this WorkingSetMember only houses a Document. This guarantees that we are not
+ // discarding data inside the working set member (e.g. the RecordId) when returning the Document
+ // to the caller.
+ invariant(wsm->hasOwnedObj());
+
+ // Transfer metadata from the WorkingSetMember to the Document.
+ MutableDocument mutableDoc{std::move(wsm->doc.value())};
+ mutableDoc.setMetadata(wsm->releaseMetadata());
+
+ return mutableDoc.freeze();
+}
+
+boost::optional<WorkingSetMember> SortExecutor::getNextWsm() {
if (_isEOF) {
return boost::none;
}
@@ -113,6 +131,22 @@ boost::optional<Document> SortExecutor::getNext() {
}
void SortExecutor::add(Value sortKey, Document data) {
+ invariant(data.isOwned());
+ WorkingSetMember wsm;
+
+ // Transfer metadata from the Document to the WorkingSetMember.
+ MutableDocument mutableDoc{std::move(data)};
+ wsm.setMetadata(mutableDoc.releaseMetadata());
+
+ wsm.doc.setValue(mutableDoc.freeze());
+ wsm.transitionToOwnedObj();
+ this->add(std::move(sortKey), std::move(wsm));
+}
+
+void SortExecutor::add(Value sortKey, WorkingSetMember data) {
+ // Metadata should be attached directly to the WSM rather than inside the Document.
+ invariant(!data.doc.value().metadata());
+
if (!_sorter) {
_sorter.reset(DocumentSorter::make(makeSortOptions(), Comparator(_sortPattern)));
}
diff --git a/src/mongo/db/exec/sort_executor.h b/src/mongo/db/exec/sort_executor.h
index 95741f41256..86ce3fc2687 100644
--- a/src/mongo/db/exec/sort_executor.h
+++ b/src/mongo/db/exec/sort_executor.h
@@ -29,6 +29,7 @@
#pragma once
+#include "mongo/db/exec/working_set.h"
#include "mongo/db/pipeline/document.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/query/sort_pattern.h"
@@ -52,7 +53,9 @@ public:
std::string tempDir,
bool allowDiskUse);
- boost::optional<Document> getNext();
+ boost::optional<Document> getNextDoc();
+
+ boost::optional<WorkingSetMember> getNextWsm();
const SortPattern& sortPattern() const {
return _sortPattern;
@@ -89,8 +92,13 @@ public:
*/
void add(Value, Document);
+ /**
+ * Add a WorkingSetMember with sort key specified by Value to the DocumentSorter.
+ */
+ void add(Value, WorkingSetMember);
+
private:
- using DocumentSorter = Sorter<Value, Document>;
+ using DocumentSorter = Sorter<Value, WorkingSetMember>;
class Comparator {
public:
Comparator(const SortPattern& sortPattern) : _sort(sortPattern) {}
diff --git a/src/mongo/db/exec/working_set.cpp b/src/mongo/db/exec/working_set.cpp
index 1c565cca8cd..2aacba154ab 100644
--- a/src/mongo/db/exec/working_set.cpp
+++ b/src/mongo/db/exec/working_set.cpp
@@ -88,7 +88,7 @@ void WorkingSet::transitionToRecordIdAndIdx(WorkingSetID id) {
void WorkingSet::transitionToRecordIdAndObj(WorkingSetID id) {
WorkingSetMember* member = get(id);
- member->_state = WorkingSetMember::RID_AND_OBJ;
+ member->transitionToRecordIdAndObj();
}
void WorkingSet::transitionToOwnedObj(WorkingSetID id) {
@@ -123,6 +123,9 @@ void WorkingSetMember::transitionToOwnedObj() {
_state = OWNED_OBJ;
}
+void WorkingSetMember::transitionToRecordIdAndObj() {
+ _state = WorkingSetMember::RID_AND_OBJ;
+}
bool WorkingSetMember::hasRecordId() const {
return _state == RID_AND_IDX || _state == RID_AND_OBJ;
@@ -187,4 +190,89 @@ void WorkingSetMember::resetDocument(SnapshotId snapshot, const BSONObj& obj) {
md.reset(obj, false);
doc.value() = md.freeze();
}
+
+void WorkingSetMember::serializeForSorter(BufBuilder& buf) const {
+ // It is not legal to serialize a Document which has metadata attached to it. Any metadata must
+ // reside directly in the WorkingSetMember.
+ invariant(!doc.value().metadata());
+
+ buf.appendChar(static_cast<char>(_state));
+
+ if (hasObj()) {
+ doc.value().serializeForSorter(buf);
+ buf.appendNum(static_cast<unsigned long long>(doc.snapshotId().toNumber()));
+ }
+
+ if (_state == RID_AND_IDX) {
+ // First append the number of index keys, and then encode them in series.
+ buf.appendNum(static_cast<char>(keyData.size()));
+ for (auto&& indexKeyDatum : keyData) {
+ indexKeyDatum.indexKeyPattern.serializeForSorter(buf);
+ indexKeyDatum.keyData.serializeForSorter(buf);
+ buf.appendNum(indexKeyDatum.indexId);
+ }
+ }
+
+ if (hasRecordId()) {
+ buf.appendNum(recordId.repr());
+ }
+
+ _metadata.serializeForSorter(buf);
+}
+
+WorkingSetMember WorkingSetMember::deserializeForSorter(BufReader& buf,
+ const SorterDeserializeSettings&) {
+ WorkingSetMember wsm;
+
+ // First decode the state, which instructs us on how to interpret the rest of the buffer.
+ wsm._state = static_cast<MemberState>(buf.read<char>());
+
+ if (wsm.hasObj()) {
+ wsm.doc.setValue(
+ Document::deserializeForSorter(buf, Document::SorterDeserializeSettings{}));
+ auto snapshotIdRepr = buf.read<LittleEndian<uint64_t>>();
+ auto snapshotId = snapshotIdRepr ? SnapshotId{snapshotIdRepr} : SnapshotId{};
+ wsm.doc.setSnapshotId(snapshotId);
+ }
+
+ if (wsm.getState() == WorkingSetMember::RID_AND_IDX) {
+ auto numKeys = buf.read<char>();
+ wsm.keyData.reserve(numKeys);
+ for (auto i = 0; i < numKeys; ++i) {
+ auto indexKeyPattern =
+ BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings{}).getOwned();
+ auto indexKey =
+ BSONObj::deserializeForSorter(buf, BSONObj::SorterDeserializeSettings{}).getOwned();
+ auto indexId = buf.read<LittleEndian<unsigned int>>();
+ wsm.keyData.push_back(
+ IndexKeyDatum{std::move(indexKeyPattern), std::move(indexKey), indexId});
+ }
+
+ // Mark any working set member representing an index key as suspicious on deserialization.
+ // This is needed because the member may have survived a yield while absent from the working
+ // set.
+ wsm.isSuspicious = true;
+ }
+
+ if (wsm.hasRecordId()) {
+ wsm.recordId = RecordId{buf.read<LittleEndian<int64_t>>()};
+ }
+
+ DocumentMetadataFields::deserializeForSorter(buf, &wsm._metadata);
+
+ return wsm;
+}
+
+WorkingSetRegisteredIndexId WorkingSet::registerIndexAccessMethod(
+ const IndexAccessMethod* indexAccess) {
+ for (WorkingSetRegisteredIndexId i = 0; i < _registeredIndexes.size(); ++i) {
+ if (_registeredIndexes[i] == indexAccess) {
+ return i;
+ }
+ }
+
+ _registeredIndexes.push_back(indexAccess);
+ return _registeredIndexes.size() - 1;
+}
+
} // namespace mongo
diff --git a/src/mongo/db/exec/working_set.h b/src/mongo/db/exec/working_set.h
index 6398214ce87..7c5063f0785 100644
--- a/src/mongo/db/exec/working_set.h
+++ b/src/mongo/db/exec/working_set.h
@@ -47,13 +47,21 @@ class WorkingSetMember;
typedef size_t WorkingSetID;
/**
+ * A type used to identify indexes that have been registered with the WorkingSet. A WorkingSetMember
+ * can be associated with a particular index via this id.
+ */
+using WorkingSetRegisteredIndexId = unsigned int;
+
+/**
* The key data extracted from an index. Keeps track of both the key (currently a BSONObj) and
* the index that provided the key. The index key pattern is required to correctly interpret
* the key.
*/
struct IndexKeyDatum {
- IndexKeyDatum(const BSONObj& keyPattern, const BSONObj& key, const IndexAccessMethod* index)
- : indexKeyPattern(keyPattern), keyData(key), index(index) {}
+ IndexKeyDatum(const BSONObj& keyPattern,
+ const BSONObj& key,
+ WorkingSetRegisteredIndexId indexId)
+ : indexKeyPattern(keyPattern), keyData(key), indexId(indexId) {}
/**
* getFieldDotted produces the field with the provided name based on index keyData. The return
@@ -84,7 +92,9 @@ struct IndexKeyDatum {
// This is the BSONObj for the key that we put into the index. Owned by us.
BSONObj keyData;
- const IndexAccessMethod* index;
+ // Associates this index key with an index that has been registered with the WorkingSet. Can be
+ // used to recover pointers to catalog objects for this index from the WorkingSet.
+ WorkingSetRegisteredIndexId indexId;
};
/**
@@ -98,11 +108,6 @@ struct IndexKeyDatum {
*/
class WorkingSetMember {
public:
- /**
- * Reset to an "empty" state.
- */
- void clear();
-
enum MemberState {
// Initial state.
INVALID,
@@ -120,12 +125,23 @@ public:
OWNED_OBJ,
};
+ struct SorterDeserializeSettings {};
+
+ static WorkingSetMember deserializeForSorter(BufReader& buf, const SorterDeserializeSettings&);
+
+ /**
+ * Reset to an "empty" state.
+ */
+ void clear();
+
//
// Member state and state transitions
//
MemberState getState() const;
+ void transitionToRecordIdAndObj();
+
void transitionToOwnedObj();
//
@@ -208,6 +224,12 @@ public:
*/
void resetDocument(SnapshotId snapshot, const BSONObj& obj);
+ void serializeForSorter(BufBuilder& buf) const;
+
+ int memUsageForSorter() const {
+ return getMemUsage();
+ }
+
private:
friend class WorkingSet;
@@ -293,6 +315,20 @@ public:
*/
std::vector<WorkingSetID> getAndClearYieldSensitiveIds();
+ /**
+ * Registers an IndexAccessMethod pointer with the WorkingSet, and returns a handle that can be
+ * used to recover the IndexAccessMethod.
+ */
+ WorkingSetRegisteredIndexId registerIndexAccessMethod(const IndexAccessMethod* indexAccess);
+
+ /**
+ * Returns the IndexAccessMethod for an index that has previously been registered with the
+ * WorkingSet using 'registerIndexAccessMethod()'.
+ */
+ const IndexAccessMethod* retrieveIndexAccessMethod(WorkingSetRegisteredIndexId indexId) const {
+ return _registeredIndexes[indexId];
+ }
+
private:
struct MemberHolder {
// Free list link if freed. Points to self if in use.
@@ -312,6 +348,10 @@ private:
// Contains ids of WSMs that may need to be adjusted when we next yield.
std::vector<WorkingSetID> _yieldSensitiveIds;
+
+ // Holds IndexAccessMethods that have been registered with 'registerIndexAccessMethod()`. The
+ // WorkingSetRegisteredIndexId is the offset into the vector.
+ std::vector<const IndexAccessMethod*> _registeredIndexes;
};
} // namespace mongo
diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp
index 66f2da690d5..e59d7c92d17 100644
--- a/src/mongo/db/exec/working_set_common.cpp
+++ b/src/mongo/db/exec/working_set_common.cpp
@@ -72,10 +72,11 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx,
member->resetDocument(opCtx->recoveryUnit()->getSnapshotId(), record->data.releaseToBson());
if (member->isSuspicious) {
- // Make sure that all of the keyData is still valid for this copy of the document.
- // This ensures both that index-provided filters and sort orders still hold.
- // TODO provide a way for the query planner to opt out of this checking if it is
- // unneeded due to the structure of the plan.
+ // Make sure that all of the keyData is still valid for this copy of the document. This
+ // ensures both that index-provided filters and sort orders still hold.
+ //
+ // TODO provide a way for the query planner to opt out of this checking if it is unneeded
+ // due to the structure of the plan.
invariant(!member->keyData.empty());
for (size_t i = 0; i < member->keyData.size(); i++) {
KeyStringSet keys;
@@ -83,7 +84,8 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx,
// be multikey when ensuring the keyData is still valid.
KeyStringSet* multikeyMetadataKeys = nullptr;
MultikeyPaths* multikeyPaths = nullptr;
- auto* iam = member->keyData[i].index;
+ auto indexId = member->keyData[i].indexId;
+ auto* iam = workingSet->retrieveIndexAccessMethod(indexId);
iam->getKeys(member->doc.value().toBson(),
IndexAccessMethod::GetKeysMode::kEnforceConstraints,
&keys,
diff --git a/src/mongo/db/exec/working_set_test.cpp b/src/mongo/db/exec/working_set_test.cpp
index 13015f2fcf9..67513a75a27 100644
--- a/src/mongo/db/exec/working_set_test.cpp
+++ b/src/mongo/db/exec/working_set_test.cpp
@@ -33,7 +33,9 @@
#include "mongo/db/jsobj.h"
#include "mongo/db/json.h"
#include "mongo/db/pipeline/document.h"
+#include "mongo/db/pipeline/document_value_test_util.h"
#include "mongo/db/storage/snapshot.h"
+#include "mongo/unittest/bson_test_util.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/assert_util.h"
@@ -119,7 +121,7 @@ TEST_F(WorkingSetFixture, getFieldFromIndex) {
string secondName = "y";
int secondValue = 10;
- member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), nullptr));
+ member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), 0));
// Also a minor lie as RecordId is bogus.
ws->transitionToRecordIdAndIdx(id);
BSONElement elt;
@@ -129,8 +131,7 @@ TEST_F(WorkingSetFixture, getFieldFromIndex) {
ASSERT_FALSE(member->getFieldDotted("foo", &elt));
// Add another index datum.
- member->keyData.push_back(
- IndexKeyDatum(BSON(secondName << 1), BSON("" << secondValue), nullptr));
+ member->keyData.push_back(IndexKeyDatum(BSON(secondName << 1), BSON("" << secondValue), 0));
ASSERT_TRUE(member->getFieldDotted(secondName, &elt));
ASSERT_EQUALS(elt.numberInt(), secondValue);
ASSERT_TRUE(member->getFieldDotted(firstName, &elt));
@@ -143,7 +144,7 @@ TEST_F(WorkingSetFixture, getDottedFieldFromIndex) {
string firstName = "x.y";
int firstValue = 5;
- member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), nullptr));
+ member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue), 0));
ws->transitionToRecordIdAndIdx(id);
BSONElement elt;
ASSERT_TRUE(member->getFieldDotted(firstName, &elt));
@@ -190,4 +191,93 @@ TEST_F(WorkingSetFixture, MetadataCanBeCorrectlyTransferredBackAndForthFromDocum
ASSERT_TRUE(member->metadata().hasSearchScore());
}
+namespace {
+// Serializes the given working set member to a buffer, then returns a working set member resulting
+// from deserializing this buffer.
+WorkingSetMember roundtripWsmThroughSerialization(const WorkingSetMember& wsm) {
+ BufBuilder builder{};
+ wsm.serializeForSorter(builder);
+ BufReader reader{builder.buf(), static_cast<unsigned>(builder.len())};
+ return WorkingSetMember::deserializeForSorter(reader,
+ WorkingSetMember::SorterDeserializeSettings{});
+}
+} // namespace
+
+TEST_F(WorkingSetFixture, RecordIdAndObjStateCanRoundtripThroughSerialization) {
+ Document doc{{"foo", Value{"bar"_sd}}};
+ member->doc.setValue(doc);
+ member->doc.setSnapshotId(SnapshotId{42u});
+ member->recordId = RecordId{43};
+ ws->transitionToRecordIdAndObj(id);
+ auto roundtripped = roundtripWsmThroughSerialization(*member);
+ ASSERT_EQ(WorkingSetMember::RID_AND_OBJ, roundtripped.getState());
+ ASSERT_DOCUMENT_EQ(roundtripped.doc.value(), doc);
+ ASSERT_EQ(roundtripped.doc.snapshotId().toNumber(), 42u);
+ ASSERT_EQ(roundtripped.recordId.repr(), 43);
+ ASSERT_FALSE(roundtripped.isSuspicious);
+ ASSERT_FALSE(roundtripped.metadata());
+}
+
+TEST_F(WorkingSetFixture, OwnedObjStateCanRoundtripThroughSerialization) {
+ Document doc{{"foo", Value{"bar"_sd}}};
+ member->doc.setValue(doc);
+ member->doc.setSnapshotId(SnapshotId{42u});
+ ws->transitionToOwnedObj(id);
+ auto roundtripped = roundtripWsmThroughSerialization(*member);
+ ASSERT_EQ(WorkingSetMember::OWNED_OBJ, roundtripped.getState());
+ ASSERT_DOCUMENT_EQ(roundtripped.doc.value(), doc);
+ ASSERT_EQ(roundtripped.doc.snapshotId().toNumber(), 42u);
+ ASSERT(roundtripped.recordId.isNull());
+ ASSERT_FALSE(roundtripped.isSuspicious);
+ ASSERT_FALSE(roundtripped.metadata());
+}
+
+TEST_F(WorkingSetFixture, RecordIdAndIdxStateCanRoundtripThroughSerialization) {
+ member->recordId = RecordId{43};
+ member->keyData.emplace_back(BSON("a" << 1 << "b" << 1), BSON("" << 3 << "" << 4), 8u);
+ member->keyData.emplace_back(BSON("c" << -1), BSON("" << 5), 9u);
+ ws->transitionToRecordIdAndIdx(id);
+ ASSERT_FALSE(member->isSuspicious);
+
+ auto roundtripped = roundtripWsmThroughSerialization(*member);
+ ASSERT_EQ(WorkingSetMember::RID_AND_IDX, roundtripped.getState());
+ ASSERT_EQ(roundtripped.recordId.repr(), 43);
+ ASSERT_EQ(roundtripped.keyData.size(), 2u);
+
+ ASSERT_BSONOBJ_EQ(roundtripped.keyData[0].indexKeyPattern, BSON("a" << 1 << "b" << 1));
+ ASSERT_BSONOBJ_EQ(roundtripped.keyData[0].keyData, BSON("" << 3 << "" << 4));
+ ASSERT_EQ(roundtripped.keyData[0].indexId, 8u);
+
+ ASSERT_BSONOBJ_EQ(roundtripped.keyData[1].indexKeyPattern, BSON("c" << -1));
+ ASSERT_BSONOBJ_EQ(roundtripped.keyData[1].keyData, BSON("" << 5));
+ ASSERT_EQ(roundtripped.keyData[1].indexId, 9u);
+
+ ASSERT_TRUE(roundtripped.isSuspicious);
+ ASSERT_FALSE(roundtripped.metadata());
+}
+
+TEST_F(WorkingSetFixture, WsmWithMetadataCanRoundtripThroughSerialization) {
+ Document doc{{"foo", Value{"bar"_sd}}};
+ member->doc.setValue(doc);
+ member->metadata().setTextScore(42.0);
+ member->metadata().setSearchScore(43.0);
+ ws->transitionToRecordIdAndObj(id);
+ auto roundtripped = roundtripWsmThroughSerialization(*member);
+
+ ASSERT_EQ(WorkingSetMember::RID_AND_OBJ, roundtripped.getState());
+ ASSERT_DOCUMENT_EQ(roundtripped.doc.value(), doc);
+ ASSERT_FALSE(roundtripped.doc.value().metadata());
+ ASSERT_TRUE(roundtripped.doc.snapshotId().isNull());
+ ASSERT_TRUE(roundtripped.recordId.isNull());
+ ASSERT_FALSE(roundtripped.isSuspicious);
+
+ ASSERT_TRUE(roundtripped.metadata());
+ ASSERT_TRUE(roundtripped.metadata().hasTextScore());
+ ASSERT_EQ(roundtripped.metadata().getTextScore(), 42.0);
+ ASSERT_TRUE(roundtripped.metadata().hasSearchScore());
+ ASSERT_EQ(roundtripped.metadata().getSearchScore(), 43.0);
+ ASSERT_FALSE(roundtripped.metadata().hasGeoNearPoint());
+ ASSERT_FALSE(roundtripped.metadata().hasGeoNearDistance());
+}
+
} // namespace mongo
diff --git a/src/mongo/db/index/sort_key_generator_test.cpp b/src/mongo/db/index/sort_key_generator_test.cpp
index 169d0cb8369..2b65febd058 100644
--- a/src/mongo/db/index/sort_key_generator_test.cpp
+++ b/src/mongo/db/index/sort_key_generator_test.cpp
@@ -279,7 +279,7 @@ public:
}
void setRecordIdAndIdx(BSONObj keyPattern, BSONObj key) {
- _member->keyData.push_back(IndexKeyDatum(std::move(keyPattern), std::move(key), nullptr));
+ _member->keyData.push_back(IndexKeyDatum(std::move(keyPattern), std::move(key), 0));
_workingSet.transitionToRecordIdAndIdx(_wsid);
}
diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp
index adbc3692f5b..cf7c58a819e 100644
--- a/src/mongo/db/pipeline/document.cpp
+++ b/src/mongo/db/pipeline/document.cpp
@@ -323,6 +323,7 @@ intrusive_ptr<DocumentStorage> DocumentStorage::clone() const {
dassert(out->_numFields == _numFields);
}
+ out->_haveLazyLoadedMetadata = _haveLazyLoadedMetadata;
out->_metadataFields = _metadataFields;
return out;
@@ -361,7 +362,7 @@ void DocumentStorage::reset(const BSONObj& bson, bool stripMetadata) {
}
void DocumentStorage::loadLazyMetadata() const {
- if (_metadataFields) {
+ if (_haveLazyLoadedMetadata) {
return;
}
@@ -397,6 +398,8 @@ void DocumentStorage::loadLazyMetadata() const {
}
}
}
+
+ _haveLazyLoadedMetadata = true;
}
Document::Document(const BSONObj& bson) {
diff --git a/src/mongo/db/pipeline/document_internal.h b/src/mongo/db/pipeline/document_internal.h
index 28417a6a46a..d521fa5aec7 100644
--- a/src/mongo/db/pipeline/document_internal.h
+++ b/src/mongo/db/pipeline/document_internal.h
@@ -422,6 +422,7 @@ public:
}
DocumentMetadataFields releaseMetadata() {
+ loadLazyMetadata();
return std::move(_metadataFields);
}
@@ -533,6 +534,11 @@ private:
BSONObj _bson;
mutable BSONObjIterator _bsonIt;
+ // If '_stripMetadata' is true, tracks whether or not the metadata has been lazy-loaded from the
+ // backing '_bson' object. If so, then no attempt will be made to load the metadata again, even
+ // if the metadata has been released by a call to 'releaseMetadata()'.
+ mutable bool _haveLazyLoadedMetadata = false;
+
mutable DocumentMetadataFields _metadataFields;
// The storage constructed from a BSON value may contain metadata. When we process the BSON we
diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp
index 6c77a8fa013..027d88037f8 100644
--- a/src/mongo/db/pipeline/document_source_sort.cpp
+++ b/src/mongo/db/pipeline/document_source_sort.cpp
@@ -83,7 +83,7 @@ DocumentSource::GetNextResult DocumentSourceSort::doGetNext() {
invariant(populationResult.isEOF());
}
- auto result = _sortExecutor->getNext();
+ auto result = _sortExecutor->getNextDoc();
if (!result)
return GetNextResult::makeEOF();
return GetNextResult(std::move(*result));
@@ -202,7 +202,7 @@ void DocumentSourceSort::loadDocument(Document&& doc) {
// already computed the sort key we'd have split the pipeline there, would be merging presorted
// documents, and wouldn't use this method.
std::tie(sortKey, docForSorter) = extractSortKey(std::move(doc));
- _sortExecutor->add(sortKey, docForSorter);
+ _sortExecutor->add(sortKey, std::move(docForSorter));
}
void DocumentSourceSort::loadingDone() {
diff --git a/src/mongo/db/storage/snapshot.h b/src/mongo/db/storage/snapshot.h
index 371e4cec29d..75870a7791f 100644
--- a/src/mongo/db/storage/snapshot.h
+++ b/src/mongo/db/storage/snapshot.h
@@ -60,6 +60,10 @@ public:
return std::to_string(_id);
}
+ uint64_t toNumber() const {
+ return _id;
+ }
+
private:
uint64_t _id;
};
diff --git a/src/mongo/dbtests/query_stage_sort_key_generator.cpp b/src/mongo/dbtests/query_stage_sort_key_generator.cpp
index 51fd476078c..bc85c3dfab2 100644
--- a/src/mongo/dbtests/query_stage_sort_key_generator.cpp
+++ b/src/mongo/dbtests/query_stage_sort_key_generator.cpp
@@ -154,8 +154,8 @@ TEST(SortKeyGeneratorStageTest, SortKeyArray) {
TEST(SortKeyGeneratorStageTest, SortKeyCoveredNormal) {
CollatorInterface* collator = nullptr;
- BSONObj actualOut = extractSortKeyCovered(
- "{a: 1}", IndexKeyDatum(BSON("a" << 1), BSON("" << 5), nullptr), collator);
+ BSONObj actualOut =
+ extractSortKeyCovered("{a: 1}", IndexKeyDatum(BSON("a" << 1), BSON("" << 5), 0), collator);
BSONObj expectedOut = BSON("" << 5);
ASSERT_BSONOBJ_EQ(actualOut, expectedOut);
}
@@ -164,7 +164,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredEmbedded) {
CollatorInterface* collator = nullptr;
BSONObj actualOut = extractSortKeyCovered(
"{'a.c': 1}",
- IndexKeyDatum(BSON("a.c" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr),
+ IndexKeyDatum(BSON("a.c" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0),
collator);
BSONObj expectedOut = BSON("" << 5);
ASSERT_BSONOBJ_EQ(actualOut, expectedOut);
@@ -174,7 +174,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound) {
CollatorInterface* collator = nullptr;
BSONObj actualOut = extractSortKeyCovered(
"{a: 1, c: 1}",
- IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), nullptr),
+ IndexKeyDatum(BSON("a" << 1 << "c" << 1), BSON("" << 5 << "" << 6), 0),
collator);
BSONObj expectedOut = BSON("" << 5 << "" << 6);
ASSERT_BSONOBJ_EQ(actualOut, expectedOut);
@@ -185,7 +185,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound2) {
BSONObj actualOut = extractSortKeyCovered("{a: 1, b: 1}",
IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1),
BSON("" << 5 << "" << 6 << "" << 4),
- nullptr),
+ 0),
collator);
BSONObj expectedOut = BSON("" << 5 << "" << 6);
ASSERT_BSONOBJ_EQ(actualOut, expectedOut);
@@ -197,7 +197,7 @@ TEST(SortKeyGeneratorStageTest, SortKeyCoveredCompound3) {
extractSortKeyCovered("{b: 1, c: 1}",
IndexKeyDatum(BSON("a" << 1 << "b" << 1 << "c" << 1 << "d" << 1),
BSON("" << 5 << "" << 6 << "" << 4 << "" << 9000),
- nullptr),
+ 0),
collator);
BSONObj expectedOut = BSON("" << 6 << "" << 4);
ASSERT_BSONOBJ_EQ(actualOut, expectedOut);
@@ -225,7 +225,7 @@ TEST(SortKeyGeneratorStageTest, CollatorAppliesWhenExtractingCoveredSortKeyStrin
IndexKeyDatum(BSON("a" << 1 << "b" << 1),
BSON("" << 4 << ""
<< "foo"),
- nullptr),
+ 0),
&collator);
BSONObj expectedOut = BSON(""
<< "oof");