summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorIan Boros <ian.boros@mongodb.com>2022-04-27 11:01:18 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-04-28 16:01:18 +0000
commitcadaba712c0f063816faa766098725da73ad073e (patch)
tree129440f94e785a26b0d0b077e99d0ada8ae72962 /src
parentd40ef71d68e67e4b55ffc8c1274a38ed4ead1f97 (diff)
downloadmongo-cadaba712c0f063816faa766098725da73ad073e.tar.gz
SERVER-66022 Add fake column index and SBE integration
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/exec/fake_column_cursor.h192
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.cpp217
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.h36
-rw-r--r--src/mongo/db/storage/column_store.h13
4 files changed, 424 insertions, 34 deletions
diff --git a/src/mongo/db/exec/fake_column_cursor.h b/src/mongo/db/exec/fake_column_cursor.h
new file mode 100644
index 00000000000..5ffde9ec3a6
--- /dev/null
+++ b/src/mongo/db/exec/fake_column_cursor.h
@@ -0,0 +1,192 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/exec/sbe/values/bson.h"
+#include "mongo/db/exec/sbe/values/value.h"
+#include "mongo/db/index/column_key_generator.h"
+#include "mongo/db/storage/column_store.h"
+
+namespace mongo::sbe {
+
+struct FakeCell {
+ ~FakeCell() {
+ for (size_t i = 0; i < tags.size(); ++i) {
+ value::releaseValue(tags[i], vals[i]);
+ }
+ }
+ FakeCell(const FakeCell&) = delete;
+ FakeCell(FakeCell&&) = default;
+
+ FakeCell& operator=(const FakeCell&) = delete;
+ FakeCell& operator=(FakeCell&&) = default;
+
+
+ // This really doesn't belong in the cell, but it's convenient to put here.
+ RecordId rid;
+
+ std::string arrayInfo;
+
+ // Owned here.
+ std::vector<value::TypeTags> tags;
+ std::vector<value::Value> vals;
+
+ bool hasDuplicateFields;
+ bool hasSubPaths;
+ bool isSparse;
+ bool hasDoubleNestedArrays;
+};
+
+/**
+ * Mock cursor for a single path in a column index. We use this instead of mocking the
+ * ColumnStore::Cursor class because that allows traversing through the entire index, which
+ * requires knowledge of all fields present across all documents.
+ */
+class FakeCursorForPath {
+public:
+ FakeCursorForPath(StringData path, std::unique_ptr<SeekableRecordCursor> cursor)
+ : _path(path.toString()), _cursor(std::move(cursor)) {}
+ boost::optional<FakeCell> next() {
+ if (_eof)
+ return {};
+ return nextUntilResultIsFound(nullptr);
+ }
+ boost::optional<FakeCell> seekAtOrPast(RecordId rid) {
+ auto result = _cursor->seekNear(rid);
+ if (!result) {
+ return {};
+ }
+
+ // seekNear() will return the key directionally _before_ the one requested if the requested
+ // key does not exist.
+ if (result->id < rid) {
+ return nextUntilResultIsFound(nullptr);
+ }
+
+ return nextUntilResultIsFound(&*result);
+ }
+ boost::optional<FakeCell> seekExact(RecordId rid) {
+ auto res = _cursor->seekExact(rid);
+ if (res) {
+ if (auto cell = extractCellForRecord(*res)) {
+ return cell;
+ }
+ }
+ return {};
+ }
+
+ void save() {
+ if (_eof) {
+ return saveUnpositioned();
+ }
+ _cursor->save();
+ }
+ void saveUnpositioned() {
+ _eof = true;
+ _cursor->saveUnpositioned();
+ }
+
+ void restore() {
+ _cursor->restore();
+ }
+
+ void detachFromOperationContext() {
+ _cursor->detachFromOperationContext();
+ }
+ void reattachToOperationContext(OperationContext* opCtx) {
+ _cursor->reattachToOperationContext(opCtx);
+ }
+
+ const PathValue& path() const {
+ return _path;
+ }
+
+private:
+ boost::optional<FakeCell> extractCellForRecord(Record record) {
+ boost::optional<FakeCell> ret;
+ BSONObj bsonObj(record.data.data());
+
+ column_keygen::visitCellsForInsert(
+ bsonObj, [&](PathView path, const column_keygen::UnencodedCellView& cellView) {
+ if (path != _path) {
+ return;
+ }
+
+ std::vector<value::TypeTags> tags;
+ std::vector<value::Value> vals;
+
+ for (auto elem : cellView.vals) {
+ auto [t, v] = bson::convertFrom<false>(elem);
+ tags.push_back(t);
+ vals.push_back(v);
+ }
+
+ ret = FakeCell{record.id,
+ cellView.arrayInfo.toString(),
+ std::move(tags),
+ std::move(vals),
+ cellView.hasDuplicateFields,
+ cellView.hasSubPaths,
+ cellView.isSparse,
+ cellView.hasDoubleNestedArrays};
+ });
+ return ret;
+ }
+
+ /*
+ * Iterates the underlying cursor until a result is found which contains _path and can be
+ * returned.
+ *
+ * If 'startRecord' is not null, it is assumed that the cursor is positioned at 'startRecord'
+ * and 'startRecord' will be considered as a possible record to return data from. Otherwise,
+ * the first record considered will be the next result from the cursor.
+ */
+ boost::optional<FakeCell> nextUntilResultIsFound(Record* startRecord) {
+ boost::optional<Record> currentRecord;
+ if (startRecord) {
+ currentRecord = *startRecord;
+ } else {
+ currentRecord = _cursor->next();
+ }
+
+ while (currentRecord) {
+ if (auto ret = extractCellForRecord(*currentRecord)) {
+ return ret;
+ }
+ currentRecord = _cursor->next();
+ }
+
+ return boost::none;
+ }
+ const PathValue _path;
+ bool _eof = false;
+ const std::unique_ptr<SeekableRecordCursor> _cursor;
+};
+} // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp
index 4c1e1d7da46..ef70102a8c0 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp
@@ -32,6 +32,7 @@
#include "mongo/db/exec/sbe/expressions/expression.h"
#include "mongo/db/exec/sbe/size_estimator.h"
+#include "mongo/db/exec/sbe/values/columnar.h"
namespace mongo {
namespace sbe {
@@ -127,12 +128,19 @@ value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId
}
void ColumnScanStage::doSaveState(bool relinquishCursor) {
- if (_cursor && relinquishCursor) {
- _cursor->save();
+ if (_rowStoreCursor && relinquishCursor) {
+ _rowStoreCursor->save();
}
- if (_cursor) {
- _cursor->setSaveStorageCursorOnDetachFromOperationContext(!relinquishCursor);
+ if (_rowStoreCursor) {
+ _rowStoreCursor->setSaveStorageCursorOnDetachFromOperationContext(!relinquishCursor);
+ }
+
+ for (auto& cursor : _columnCursors) {
+ cursor.cursor->save();
+ }
+ for (auto& [path, cursor] : _parentPathCursors) {
+ cursor->save();
}
_coll.reset();
@@ -150,23 +158,42 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) {
tassert(6298603, "Catalog epoch should be initialized", _catalogEpoch);
_coll = restoreCollection(_opCtx, *_collName, _collUuid, *_catalogEpoch);
- if (_cursor) {
+ if (_rowStoreCursor) {
if (relinquishCursor) {
- const bool couldRestore = _cursor->restore();
+ const bool couldRestore = _rowStoreCursor->restore();
invariant(couldRestore);
}
}
+
+ for (auto& cursor : _columnCursors) {
+ cursor.cursor->restore();
+ }
+ for (auto& [path, cursor] : _parentPathCursors) {
+ cursor->restore();
+ }
}
void ColumnScanStage::doDetachFromOperationContext() {
- if (_cursor) {
- _cursor->detachFromOperationContext();
+ if (_rowStoreCursor) {
+ _rowStoreCursor->detachFromOperationContext();
+ }
+ for (auto& cursor : _columnCursors) {
+ cursor.cursor->detachFromOperationContext();
+ }
+ for (auto& [path, cursor] : _parentPathCursors) {
+ cursor->detachFromOperationContext();
}
}
void ColumnScanStage::doAttachToOperationContext(OperationContext* opCtx) {
- if (_cursor) {
- _cursor->reattachToOperationContext(opCtx);
+ if (_rowStoreCursor) {
+ _rowStoreCursor->reattachToOperationContext(opCtx);
+ }
+ for (auto& cursor : _columnCursors) {
+ cursor.cursor->reattachToOperationContext(opCtx);
+ }
+ for (auto& [path, cursor] : _parentPathCursors) {
+ cursor->reattachToOperationContext(opCtx);
}
}
@@ -189,27 +216,92 @@ void ColumnScanStage::open(bool reOpen) {
if (_open) {
tassert(6298604, "reopened ColumnScanStage but reOpen=false", reOpen);
tassert(6298605, "ColumnScanStage is open but _coll is not null", _coll);
- tassert(6298606, "ColumnScanStage is open but don't have _cursor", _cursor);
+ tassert(6298606, "ColumnScanStage is open but don't have _rowStoreCursor", _rowStoreCursor);
} else {
tassert(6298607, "first open to ColumnScanStage but reOpen=true", !reOpen);
if (!_coll) {
// We're being opened after 'close()'. We need to re-acquire '_coll' in this case and
// make some validity checks (the collection has not been dropped, renamed, etc.).
- tassert(6298608, "ColumnScanStage is not open but have _cursor", !_cursor);
+ tassert(
+ 6298608, "ColumnScanStage is not open but have _rowStoreCursor", !_rowStoreCursor);
tassert(6298609, "Collection name should be initialized", _collName);
tassert(6298610, "Catalog epoch should be initialized", _catalogEpoch);
_coll = restoreCollection(_opCtx, *_collName, _collUuid, *_catalogEpoch);
}
}
- if (!_cursor) {
- _cursor = _coll->getCursor(_opCtx, true);
+ if (!_rowStoreCursor) {
+ _rowStoreCursor = _coll->getCursor(_opCtx, true);
+ }
+
+ if (_columnCursors.empty()) {
+ // We fake the special Row ID column by just using _id, for now.
+ _columnCursors.push_back(ColumnCursor{
+ std::make_unique<FakeCursorForPath>("_id", _coll->getCursor(_opCtx, true)),
+ boost::none,
+ false /* add to document */
+ });
+
+ for (auto&& path : _paths) {
+ _columnCursors.push_back(ColumnCursor{
+ std::make_unique<FakeCursorForPath>(path, _coll->getCursor(_opCtx, true)),
+ boost::none,
+ true /* add to document */
+ });
+ }
}
_open = true;
_firstGetNext = true;
}
+void ColumnScanStage::readParentsIntoObj(StringData path,
+ value::Object* outObj,
+ StringDataSet* pathsReadSetOut,
+ bool first) {
+ auto parent = ColumnStore::getParentPath(path);
+
+ // If a top-level path doesn't exist, it just doesn't exist. It can't exist in some places
+ // within a document but not others. No further inspection is necessary.
+
+ if (!parent) {
+ return;
+ }
+
+ if (pathsReadSetOut->contains(*parent)) {
+ // We've already read the parent in, so skip it.
+ return;
+ }
+
+ // Create the parent path cursor if necessary.
+
+ // First we try to emplace a nullptr, so that we avoid creating the cursor when we don't have
+ // to.
+ auto [it, inserted] = _parentPathCursors.try_emplace(*parent, nullptr);
+
+ // If we inserted a new entry, replace the null with an actual cursor.
+ if (inserted) {
+ invariant(it->second == nullptr);
+ it->second = std::make_unique<FakeCursorForPath>(*parent, _coll->getCursor(_opCtx, true));
+ }
+
+ auto optCell = it->second->seekExact(_recordId);
+ pathsReadSetOut->insert(*parent);
+
+ if (!optCell || optCell->isSparse) {
+ // We need this cell's parent too.
+ readParentsIntoObj(*parent, outObj, pathsReadSetOut, false);
+ }
+
+ if (optCell) {
+ auto translatedCell =
+ TranslatedCell{optCell->arrayInfo, *parent, optCell->tags, optCell->vals};
+
+ addCellToObject(translatedCell, *outObj);
+ }
+}
+
+
PlanState ColumnScanStage::getNext() {
auto optTimer(getOptTimer(_opCtx));
@@ -217,32 +309,90 @@ PlanState ColumnScanStage::getNext() {
// case it yields as the state will be completely overwritten after the next() call.
disableSlotAccess();
- // This call to checkForInterrupt() may result in a call to save() or restore() on the entire
- // PlanStage tree if a yield occurs. It's important that we call checkForInterrupt() before
- // checking '_needsToCheckCappedPositionLost' since a call to restoreState() may set
- // '_needsToCheckCappedPositionLost'.
checkForInterrupt(_opCtx);
- auto nextRecord = _cursor->next();
- _firstGetNext = false;
+ if (_firstGetNext) {
+ for (size_t i = 0; i < _columnCursors.size(); ++i) {
+ _columnCursors[i].next();
+ }
+ _firstGetNext = false;
+ }
+
+ // Find minimum record ID of all column cursors.
+ _recordId = RecordId();
+ for (auto& cursor : _columnCursors) {
+ auto& result = cursor.lastCell;
+ if (result && (_recordId.isNull() || result->rid < _recordId)) {
+ _recordId = result->rid;
+ }
+ }
- if (!nextRecord) {
+ if (_recordId.isNull()) {
return trackPlanState(PlanState::IS_EOF);
}
- if (_recordIdAccessor) {
- _recordId = nextRecord->id;
- _recordIdAccessor->reset(
- false, value::TypeTags::RecordId, value::bitcastFrom<RecordId*>(&_recordId));
+ auto [outTag, outVal] = value::makeNewObject();
+ auto& outObj = *value::bitcastTo<value::Object*>(outVal);
+ value::ValueGuard materializedObjGuard(outTag, outVal);
+
+ StringDataSet parentPathsRead;
+ bool useRowStore = false;
+ for (size_t i = 0; i < _columnCursors.size(); ++i) {
+ auto& lastCell = _columnCursors[i].lastCell;
+
+ const FakeCell* cellForRid =
+ (lastCell && lastCell->rid == _recordId) ? lastCell.get_ptr() : nullptr;
+ const auto& path = _columnCursors[i].cursor->path();
+
+ if (cellForRid && (cellForRid->hasSubPaths || cellForRid->hasDuplicateFields)) {
+ useRowStore = true;
+ } else if (!useRowStore && _columnCursors[i].includeInOutput) {
+ if (!cellForRid || cellForRid->isSparse) {
+ // Must read in the parent information first.
+ readParentsIntoObj(path, &outObj, &parentPathsRead);
+ }
+
+ if (cellForRid) {
+ auto translatedCell = TranslatedCell{cellForRid->arrayInfo,
+ _columnCursors[i].path(),
+ cellForRid->tags,
+ cellForRid->vals};
+
+ addCellToObject(translatedCell, outObj);
+ }
+ }
+
+ if (cellForRid) {
+ _columnCursors[i].next();
+ }
}
- _rowStoreAccessor->reset(false,
- value::TypeTags::bsonObject,
- value::bitcastFrom<const char*>(nextRecord->data.data()));
+ if (useRowStore) {
+ std::cout << "Using row store for rid " << _recordId << std::endl;
- if (_recordExpr) {
- auto [owned, tag, val] = _bytecode.run(_recordExprCode.get());
- _recordAccessor->reset(owned, tag, val);
+ // TODO: In some cases we can avoid calling seek() on the row store cursor, and instead do
+ // a next() which should be much cheaper.
+ auto record = _rowStoreCursor->seekExact(_recordId);
+
+ // If there's no record, the index is out of sync with the row store.
+ invariant(record);
+
+ _rowStoreAccessor->reset(false,
+ value::TypeTags::bsonObject,
+ value::bitcastFrom<const char*>(record->data.data()));
+
+ if (_recordExpr) {
+ auto [owned, tag, val] = _bytecode.run(_recordExprCode.get());
+ _recordAccessor->reset(owned, tag, val);
+ }
+ } else {
+ _recordAccessor->reset(true, outTag, outVal);
+ materializedObjGuard.reset();
+ }
+
+ if (_recordIdAccessor) {
+ _recordIdAccessor->reset(
+ false, value::TypeTags::RecordId, value::bitcastFrom<RecordId*>(&_recordId));
}
for (size_t idx = 0; idx < _outputFields.size(); ++idx) {
@@ -267,8 +417,10 @@ void ColumnScanStage::close() {
auto optTimer(getOptTimer(_opCtx));
trackClose();
- _cursor.reset();
+ _rowStoreCursor.reset();
_coll.reset();
+ _columnCursors.clear();
+ _parentPathCursors.clear();
_open = false;
}
@@ -348,6 +500,5 @@ size_t ColumnScanStage::estimateCompileTimeSize() const {
size += size_estimator::estimate(_specificStats);
return size;
}
-
} // namespace sbe
} // namespace mongo
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h
index 22ca42ee401..e555d10a114 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.h
+++ b/src/mongo/db/exec/sbe/stages/column_scan.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/config.h"
+#include "mongo/db/exec/fake_column_cursor.h"
#include "mongo/db/exec/sbe/expressions/expression.h"
#include "mongo/db/exec/sbe/stages/collection_helpers.h"
#include "mongo/db/exec/sbe/stages/stages.h"
@@ -77,6 +78,36 @@ protected:
TrialRunTracker* tracker, TrialRunTrackerAttachResultMask childrenAttachResult) override;
private:
+ struct ColumnCursor {
+ std::unique_ptr<FakeCursorForPath> cursor;
+ boost::optional<FakeCell> lastCell;
+ bool includeInOutput = false;
+
+ boost::optional<FakeCell>& next() {
+ // TODO For some reason the destructor of 'lastCell' is not called
+ // on my local asan build unless we explicitly reset it. Maybe
+ // the same compiler bug Nikita ran into?
+ lastCell.reset();
+ lastCell = cursor->next();
+ return lastCell;
+ }
+
+ boost::optional<FakeCell>& seekAtOrPast(RecordId id) {
+ lastCell.reset();
+ lastCell = cursor->seekAtOrPast(id);
+ return lastCell;
+ }
+
+ const PathValue& path() const {
+ return cursor->path();
+ }
+ };
+
+ void readParentsIntoObj(StringData path,
+ value::Object* out,
+ StringDataSet* pathsReadSetOut,
+ bool first = true);
+
const UUID _collUuid;
const std::string _columnIndexName;
const value::SlotVector _fieldSlots;
@@ -109,7 +140,10 @@ private:
CollectionPtr _coll;
- std::unique_ptr<SeekableRecordCursor> _cursor;
+ std::unique_ptr<SeekableRecordCursor> _rowStoreCursor;
+
+ std::vector<ColumnCursor> _columnCursors;
+ StringMap<std::unique_ptr<FakeCursorForPath>> _parentPathCursors;
RecordId _recordId;
diff --git a/src/mongo/db/storage/column_store.h b/src/mongo/db/storage/column_store.h
index 0d1c3c81d03..f8cf708ff83 100644
--- a/src/mongo/db/storage/column_store.h
+++ b/src/mongo/db/storage/column_store.h
@@ -319,6 +319,19 @@ public:
return res;
}
+ /**
+ * Returns the parent path for the given path, if there is one.
+ */
+ static boost::optional<PathView> getParentPath(PathView path) {
+ auto lastDot = path.rfind('.');
+ if (lastDot == std::string::npos) {
+ return {};
+ }
+
+ return path.substr(0, lastDot);
+ }
+
+
protected:
class Cursor {
public: