diff options
author | Ian Boros <ian.boros@mongodb.com> | 2022-04-27 11:01:18 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-04-28 16:01:18 +0000 |
commit | cadaba712c0f063816faa766098725da73ad073e (patch) | |
tree | 129440f94e785a26b0d0b077e99d0ada8ae72962 /src | |
parent | d40ef71d68e67e4b55ffc8c1274a38ed4ead1f97 (diff) | |
download | mongo-cadaba712c0f063816faa766098725da73ad073e.tar.gz |
SERVER-66022 Add fake column index and SBE integration
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/exec/fake_column_cursor.h | 192 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.cpp | 217 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.h | 36 | ||||
-rw-r--r-- | src/mongo/db/storage/column_store.h | 13 |
4 files changed, 424 insertions, 34 deletions
diff --git a/src/mongo/db/exec/fake_column_cursor.h b/src/mongo/db/exec/fake_column_cursor.h new file mode 100644 index 00000000000..5ffde9ec3a6 --- /dev/null +++ b/src/mongo/db/exec/fake_column_cursor.h @@ -0,0 +1,192 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/exec/sbe/values/bson.h" +#include "mongo/db/exec/sbe/values/value.h" +#include "mongo/db/index/column_key_generator.h" +#include "mongo/db/storage/column_store.h" + +namespace mongo::sbe { + +struct FakeCell { + ~FakeCell() { + for (size_t i = 0; i < tags.size(); ++i) { + value::releaseValue(tags[i], vals[i]); + } + } + FakeCell(const FakeCell&) = delete; + FakeCell(FakeCell&&) = default; + + FakeCell& operator=(const FakeCell&) = delete; + FakeCell& operator=(FakeCell&&) = default; + + + // This really doesn't belong in the cell, but it's convenient to put here. + RecordId rid; + + std::string arrayInfo; + + // Owned here. + std::vector<value::TypeTags> tags; + std::vector<value::Value> vals; + + bool hasDuplicateFields; + bool hasSubPaths; + bool isSparse; + bool hasDoubleNestedArrays; +}; + +/** + * Mock cursor for a single path in a column index. We use this instead of mocking the + * ColumnStore::Cursor class because that allows traversing through the entire index, which + * requires knowledge of all fields present across all documents. + */ +class FakeCursorForPath { +public: + FakeCursorForPath(StringData path, std::unique_ptr<SeekableRecordCursor> cursor) + : _path(path.toString()), _cursor(std::move(cursor)) {} + boost::optional<FakeCell> next() { + if (_eof) + return {}; + return nextUntilResultIsFound(nullptr); + } + boost::optional<FakeCell> seekAtOrPast(RecordId rid) { + auto result = _cursor->seekNear(rid); + if (!result) { + return {}; + } + + // seekNear() will return the key directionally _before_ the one requested if the requested + // key does not exist. + if (result->id < rid) { + return nextUntilResultIsFound(nullptr); + } + + return nextUntilResultIsFound(&*result); + } + boost::optional<FakeCell> seekExact(RecordId rid) { + auto res = _cursor->seekExact(rid); + if (res) { + if (auto cell = extractCellForRecord(*res)) { + return cell; + } + } + return {}; + } + + void save() { + if (_eof) { + return saveUnpositioned(); + } + _cursor->save(); + } + void saveUnpositioned() { + _eof = true; + _cursor->saveUnpositioned(); + } + + void restore() { + _cursor->restore(); + } + + void detachFromOperationContext() { + _cursor->detachFromOperationContext(); + } + void reattachToOperationContext(OperationContext* opCtx) { + _cursor->reattachToOperationContext(opCtx); + } + + const PathValue& path() const { + return _path; + } + +private: + boost::optional<FakeCell> extractCellForRecord(Record record) { + boost::optional<FakeCell> ret; + BSONObj bsonObj(record.data.data()); + + column_keygen::visitCellsForInsert( + bsonObj, [&](PathView path, const column_keygen::UnencodedCellView& cellView) { + if (path != _path) { + return; + } + + std::vector<value::TypeTags> tags; + std::vector<value::Value> vals; + + for (auto elem : cellView.vals) { + auto [t, v] = bson::convertFrom<false>(elem); + tags.push_back(t); + vals.push_back(v); + } + + ret = FakeCell{record.id, + cellView.arrayInfo.toString(), + std::move(tags), + std::move(vals), + cellView.hasDuplicateFields, + cellView.hasSubPaths, + cellView.isSparse, + cellView.hasDoubleNestedArrays}; + }); + return ret; + } + + /* + * Iterates the underlying cursor until a result is found which contains _path and can be + * returned. + * + * If 'startRecord' is not null, it is assumed that the cursor is positioned at 'startRecord' + * and 'startRecord' will be considered as a possible record to return data from. Otherwise, + * the first record considered will be the next result from the cursor. + */ + boost::optional<FakeCell> nextUntilResultIsFound(Record* startRecord) { + boost::optional<Record> currentRecord; + if (startRecord) { + currentRecord = *startRecord; + } else { + currentRecord = _cursor->next(); + } + + while (currentRecord) { + if (auto ret = extractCellForRecord(*currentRecord)) { + return ret; + } + currentRecord = _cursor->next(); + } + + return boost::none; + } + const PathValue _path; + bool _eof = false; + const std::unique_ptr<SeekableRecordCursor> _cursor; +}; +} // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp index 4c1e1d7da46..ef70102a8c0 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.cpp +++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp @@ -32,6 +32,7 @@ #include "mongo/db/exec/sbe/expressions/expression.h" #include "mongo/db/exec/sbe/size_estimator.h" +#include "mongo/db/exec/sbe/values/columnar.h" namespace mongo { namespace sbe { @@ -127,12 +128,19 @@ value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId } void ColumnScanStage::doSaveState(bool relinquishCursor) { - if (_cursor && relinquishCursor) { - _cursor->save(); + if (_rowStoreCursor && relinquishCursor) { + _rowStoreCursor->save(); } - if (_cursor) { - _cursor->setSaveStorageCursorOnDetachFromOperationContext(!relinquishCursor); + if (_rowStoreCursor) { + _rowStoreCursor->setSaveStorageCursorOnDetachFromOperationContext(!relinquishCursor); + } + + for (auto& cursor : _columnCursors) { + cursor.cursor->save(); + } + for (auto& [path, cursor] : _parentPathCursors) { + cursor->save(); } _coll.reset(); @@ -150,23 +158,42 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) { tassert(6298603, "Catalog epoch should be initialized", _catalogEpoch); _coll = restoreCollection(_opCtx, *_collName, _collUuid, *_catalogEpoch); - if (_cursor) { + if (_rowStoreCursor) { if (relinquishCursor) { - const bool couldRestore = _cursor->restore(); + const bool couldRestore = _rowStoreCursor->restore(); invariant(couldRestore); } } + + for (auto& cursor : _columnCursors) { + cursor.cursor->restore(); + } + for (auto& [path, cursor] : _parentPathCursors) { + cursor->restore(); + } } void ColumnScanStage::doDetachFromOperationContext() { - if (_cursor) { - _cursor->detachFromOperationContext(); + if (_rowStoreCursor) { + _rowStoreCursor->detachFromOperationContext(); + } + for (auto& cursor : _columnCursors) { + cursor.cursor->detachFromOperationContext(); + } + for (auto& [path, cursor] : _parentPathCursors) { + cursor->detachFromOperationContext(); } } void ColumnScanStage::doAttachToOperationContext(OperationContext* opCtx) { - if (_cursor) { - _cursor->reattachToOperationContext(opCtx); + if (_rowStoreCursor) { + _rowStoreCursor->reattachToOperationContext(opCtx); + } + for (auto& cursor : _columnCursors) { + cursor.cursor->reattachToOperationContext(opCtx); + } + for (auto& [path, cursor] : _parentPathCursors) { + cursor->reattachToOperationContext(opCtx); } } @@ -189,27 +216,92 @@ void ColumnScanStage::open(bool reOpen) { if (_open) { tassert(6298604, "reopened ColumnScanStage but reOpen=false", reOpen); tassert(6298605, "ColumnScanStage is open but _coll is not null", _coll); - tassert(6298606, "ColumnScanStage is open but don't have _cursor", _cursor); + tassert(6298606, "ColumnScanStage is open but don't have _rowStoreCursor", _rowStoreCursor); } else { tassert(6298607, "first open to ColumnScanStage but reOpen=true", !reOpen); if (!_coll) { // We're being opened after 'close()'. We need to re-acquire '_coll' in this case and // make some validity checks (the collection has not been dropped, renamed, etc.). - tassert(6298608, "ColumnScanStage is not open but have _cursor", !_cursor); + tassert( + 6298608, "ColumnScanStage is not open but have _rowStoreCursor", !_rowStoreCursor); tassert(6298609, "Collection name should be initialized", _collName); tassert(6298610, "Catalog epoch should be initialized", _catalogEpoch); _coll = restoreCollection(_opCtx, *_collName, _collUuid, *_catalogEpoch); } } - if (!_cursor) { - _cursor = _coll->getCursor(_opCtx, true); + if (!_rowStoreCursor) { + _rowStoreCursor = _coll->getCursor(_opCtx, true); + } + + if (_columnCursors.empty()) { + // We fake the special Row ID column by just using _id, for now. + _columnCursors.push_back(ColumnCursor{ + std::make_unique<FakeCursorForPath>("_id", _coll->getCursor(_opCtx, true)), + boost::none, + false /* add to document */ + }); + + for (auto&& path : _paths) { + _columnCursors.push_back(ColumnCursor{ + std::make_unique<FakeCursorForPath>(path, _coll->getCursor(_opCtx, true)), + boost::none, + true /* add to document */ + }); + } } _open = true; _firstGetNext = true; } +void ColumnScanStage::readParentsIntoObj(StringData path, + value::Object* outObj, + StringDataSet* pathsReadSetOut, + bool first) { + auto parent = ColumnStore::getParentPath(path); + + // If a top-level path doesn't exist, it just doesn't exist. It can't exist in some places + // within a document but not others. No further inspection is necessary. + + if (!parent) { + return; + } + + if (pathsReadSetOut->contains(*parent)) { + // We've already read the parent in, so skip it. + return; + } + + // Create the parent path cursor if necessary. + + // First we try to emplace a nullptr, so that we avoid creating the cursor when we don't have + // to. + auto [it, inserted] = _parentPathCursors.try_emplace(*parent, nullptr); + + // If we inserted a new entry, replace the null with an actual cursor. + if (inserted) { + invariant(it->second == nullptr); + it->second = std::make_unique<FakeCursorForPath>(*parent, _coll->getCursor(_opCtx, true)); + } + + auto optCell = it->second->seekExact(_recordId); + pathsReadSetOut->insert(*parent); + + if (!optCell || optCell->isSparse) { + // We need this cell's parent too. + readParentsIntoObj(*parent, outObj, pathsReadSetOut, false); + } + + if (optCell) { + auto translatedCell = + TranslatedCell{optCell->arrayInfo, *parent, optCell->tags, optCell->vals}; + + addCellToObject(translatedCell, *outObj); + } +} + + PlanState ColumnScanStage::getNext() { auto optTimer(getOptTimer(_opCtx)); @@ -217,32 +309,90 @@ PlanState ColumnScanStage::getNext() { // case it yields as the state will be completely overwritten after the next() call. disableSlotAccess(); - // This call to checkForInterrupt() may result in a call to save() or restore() on the entire - // PlanStage tree if a yield occurs. It's important that we call checkForInterrupt() before - // checking '_needsToCheckCappedPositionLost' since a call to restoreState() may set - // '_needsToCheckCappedPositionLost'. checkForInterrupt(_opCtx); - auto nextRecord = _cursor->next(); - _firstGetNext = false; + if (_firstGetNext) { + for (size_t i = 0; i < _columnCursors.size(); ++i) { + _columnCursors[i].next(); + } + _firstGetNext = false; + } + + // Find minimum record ID of all column cursors. + _recordId = RecordId(); + for (auto& cursor : _columnCursors) { + auto& result = cursor.lastCell; + if (result && (_recordId.isNull() || result->rid < _recordId)) { + _recordId = result->rid; + } + } - if (!nextRecord) { + if (_recordId.isNull()) { return trackPlanState(PlanState::IS_EOF); } - if (_recordIdAccessor) { - _recordId = nextRecord->id; - _recordIdAccessor->reset( - false, value::TypeTags::RecordId, value::bitcastFrom<RecordId*>(&_recordId)); + auto [outTag, outVal] = value::makeNewObject(); + auto& outObj = *value::bitcastTo<value::Object*>(outVal); + value::ValueGuard materializedObjGuard(outTag, outVal); + + StringDataSet parentPathsRead; + bool useRowStore = false; + for (size_t i = 0; i < _columnCursors.size(); ++i) { + auto& lastCell = _columnCursors[i].lastCell; + + const FakeCell* cellForRid = + (lastCell && lastCell->rid == _recordId) ? lastCell.get_ptr() : nullptr; + const auto& path = _columnCursors[i].cursor->path(); + + if (cellForRid && (cellForRid->hasSubPaths || cellForRid->hasDuplicateFields)) { + useRowStore = true; + } else if (!useRowStore && _columnCursors[i].includeInOutput) { + if (!cellForRid || cellForRid->isSparse) { + // Must read in the parent information first. + readParentsIntoObj(path, &outObj, &parentPathsRead); + } + + if (cellForRid) { + auto translatedCell = TranslatedCell{cellForRid->arrayInfo, + _columnCursors[i].path(), + cellForRid->tags, + cellForRid->vals}; + + addCellToObject(translatedCell, outObj); + } + } + + if (cellForRid) { + _columnCursors[i].next(); + } } - _rowStoreAccessor->reset(false, - value::TypeTags::bsonObject, - value::bitcastFrom<const char*>(nextRecord->data.data())); + if (useRowStore) { + std::cout << "Using row store for rid " << _recordId << std::endl; - if (_recordExpr) { - auto [owned, tag, val] = _bytecode.run(_recordExprCode.get()); - _recordAccessor->reset(owned, tag, val); + // TODO: In some cases we can avoid calling seek() on the row store cursor, and instead do + // a next() which should be much cheaper. + auto record = _rowStoreCursor->seekExact(_recordId); + + // If there's no record, the index is out of sync with the row store. + invariant(record); + + _rowStoreAccessor->reset(false, + value::TypeTags::bsonObject, + value::bitcastFrom<const char*>(record->data.data())); + + if (_recordExpr) { + auto [owned, tag, val] = _bytecode.run(_recordExprCode.get()); + _recordAccessor->reset(owned, tag, val); + } + } else { + _recordAccessor->reset(true, outTag, outVal); + materializedObjGuard.reset(); + } + + if (_recordIdAccessor) { + _recordIdAccessor->reset( + false, value::TypeTags::RecordId, value::bitcastFrom<RecordId*>(&_recordId)); } for (size_t idx = 0; idx < _outputFields.size(); ++idx) { @@ -267,8 +417,10 @@ void ColumnScanStage::close() { auto optTimer(getOptTimer(_opCtx)); trackClose(); - _cursor.reset(); + _rowStoreCursor.reset(); _coll.reset(); + _columnCursors.clear(); + _parentPathCursors.clear(); _open = false; } @@ -348,6 +500,5 @@ size_t ColumnScanStage::estimateCompileTimeSize() const { size += size_estimator::estimate(_specificStats); return size; } - } // namespace sbe } // namespace mongo diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h index 22ca42ee401..e555d10a114 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.h +++ b/src/mongo/db/exec/sbe/stages/column_scan.h @@ -30,6 +30,7 @@ #pragma once #include "mongo/config.h" +#include "mongo/db/exec/fake_column_cursor.h" #include "mongo/db/exec/sbe/expressions/expression.h" #include "mongo/db/exec/sbe/stages/collection_helpers.h" #include "mongo/db/exec/sbe/stages/stages.h" @@ -77,6 +78,36 @@ protected: TrialRunTracker* tracker, TrialRunTrackerAttachResultMask childrenAttachResult) override; private: + struct ColumnCursor { + std::unique_ptr<FakeCursorForPath> cursor; + boost::optional<FakeCell> lastCell; + bool includeInOutput = false; + + boost::optional<FakeCell>& next() { + // TODO For some reason the destructor of 'lastCell' is not called + // on my local asan build unless we explicitly reset it. Maybe + // the same compiler bug Nikita ran into? + lastCell.reset(); + lastCell = cursor->next(); + return lastCell; + } + + boost::optional<FakeCell>& seekAtOrPast(RecordId id) { + lastCell.reset(); + lastCell = cursor->seekAtOrPast(id); + return lastCell; + } + + const PathValue& path() const { + return cursor->path(); + } + }; + + void readParentsIntoObj(StringData path, + value::Object* out, + StringDataSet* pathsReadSetOut, + bool first = true); + const UUID _collUuid; const std::string _columnIndexName; const value::SlotVector _fieldSlots; @@ -109,7 +140,10 @@ private: CollectionPtr _coll; - std::unique_ptr<SeekableRecordCursor> _cursor; + std::unique_ptr<SeekableRecordCursor> _rowStoreCursor; + + std::vector<ColumnCursor> _columnCursors; + StringMap<std::unique_ptr<FakeCursorForPath>> _parentPathCursors; RecordId _recordId; diff --git a/src/mongo/db/storage/column_store.h b/src/mongo/db/storage/column_store.h index 0d1c3c81d03..f8cf708ff83 100644 --- a/src/mongo/db/storage/column_store.h +++ b/src/mongo/db/storage/column_store.h @@ -319,6 +319,19 @@ public: return res; } + /** + * Returns the parent path for the given path, if there is one. + */ + static boost::optional<PathView> getParentPath(PathView path) { + auto lastDot = path.rfind('.'); + if (lastDot == std::string::npos) { + return {}; + } + + return path.substr(0, lastDot); + } + + protected: class Cursor { public: |