diff options
author | Ian Boros <ian.boros@mongodb.com> | 2022-05-03 19:18:43 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-05-11 23:03:48 +0000 |
commit | 83ae5393284912aded0087fd2cdf79edff6f999f (patch) | |
tree | f5b57ad8688a00fccedfb77883bcbf5b0aeeb100 /src | |
parent | 51bff7a1d0145afebb57a04e82bd962985801f06 (diff) | |
download | mongo-83ae5393284912aded0087fd2cdf79edff6f999f.tar.gz |
SERVER-66102 Change column scan stage to read from true column index
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/exec/fake_column_cursor.h | 208 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.cpp | 148 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.h | 75 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/columnar.cpp | 30 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/columnar.h | 22 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/columnar_test.cpp | 84 | ||||
-rw-r--r-- | src/mongo/db/index/columns_access_method.cpp | 20 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_columnar_test.cpp | 86 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_params.h | 5 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 20 | ||||
-rw-r--r-- | src/mongo/db/storage/column_store.h | 37 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp | 15 |
13 files changed, 370 insertions, 396 deletions
diff --git a/src/mongo/db/exec/fake_column_cursor.h b/src/mongo/db/exec/fake_column_cursor.h deleted file mode 100644 index 7a99381cf9a..00000000000 --- a/src/mongo/db/exec/fake_column_cursor.h +++ /dev/null @@ -1,208 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/exec/sbe/values/bson.h" -#include "mongo/db/exec/sbe/values/value.h" -#include "mongo/db/index/column_key_generator.h" -#include "mongo/db/storage/column_store.h" - -namespace mongo::sbe { - -struct FakeCell { - FakeCell(RecordId ridIn, - std::string arrayInfoIn, - std::vector<value::TypeTags> tagsIn, - std::vector<value::Value> valuesIn, - bool hasDuplicateFieldsIn, - bool hasSubPathsIn, - bool isSparseIn, - bool hasDoubleNestedArraysIn) - : rid(ridIn), - arrayInfo(std::move(arrayInfoIn)), - tags(std::move(tagsIn)), - vals(std::move(valuesIn)), - hasDuplicateFields(hasDuplicateFieldsIn), - hasSubPaths(hasSubPathsIn), - isSparse(isSparseIn), - hasDoubleNestedArrays(hasDoubleNestedArraysIn) {} - ~FakeCell() { - for (size_t i = 0; i < tags.size(); ++i) { - value::releaseValue(tags[i], vals[i]); - } - } - FakeCell(const FakeCell&) = delete; - FakeCell(FakeCell&&) = default; - - FakeCell& operator=(const FakeCell&) = delete; - FakeCell& operator=(FakeCell&&) = default; - - - // This really doesn't belong in the cell, but it's convenient to put here. - RecordId rid; - - std::string arrayInfo; - - // Owned here. - std::vector<value::TypeTags> tags; - std::vector<value::Value> vals; - - bool hasDuplicateFields; - bool hasSubPaths; - bool isSparse; - bool hasDoubleNestedArrays; -}; - -/** - * Mock cursor for a single path in a column index. We use this instead of mocking the - * ColumnStore::Cursor class because that allows traversing through the entire index, which - * requires knowledge of all fields present across all documents. - */ -class FakeCursorForPath { -public: - FakeCursorForPath(StringData path, std::unique_ptr<SeekableRecordCursor> cursor) - : _path(path.toString()), _cursor(std::move(cursor)) {} - boost::optional<FakeCell> next() { - if (_eof) - return {}; - return nextUntilResultIsFound(nullptr); - } - boost::optional<FakeCell> seekAtOrPast(RecordId rid) { - auto result = _cursor->seekNear(rid); - if (!result) { - return {}; - } - - // seekNear() will return the key directionally _before_ the one requested if the requested - // key does not exist. - if (result->id < rid) { - return nextUntilResultIsFound(nullptr); - } - - return nextUntilResultIsFound(&*result); - } - boost::optional<FakeCell> seekExact(RecordId rid) { - auto res = _cursor->seekExact(rid); - if (res) { - if (auto cell = extractCellForRecord(*res)) { - return cell; - } - } - return {}; - } - - void save() { - if (_eof) { - return saveUnpositioned(); - } - _cursor->save(); - } - void saveUnpositioned() { - _eof = true; - _cursor->saveUnpositioned(); - } - - void restore() { - _cursor->restore(); - } - - void detachFromOperationContext() { - _cursor->detachFromOperationContext(); - } - void reattachToOperationContext(OperationContext* opCtx) { - _cursor->reattachToOperationContext(opCtx); - } - - const PathValue& path() const { - return _path; - } - -private: - boost::optional<FakeCell> extractCellForRecord(Record record) { - boost::optional<FakeCell> ret; - BSONObj bsonObj(record.data.data()); - - column_keygen::visitCellsForInsert( - bsonObj, [&](PathView path, const column_keygen::UnencodedCellView& cellView) { - if (path != _path) { - return; - } - - std::vector<value::TypeTags> tags; - std::vector<value::Value> vals; - - for (auto elem : cellView.vals) { - auto [t, v] = bson::convertFrom<false>(elem); - tags.push_back(t); - vals.push_back(v); - } - - ret.emplace(record.id, - cellView.arrayInfo.toString(), - std::move(tags), - std::move(vals), - cellView.hasDuplicateFields, - cellView.hasSubPaths, - cellView.isSparse, - cellView.hasDoubleNestedArrays); - }); - return ret; - } - - /* - * Iterates the underlying cursor until a result is found which contains _path and can be - * returned. - * - * If 'startRecord' is not null, it is assumed that the cursor is positioned at 'startRecord' - * and 'startRecord' will be considered as a possible record to return data from. Otherwise, - * the first record considered will be the next result from the cursor. - */ - boost::optional<FakeCell> nextUntilResultIsFound(Record* startRecord) { - boost::optional<Record> currentRecord; - if (startRecord) { - currentRecord = *startRecord; - } else { - currentRecord = _cursor->next(); - } - - while (currentRecord) { - if (auto ret = extractCellForRecord(*currentRecord)) { - return ret; - } - currentRecord = _cursor->next(); - } - - return boost::none; - } - const PathValue _path; - bool _eof = false; - const std::unique_ptr<SeekableRecordCursor> _cursor; -}; -} // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp index ef70102a8c0..0cc99871da1 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.cpp +++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp @@ -32,10 +32,23 @@ #include "mongo/db/exec/sbe/expressions/expression.h" #include "mongo/db/exec/sbe/size_estimator.h" +#include "mongo/db/exec/sbe/values/column_store_encoder.h" #include "mongo/db/exec/sbe/values/columnar.h" +#include "mongo/db/index/columns_access_method.h" namespace mongo { namespace sbe { +namespace { +TranslatedCell translateCell(PathView path, const SplitCellView& splitCellView) { + value::ColumnStoreEncoder encoder; + SplitCellView::Cursor<value::ColumnStoreEncoder> cellCursor = + splitCellView.subcellValuesGenerator<value::ColumnStoreEncoder>(std::move(encoder)); + return TranslatedCell{splitCellView.arrInfo, path, std::move(cellCursor)}; +} + + +} // namespace + ColumnScanStage::ColumnScanStage(UUID collectionUuid, StringData columnIndexName, value::SlotVector fieldSlots, @@ -84,7 +97,7 @@ void ColumnScanStage::prepare(CompileCtx& ctx) { for (size_t idx = 0; idx < _outputFields.size(); ++idx) { auto [it, inserted] = _outputFieldsMap.emplace(_fieldSlots[idx], &_outputFields[idx]); - uassert(6298601, str::stream() << "duplicate slot: " << _fieldSlots[idx], inserted); + uassert(6610212, str::stream() << "duplicate slot: " << _fieldSlots[idx], inserted); } if (_recordSlot) { @@ -104,8 +117,16 @@ void ColumnScanStage::prepare(CompileCtx& ctx) { _pathExprsCode.emplace_back(expr->compile(ctx)); } - tassert(6298602, "'_coll' should not be initialized prior to 'acquireCollection()'", !_coll); + tassert(6610200, "'_coll' should not be initialized prior to 'acquireCollection()'", !_coll); std::tie(_coll, _collName, _catalogEpoch) = acquireCollection(_opCtx, _collUuid); + + auto indexCatalog = _coll->getIndexCatalog(); + auto indexDesc = indexCatalog->findIndexByName(_opCtx, _columnIndexName); + tassert(6610201, + str::stream() << "could not find index named '" << _columnIndexName + << "' in collection '" << _collName << "'", + indexDesc); + _weakIndexCatalogEntry = indexCatalog->getEntryShared(indexDesc); } value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId slot) { @@ -128,6 +149,10 @@ value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId } void ColumnScanStage::doSaveState(bool relinquishCursor) { + for (auto& cursor : _columnCursors) { + cursor.makeOwned(); + } + if (_rowStoreCursor && relinquishCursor) { _rowStoreCursor->save(); } @@ -137,10 +162,10 @@ void ColumnScanStage::doSaveState(bool relinquishCursor) { } for (auto& cursor : _columnCursors) { - cursor.cursor->save(); + cursor.cursor().save(); } for (auto& [path, cursor] : _parentPathCursors) { - cursor->save(); + cursor->saveUnpositioned(); } _coll.reset(); @@ -155,9 +180,14 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) { return; } - tassert(6298603, "Catalog epoch should be initialized", _catalogEpoch); + tassert(6610202, "Catalog epoch should be initialized", _catalogEpoch); _coll = restoreCollection(_opCtx, *_collName, _collUuid, *_catalogEpoch); + auto indexCatalogEntry = _weakIndexCatalogEntry.lock(); + uassert(ErrorCodes::QueryPlanKilled, + str::stream() << "query plan killed :: index '" << _columnIndexName << "' dropped", + indexCatalogEntry && !indexCatalogEntry->isDropped()); + if (_rowStoreCursor) { if (relinquishCursor) { const bool couldRestore = _rowStoreCursor->restore(); @@ -166,7 +196,7 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) { } for (auto& cursor : _columnCursors) { - cursor.cursor->restore(); + cursor.cursor().restore(); } for (auto& [path, cursor] : _parentPathCursors) { cursor->restore(); @@ -178,7 +208,7 @@ void ColumnScanStage::doDetachFromOperationContext() { _rowStoreCursor->detachFromOperationContext(); } for (auto& cursor : _columnCursors) { - cursor.cursor->detachFromOperationContext(); + cursor.cursor().detachFromOperationContext(); } for (auto& [path, cursor] : _parentPathCursors) { cursor->detachFromOperationContext(); @@ -190,7 +220,7 @@ void ColumnScanStage::doAttachToOperationContext(OperationContext* opCtx) { _rowStoreCursor->reattachToOperationContext(opCtx); } for (auto& cursor : _columnCursors) { - cursor.cursor->reattachToOperationContext(opCtx); + cursor.cursor().reattachToOperationContext(opCtx); } for (auto& [path, cursor] : _parentPathCursors) { cursor->reattachToOperationContext(opCtx); @@ -214,18 +244,18 @@ void ColumnScanStage::open(bool reOpen) { invariant(_opCtx); if (_open) { - tassert(6298604, "reopened ColumnScanStage but reOpen=false", reOpen); - tassert(6298605, "ColumnScanStage is open but _coll is not null", _coll); - tassert(6298606, "ColumnScanStage is open but don't have _rowStoreCursor", _rowStoreCursor); + tassert(6610203, "reopened ColumnScanStage but reOpen=false", reOpen); + tassert(6610204, "ColumnScanStage is open but _coll is not null", _coll); + tassert(6610205, "ColumnScanStage is open but don't have _rowStoreCursor", _rowStoreCursor); } else { - tassert(6298607, "first open to ColumnScanStage but reOpen=true", !reOpen); + tassert(6610206, "first open to ColumnScanStage but reOpen=true", !reOpen); if (!_coll) { // We're being opened after 'close()'. We need to re-acquire '_coll' in this case and // make some validity checks (the collection has not been dropped, renamed, etc.). tassert( - 6298608, "ColumnScanStage is not open but have _rowStoreCursor", !_rowStoreCursor); - tassert(6298609, "Collection name should be initialized", _collName); - tassert(6298610, "Catalog epoch should be initialized", _catalogEpoch); + 6610207, "ColumnScanStage is not open but have _rowStoreCursor", !_rowStoreCursor); + tassert(6610208, "Collection name should be initialized", _collName); + tassert(6610209, "Catalog epoch should be initialized", _catalogEpoch); _coll = restoreCollection(_opCtx, *_collName, _collUuid, *_catalogEpoch); } } @@ -235,24 +265,29 @@ void ColumnScanStage::open(bool reOpen) { } if (_columnCursors.empty()) { - // We fake the special Row ID column by just using _id, for now. - _columnCursors.push_back(ColumnCursor{ - std::make_unique<FakeCursorForPath>("_id", _coll->getCursor(_opCtx, true)), - boost::none, - false /* add to document */ - }); + auto entry = _weakIndexCatalogEntry.lock(); + tassert(6610210, + str::stream() << "expected IndexCatalogEntry for index named: " << _columnIndexName, + static_cast<bool>(entry)); + + auto iam = static_cast<ColumnStoreAccessMethod*>(entry->accessMethod()); + + // Eventually we can not include this column for the cases where a known dense column (_id) + // is being read anyway. + _columnCursors.push_back(ColumnCursor(iam->storage()->newCursor(_opCtx, "\xFF"_sd), + false /* add to document */)); for (auto&& path : _paths) { - _columnCursors.push_back(ColumnCursor{ - std::make_unique<FakeCursorForPath>(path, _coll->getCursor(_opCtx, true)), - boost::none, - true /* add to document */ - }); + _columnCursors.push_back( + ColumnCursor(iam->storage()->newCursor(_opCtx, path), true /* add to document */)); } } + for (auto& columnCursor : _columnCursors) { + columnCursor.seekAtOrPast(RecordId()); + } + _open = true; - _firstGetNext = true; } void ColumnScanStage::readParentsIntoObj(StringData path, @@ -282,21 +317,28 @@ void ColumnScanStage::readParentsIntoObj(StringData path, // If we inserted a new entry, replace the null with an actual cursor. if (inserted) { invariant(it->second == nullptr); - it->second = std::make_unique<FakeCursorForPath>(*parent, _coll->getCursor(_opCtx, true)); + + auto entry = _weakIndexCatalogEntry.lock(); + tassert(6610211, + str::stream() << "expected IndexCatalogEntry for index named: " << _columnIndexName, + static_cast<bool>(entry)); + auto iam = static_cast<ColumnStoreAccessMethod*>(entry->accessMethod()); + it->second = iam->storage()->newCursor(_opCtx, *parent); } - auto optCell = it->second->seekExact(_recordId); - pathsReadSetOut->insert(*parent); + boost::optional<SplitCellView> splitCellView; + if (auto optCell = it->second->seekExact(_recordId)) { + splitCellView = SplitCellView::parse(optCell->value); + } - if (!optCell || optCell->isSparse) { + pathsReadSetOut->insert(*parent); + if (!splitCellView || splitCellView->isSparse) { // We need this cell's parent too. readParentsIntoObj(*parent, outObj, pathsReadSetOut, false); } - if (optCell) { - auto translatedCell = - TranslatedCell{optCell->arrayInfo, *parent, optCell->tags, optCell->vals}; - + if (splitCellView) { + auto translatedCell = translateCell(*parent, *splitCellView); addCellToObject(translatedCell, *outObj); } } @@ -311,17 +353,10 @@ PlanState ColumnScanStage::getNext() { checkForInterrupt(_opCtx); - if (_firstGetNext) { - for (size_t i = 0; i < _columnCursors.size(); ++i) { - _columnCursors[i].next(); - } - _firstGetNext = false; - } - // Find minimum record ID of all column cursors. _recordId = RecordId(); for (auto& cursor : _columnCursors) { - auto& result = cursor.lastCell; + auto& result = cursor.lastCell(); if (result && (_recordId.isNull() || result->rid < _recordId)) { _recordId = result->rid; } @@ -338,38 +373,35 @@ PlanState ColumnScanStage::getNext() { StringDataSet parentPathsRead; bool useRowStore = false; for (size_t i = 0; i < _columnCursors.size(); ++i) { - auto& lastCell = _columnCursors[i].lastCell; + auto& lastCell = _columnCursors[i].lastCell(); + const auto& path = _columnCursors[i].path(); - const FakeCell* cellForRid = - (lastCell && lastCell->rid == _recordId) ? lastCell.get_ptr() : nullptr; - const auto& path = _columnCursors[i].cursor->path(); + boost::optional<SplitCellView> splitCellView; + if (lastCell && lastCell->rid == _recordId) { + splitCellView = SplitCellView::parse(lastCell->value); + } - if (cellForRid && (cellForRid->hasSubPaths || cellForRid->hasDuplicateFields)) { + if (splitCellView && (splitCellView->hasSubPaths || splitCellView->hasDuplicateFields)) { useRowStore = true; - } else if (!useRowStore && _columnCursors[i].includeInOutput) { - if (!cellForRid || cellForRid->isSparse) { + } else if (!useRowStore && _columnCursors[i].includeInOutput()) { + if (!splitCellView || splitCellView->isSparse) { // Must read in the parent information first. readParentsIntoObj(path, &outObj, &parentPathsRead); } - if (cellForRid) { - auto translatedCell = TranslatedCell{cellForRid->arrayInfo, - _columnCursors[i].path(), - cellForRid->tags, - cellForRid->vals}; + if (splitCellView) { + auto translatedCell = translateCell(path, *splitCellView); addCellToObject(translatedCell, outObj); } } - if (cellForRid) { + if (splitCellView) { _columnCursors[i].next(); } } if (useRowStore) { - std::cout << "Using row store for rid " << _recordId << std::endl; - // TODO: In some cases we can avoid calling seek() on the row store cursor, and instead do // a next() which should be much cheaper. auto record = _rowStoreCursor->seekExact(_recordId); diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h index e555d10a114..d00d4641171 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.h +++ b/src/mongo/db/exec/sbe/stages/column_scan.h @@ -30,10 +30,10 @@ #pragma once #include "mongo/config.h" -#include "mongo/db/exec/fake_column_cursor.h" #include "mongo/db/exec/sbe/expressions/expression.h" #include "mongo/db/exec/sbe/stages/collection_helpers.h" #include "mongo/db/exec/sbe/stages/stages.h" +#include "mongo/db/storage/column_store.h" namespace mongo { namespace sbe { @@ -78,29 +78,72 @@ protected: TrialRunTracker* tracker, TrialRunTrackerAttachResultMask childrenAttachResult) override; private: - struct ColumnCursor { - std::unique_ptr<FakeCursorForPath> cursor; - boost::optional<FakeCell> lastCell; - bool includeInOutput = false; + class ColumnCursor { + public: + ColumnCursor(std::unique_ptr<ColumnStore::CursorForPath> curs, bool includeInResult) + : _cursor(std::move(curs)), _includeInOutput(includeInResult) {} - boost::optional<FakeCell>& next() { + + boost::optional<FullCellView>& next() { // TODO For some reason the destructor of 'lastCell' is not called // on my local asan build unless we explicitly reset it. Maybe // the same compiler bug Nikita ran into? - lastCell.reset(); - lastCell = cursor->next(); - return lastCell; + _lastCell.reset(); + _lastCell = _cursor->next(); + clearOwned(); + return _lastCell; } - boost::optional<FakeCell>& seekAtOrPast(RecordId id) { - lastCell.reset(); - lastCell = cursor->seekAtOrPast(id); - return lastCell; + boost::optional<FullCellView>& seekAtOrPast(RecordId id) { + _lastCell.reset(); + _lastCell = _cursor->seekAtOrPast(id); + clearOwned(); + return _lastCell; } const PathValue& path() const { - return cursor->path(); + return _cursor->path(); + } + + /* + * Copies any data owned by the storage engine into a locally owned buffer. + */ + void makeOwned() { + if (_lastCell && _pathOwned.empty() && _cellOwned.empty()) { + _pathOwned.insert( + _pathOwned.begin(), _lastCell->path.begin(), _lastCell->path.end()); + _lastCell->path = StringData(_pathOwned); + + _cellOwned.insert( + _cellOwned.begin(), _lastCell->value.begin(), _lastCell->value.end()); + _lastCell->value = StringData(_cellOwned.data(), _cellOwned.size()); + } + } + ColumnStore::CursorForPath& cursor() { + return *_cursor; + } + bool includeInOutput() const { + return _includeInOutput; + } + boost::optional<FullCellView>& lastCell() { + return _lastCell; } + + private: + void clearOwned() { + _pathOwned.clear(); + _cellOwned.clear(); + } + + std::unique_ptr<ColumnStore::CursorForPath> _cursor; + bool _includeInOutput = false; + + boost::optional<FullCellView> _lastCell; + + // These members are used to store owned copies of the path and the cell data when preparing + // for yield. + std::string _pathOwned; + std::vector<char> _cellOwned; }; void readParentsIntoObj(StringData path, @@ -140,15 +183,15 @@ private: CollectionPtr _coll; + std::weak_ptr<const IndexCatalogEntry> _weakIndexCatalogEntry; std::unique_ptr<SeekableRecordCursor> _rowStoreCursor; std::vector<ColumnCursor> _columnCursors; - StringMap<std::unique_ptr<FakeCursorForPath>> _parentPathCursors; + StringMap<std::unique_ptr<ColumnStore::CursorForPath>> _parentPathCursors; RecordId _recordId; bool _open{false}; - bool _firstGetNext{false}; // If provided, used during a trial run to accumulate certain execution stats. Once the trial // run is complete, this pointer is reset to nullptr. diff --git a/src/mongo/db/exec/sbe/values/columnar.cpp b/src/mongo/db/exec/sbe/values/columnar.cpp index 5292ba54bea..7490d549803 100644 --- a/src/mongo/db/exec/sbe/values/columnar.cpp +++ b/src/mongo/db/exec/sbe/values/columnar.cpp @@ -97,9 +97,10 @@ private: * Tracks state necessary for reconstructing objects including the array info reader, the values * available for extraction, and the path. */ +template <class C> class AddToDocumentState { public: - AddToDocumentState(TranslatedCell& translatedCell, ArrInfoReader reader) + AddToDocumentState(C& translatedCell, ArrInfoReader reader) : cell(translatedCell), arrInfoReader(std::move(reader)) {} /* @@ -146,7 +147,7 @@ public: return offsetInPath == cell.path.size(); } - TranslatedCell& cell; + C& cell; ArrInfoReader arrInfoReader; private: @@ -218,9 +219,10 @@ value::Array* findOrAddArrInArr(size_t idx, sbe::value::Array* arr) { * Adds the given tag,val SBE value to the 'out' object, assuming that there are no arrays along * the remaining path stored by 'state'. */ +template <class C> void addToObjectNoArrays(value::TypeTags tag, value::Value val, - AddToDocumentState& state, + AddToDocumentState<C>& state, value::Object& out, size_t idx) { state.withNextPathComponent([&](StringData nextPathComponent) { @@ -235,8 +237,11 @@ void addToObjectNoArrays(value::TypeTags tag, }); } -void addToObject(value::Object& obj, AddToDocumentState& state); -void addToArray(value::Array& arr, AddToDocumentState& state) { +template <class C> +void addToObject(value::Object& obj, AddToDocumentState<C>& state); + +template <class C> +void addToArray(value::Array& arr, AddToDocumentState<C>& state) { size_t index = state.arrInfoReader.takeNumber(); auto ensureArraySize = [&]() { while (arr.size() <= index) { @@ -310,7 +315,8 @@ void addToArray(value::Array& arr, AddToDocumentState& state) { } } -void addToObject(value::Object& obj, AddToDocumentState& state) { +template <class C> +void addToObject(value::Object& obj, AddToDocumentState<C>& state) { state.withNextPathComponent([&](StringData field) { switch (state.arrInfoReader.takeNextChar()) { case '{': { @@ -332,7 +338,8 @@ void addToObject(value::Object& obj, AddToDocumentState& state) { }); } -void addEmptyObjectIfNotPresent(AddToDocumentState& state, value::Object& out) { +template <class C> +void addEmptyObjectIfNotPresent(AddToDocumentState<C>& state, value::Object& out) { // Add an object to the path. state.withNextPathComponent([&](StringData nextPathComponent) { auto* innerObj = findOrAddObjInObj(nextPathComponent, &out); @@ -342,8 +349,10 @@ void addEmptyObjectIfNotPresent(AddToDocumentState& state, value::Object& out) { }); } } // namespace -void addCellToObject(TranslatedCell& cell, value::Object& out) { - AddToDocumentState state{cell, ArrInfoReader{cell.arrInfo}}; + +template <class C> +void addCellToObject(C& cell, value::Object& out) { + AddToDocumentState<C> state{cell, ArrInfoReader{cell.arrInfo}}; if (cell.arrInfo.empty()) { if (cell.moreValues()) { @@ -361,4 +370,7 @@ void addCellToObject(TranslatedCell& cell, value::Object& out) { addToObject(out, state); invariant(!state.arrInfoReader.moreExplicitComponents()); } + +template void addCellToObject<TranslatedCell>(TranslatedCell& cell, value::Object& out); +template void addCellToObject<MockTranslatedCell>(MockTranslatedCell& cell, value::Object& out); } // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/values/columnar.h b/src/mongo/db/exec/sbe/values/columnar.h index a6f8e4f651f..ec0fb5f07c8 100644 --- a/src/mongo/db/exec/sbe/values/columnar.h +++ b/src/mongo/db/exec/sbe/values/columnar.h @@ -31,6 +31,7 @@ #include "mongo/config.h" #include "mongo/db/exec/sbe/expressions/expression.h" +#include "mongo/db/exec/sbe/values/column_store_encoder.h" #include "mongo/db/storage/column_store.h" /** @@ -43,7 +44,24 @@ namespace mongo::sbe { */ struct TranslatedCell { StringData arrInfo; + PathView path; + + SplitCellView::Cursor<value::ColumnStoreEncoder> cursor; + + std::pair<value::TypeTags, value::Value> nextValue() { + auto next = cursor.nextValue(); + invariant(next); + return *next; + } + bool moreValues() const { + return cursor.hasNext(); + } +}; + +// For testing only. +struct MockTranslatedCell { + StringData arrInfo; PathView path; std::vector<value::TypeTags> types; @@ -64,10 +82,10 @@ struct TranslatedCell { } }; - /* * Adds translated cell to an object. This must not be called on an object * which has a structure that is incompatible with the structure described in the cell. */ -void addCellToObject(TranslatedCell& cell, value::Object& out); +template <class T> +void addCellToObject(T& cell, value::Object& out); } // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/values/columnar_test.cpp b/src/mongo/db/exec/sbe/values/columnar_test.cpp index 6870bc382f4..9dc9e7717d0 100644 --- a/src/mongo/db/exec/sbe/values/columnar_test.cpp +++ b/src/mongo/db/exec/sbe/values/columnar_test.cpp @@ -38,13 +38,13 @@ #include "mongo/db/storage/column_store.h" namespace mongo::sbe { -void makeObjFromColumns(std::vector<TranslatedCell>& cells, value::Object& out) { +void makeObjFromColumns(std::vector<MockTranslatedCell>& cells, value::Object& out) { for (auto& cell : cells) { addCellToObject(cell, out); } } -void compareMakeObjWithExpected(std::vector<TranslatedCell>& cells, const BSONObj& expected) { +void compareMakeObjWithExpected(std::vector<MockTranslatedCell>& cells, const BSONObj& expected) { auto [expectedTag, expectedVal] = stage_builder::makeValue(expected); value::ValueGuard expectedGuard{expectedTag, expectedVal}; @@ -56,72 +56,73 @@ void compareMakeObjWithExpected(std::vector<TranslatedCell>& cells, const BSONOb expectedVal); } -TranslatedCell makeCellOfIntegers(StringData path, - StringData arrInfo, - std::vector<unsigned int> vals) { - return TranslatedCell{arrInfo, - path, - std::vector<value::TypeTags>(vals.size(), value::TypeTags::NumberInt32), - std::vector<value::Value>(vals.begin(), vals.end())}; +MockTranslatedCell makeCellOfIntegers(StringData path, + StringData arrInfo, + std::vector<unsigned int> vals) { + return MockTranslatedCell{ + arrInfo, + path, + std::vector<value::TypeTags>(vals.size(), value::TypeTags::NumberInt32), + std::vector<value::Value>(vals.begin(), vals.end())}; } TEST(ColumnarObjTest, MakeObjNoArrTest) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "", {32}), - makeCellOfIntegers("a.d", "", {36})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "", {32}), + makeCellOfIntegers("a.d", "", {36})}; compareMakeObjWithExpected(cells, fromjson("{a: {b: 32, d: 36}}")); } TEST(ColumnarObjTest, MakeObjArrOfScalarsTest) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[", {32, 33, 34, 35, 36})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[", {32, 33, 34, 35, 36})}; compareMakeObjWithExpected(cells, fromjson("{a: [32, 33, 34, 35, 36]}")); } TEST(ColumnarObjTest, MakeObjSingletonArrayOfObj) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "[", {32})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "[", {32})}; compareMakeObjWithExpected(cells, fromjson("{a: [{b:32}]}")); } TEST(ColumnarObjTest, MakeObjArrOfObjectsTest) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "[", {1, 2, 3})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "[", {1, 2, 3})}; compareMakeObjWithExpected( cells, BSON("a" << BSON_ARRAY(BSON("b" << 1) << BSON("b" << 2) << BSON("b" << 3)))); } TEST(ColumnarObjTest, MakeObjBasicArrOfObjectsWithMultipleFields) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "[", {1, 2, 3}), - makeCellOfIntegers("a.c", "[", {101, 102, 103})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "[", {1, 2, 3}), + makeCellOfIntegers("a.c", "[", {101, 102, 103})}; compareMakeObjWithExpected(cells, fromjson("{a: [{b:1, c:101}, {b:2, c: 102}, {b:3, c: 103}]}")); } TEST(ColumnarObjTest, MakeObjComplexLeafArray) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "", {}), - makeCellOfIntegers("a.b", "", {}), - makeCellOfIntegers("a.b.c", "{{[[|1][|]", {1, 2, 3, 4})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "", {}), + makeCellOfIntegers("a.b", "", {}), + makeCellOfIntegers("a.b.c", "{{[[|1][|]", {1, 2, 3, 4})}; compareMakeObjWithExpected(cells, fromjson("{a:{b:{c:[[1,2],[3],4]}}}")); } TEST(ColumnarObjTest, MakeObjArrayOfArrays) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[[|1][[|]", {1, 2, 3, 4})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[[|1][[|]", {1, 2, 3, 4})}; compareMakeObjWithExpected(cells, fromjson("{a: [[1,2], [[3], 4]]}")); } TEST(ColumnarObjTest, MakeObjArrayOfMixed) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[|+2", {1, 4}), - makeCellOfIntegers("a.b", "[1", {2}), - makeCellOfIntegers("a.c", "[2", {3})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[|+2", {1, 4}), + makeCellOfIntegers("a.b", "[1", {2}), + makeCellOfIntegers("a.c", "[2", {3})}; compareMakeObjWithExpected(cells, fromjson("{a: [1, {b:2}, {c: 3}, 4]}")); } TEST(ColumnarObjTest, MakeObjArrayOfMixed2) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[|o", {1, 3}), - makeCellOfIntegers("a.b", "[1{[", {2})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[|o", {1, 3}), + makeCellOfIntegers("a.b", "[1{[", {2})}; compareMakeObjWithExpected(cells, fromjson("{a:[1,{b:[2]},3]}")); } TEST(ColumnarObjTest, MakeObjTopLevelArrayOfMixed) { - std::vector<TranslatedCell> cells{ + std::vector<MockTranslatedCell> cells{ makeCellOfIntegers("a", "[o1|o5", {99}), makeCellOfIntegers("a.b", "[{[|3]+2{[[|]]{[", {101, 0, 101, 1, 2, 3, 4, 5, 6}), makeCellOfIntegers("a.c", "[1|+1", {0, 1}), @@ -140,15 +141,16 @@ TEST(ColumnarObjTest, MakeObjTopLevelArrayOfMixed) { } TEST(ColumnarObjTest, MakeObjTopLevelObjWithMixedArrays) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "", {}), - makeCellOfIntegers("a.b", "{[[|1][o]", {1, 2, 2}), - makeCellOfIntegers("a.b.c", "{[1[{[[|1][|]", {1, 2, 99, 2}), - makeCellOfIntegers("a.x", "", {1})}; + std::vector<MockTranslatedCell> cells{ + makeCellOfIntegers("a", "", {}), + makeCellOfIntegers("a.b", "{[[|1][o]", {1, 2, 2}), + makeCellOfIntegers("a.b.c", "{[1[{[[|1][|]", {1, 2, 99, 2}), + makeCellOfIntegers("a.x", "", {1})}; compareMakeObjWithExpected(cells, fromjson("{a:{b:[[1,2],[{c:[[1,2],[99],2]}],2], x:1}}")); } TEST(ColumnarObjTest, MakeObjTopLevelArrayWithSubArrays) { - std::vector<TranslatedCell> cells{ + std::vector<MockTranslatedCell> cells{ makeCellOfIntegers("a", "[[|1][o]", {1, 2, 2}), makeCellOfIntegers("a.b", "[1[{[[|1][o]", {1, 2, 2}), makeCellOfIntegers("a.b.c", "[1[{[1[{[[|1][|]", {1, 2, 3, 4})}; @@ -157,46 +159,46 @@ TEST(ColumnarObjTest, MakeObjTopLevelArrayWithSubArrays) { } TEST(ColumnarObjTest, MakeTopLevelArrayOfObjsSparse) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[o1", {}), - makeCellOfIntegers("a.b", "[o", {1}), - makeCellOfIntegers("a.b.c", "[", {1})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[o1", {}), + makeCellOfIntegers("a.b", "[o", {1}), + makeCellOfIntegers("a.b.c", "[", {1})}; compareMakeObjWithExpected(cells, fromjson("{a:[{b:{c:1}},{b:1}]}")); } TEST(ColumnarObjTest, MakeObjEmptyTopLevelField) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("", "[", {1, 2, 3})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("", "[", {1, 2, 3})}; compareMakeObjWithExpected(cells, fromjson("{'': [1,2,3]}")); } TEST(ColumnarObjTest, MakeObjEmptyFieldWhichIsObject) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("..a", "{{[", {1, 2, 3})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("..a", "{{[", {1, 2, 3})}; compareMakeObjWithExpected(cells, fromjson("{'': {'': {a: [1,2,3]}}}")); } TEST(ColumnarObjTest, MakeObjEmptyFieldWhichIsLeaf) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a.", "{[", {1, 2, 3})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.", "{[", {1, 2, 3})}; compareMakeObjWithExpected(cells, fromjson("{a: {'': [1,2,3]}}")); } TEST(ColumnarObjTest, AddTopLevelNonLeafCellWithoutArrayInfoToObject) { // Cell with no array info or values indicates an object. - std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "", {})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "", {})}; compareMakeObjWithExpected(cells, fromjson("{a: {}}")); } TEST(ColumnarObjTest, AddNonLeafCellWithoutArrayInfoToObject) { // Cell with no array info or values indicates an object. - std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "", {})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "", {})}; compareMakeObjWithExpected(cells, fromjson("{a: {b: {}}}")); } TEST(ColumnarObjTest, AddTopLevelNonLeafCellWithArrayInfoToObject) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a", "[o1", {})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a", "[o1", {})}; compareMakeObjWithExpected(cells, fromjson("{a: [{}, {}]}")); } TEST(ColumnarObjTest, AddNonLeafCellWithArrayInfoToObject) { - std::vector<TranslatedCell> cells{makeCellOfIntegers("a.b", "{[o1", {})}; + std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "{[o1", {})}; compareMakeObjWithExpected(cells, fromjson("{a: {b: [{}, {}]}}")); } } // namespace mongo::sbe diff --git a/src/mongo/db/index/columns_access_method.cpp b/src/mongo/db/index/columns_access_method.cpp index ce0e5e7225b..015886f4e18 100644 --- a/src/mongo/db/index/columns_access_method.cpp +++ b/src/mongo/db/index/columns_access_method.cpp @@ -151,12 +151,20 @@ Status ColumnStoreAccessMethod::BulkBuilder::commit(OperationContext* opCtx, const RecordIdHandlerFn& onDuplicateRecord) { static constexpr size_t kBufferBlockSize = 1024; SharedBufferFragmentBuilder pooledBufferBuilder(kBufferBlockSize); - return _columnsAccess->insert(opCtx, - pooledBufferBuilder, - collection, - _deferredInserts, - InsertDeleteOptions{}, - &_keysInserted); + + WriteUnitOfWork wunit(opCtx); + auto status = _columnsAccess->insert(opCtx, + pooledBufferBuilder, + collection, + _deferredInserts, + InsertDeleteOptions{}, + &_keysInserted); + if (!status.isOK()) { + return status; + } + + wunit.commit(); + return Status::OK(); } void ColumnStoreAccessMethod::insertOne(OperationContext* opCtx, diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 9ca9ce66f56..fa810499cb4 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -218,9 +218,16 @@ void tryToAddColumnScan(const QueryPlannerParams& params, } // TODO SERVER-63123: Check if the columnar index actually provides the fields we need. - auto [filterSplitByColumn, residualPredicate] = - expression::splitMatchExpressionForColumns(query.root()); - auto canPushFilters = filterSplitByColumn.size() > 0; + std::unique_ptr<MatchExpression> residualPredicate; + StringMap<std::unique_ptr<MatchExpression>> filterSplitByColumn; + if (params.options & QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS) { + std::tie(filterSplitByColumn, residualPredicate) = + expression::splitMatchExpressionForColumns(query.root()); + } else { + residualPredicate = query.root()->shallowClone(); + } + const bool canPushFilters = filterSplitByColumn.size() > 0; + auto columnScan = std::make_unique<ColumnIndexScanNode>(params.columnarIndexes.front(), std::move(outputDeps.fields), std::move(filterDeps.fields), @@ -327,6 +334,9 @@ string optionString(size_t options) { case QueryPlannerParams::RETURN_OWNED_DATA: ss << "RETURN_OWNED_DATA "; break; + case QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS: + ss << "GENERATE_PER_COLUMN_FILTERS "; + break; case QueryPlannerParams::DEFAULT: MONGO_UNREACHABLE; break; diff --git a/src/mongo/db/query/query_planner_columnar_test.cpp b/src/mongo/db/query/query_planner_columnar_test.cpp index 679f6374383..7698a24a08e 100644 --- a/src/mongo/db/query/query_planner_columnar_test.cpp +++ b/src/mongo/db/query/query_planner_columnar_test.cpp @@ -67,8 +67,10 @@ protected: kInternalQueryMaxNumberOfFieldsToChooseFilteredColumnScanDefault); } - void addColumnarIndex() { + void addColumnarIndexAndEnableFilterSplitting() { params.columnarIndexes.emplace_back(kIndexName); + + params.options |= QueryPlannerParams::GENERATE_PER_COLUMN_FILTERS; } std::vector<std::unique_ptr<InnerPipelineStageInterface>> makeInnerPipelineStages( @@ -82,7 +84,7 @@ protected: }; TEST_F(QueryPlannerColumnarTest, InclusionProjectionUsesColumnarIndex) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << BSON("$gt" << 3)), BSONObj(), BSON("a" << 1 << "_id" << 0)); @@ -99,7 +101,7 @@ TEST_F(QueryPlannerColumnarTest, InclusionProjectionUsesColumnarIndex) { } TEST_F(QueryPlannerColumnarTest, ExpressionProjectionUsesColumnarIndex) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << BSON("$gt" << 3)), BSONObj(), fromjson(R"({ a: 1, @@ -124,7 +126,7 @@ TEST_F(QueryPlannerColumnarTest, ExpressionProjectionUsesColumnarIndex) { } TEST_F(QueryPlannerColumnarTest, ImplicitlyIncludedIdIsIncludedInProjectedFields) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << BSON("$gt" << 3)), BSONObj(), BSON("a" << 1)); @@ -144,7 +146,7 @@ TEST_F(QueryPlannerColumnarTest, ImplicitlyIncludedIdIsIncludedInProjectedFields } TEST_F(QueryPlannerColumnarTest, InclusionProjectionWithSortUsesColumnarIndexAndBlockingSort) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); runQuerySortProj(BSONObj(), BSON("a" << 1), BSON("a" << 1 << "_id" << 0)); @@ -164,7 +166,7 @@ TEST_F(QueryPlannerColumnarTest, InclusionProjectionWithSortUsesColumnarIndexAnd } TEST_F(QueryPlannerColumnarTest, SortOnSeparateColumnAddsThatColumnToColumnScan) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); runQuerySortProj(BSONObj(), BSON("b" << 1), BSON("a" << 1 << "_id" << 0)); @@ -184,7 +186,7 @@ TEST_F(QueryPlannerColumnarTest, SortOnSeparateColumnAddsThatColumnToColumnScan) } TEST_F(QueryPlannerColumnarTest, ExclusionProjectionDoesNotUseColumnarIndex) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 0 << "_id" << 0)); assertNumSolutions(1U); @@ -192,7 +194,7 @@ TEST_F(QueryPlannerColumnarTest, ExclusionProjectionDoesNotUseColumnarIndex) { } TEST_F(QueryPlannerColumnarTest, NoProjectionDoesNotUseColumnarIndex) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << 1), BSONObj(), BSONObj()); assertNumSolutions(1U); @@ -200,7 +202,7 @@ TEST_F(QueryPlannerColumnarTest, NoProjectionDoesNotUseColumnarIndex) { } TEST_F(QueryPlannerColumnarTest, ProjectionWithTooManyFieldsDoesNotUseColumnarIndex) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "b" << 1 << "c" << 1)); @@ -209,7 +211,7 @@ TEST_F(QueryPlannerColumnarTest, ProjectionWithTooManyFieldsDoesNotUseColumnarIn } TEST_F(QueryPlannerColumnarTest, ExpressionProjectionWithTooManyFieldsDoesnotUseColumnarIndex) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); // This will need 3 fields for the $concat, so should not be able to use a column scan. @@ -221,7 +223,7 @@ TEST_F(QueryPlannerColumnarTest, ExpressionProjectionWithTooManyFieldsDoesnotUse // Test with a number of fields equal to the limit. TEST_F(QueryPlannerColumnarTest, ImplicitIdCountsTowardsFieldLimit) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); runQuerySortProj( @@ -231,7 +233,7 @@ TEST_F(QueryPlannerColumnarTest, ImplicitIdCountsTowardsFieldLimit) { } TEST_F(QueryPlannerColumnarTest, ProjectionWithJustEnoughFieldsDoesUseColumnarIndex) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); // Without the '_id' this should be eligible. @@ -242,7 +244,7 @@ TEST_F(QueryPlannerColumnarTest, ProjectionWithJustEnoughFieldsDoesUseColumnarIn } TEST_F(QueryPlannerColumnarTest, DottedProjectionTooManyFieldsDoesNotUseColumnarIndex) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "b" << BSON("c" << 1 << "d" << 1))); @@ -252,7 +254,7 @@ TEST_F(QueryPlannerColumnarTest, DottedProjectionTooManyFieldsDoesNotUseColumnar TEST_F(QueryPlannerColumnarTest, ProjectionWithTooManyFieldsDoesNotUseColumnarIndexUnsupportedPredicate) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseUnfilteredColumnScan.store(2); runQuerySortProj(BSON("unsupported" << BSON("$exists" << false)), @@ -263,7 +265,7 @@ TEST_F(QueryPlannerColumnarTest, } TEST_F(QueryPlannerColumnarTest, StandardIndexPreferredOverColumnarIndex) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); addIndex(BSON("a" << 1)); runQuerySortProj(BSON("a" << 5), BSONObj(), BSON("a" << 1 << "_id" << 0)); @@ -273,7 +275,7 @@ TEST_F(QueryPlannerColumnarTest, StandardIndexPreferredOverColumnarIndex) { } TEST_F(QueryPlannerColumnarTest, IneligiblePredicateNeedsToBeAppliedAfterAssembly) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << BSONNULL), BSONObj(), BSON("a" << 1 << "_id" << 0)); assertNumSolutions(1U); @@ -293,7 +295,7 @@ TEST_F(QueryPlannerColumnarTest, IneligiblePredicateNeedsToBeAppliedAfterAssembl } TEST_F(QueryPlannerColumnarTest, MultiplePredicatesAllowedWithColumnarIndex) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); runQuerySortProj(BSON("a" << 2 << "b" << 3), BSONObj(), BSON("a" << 1 << "_id" << 0)); assertNumSolutions(1U); @@ -313,7 +315,7 @@ TEST_F(QueryPlannerColumnarTest, MultiplePredicatesAllowedWithColumnarIndex) { TEST_F(QueryPlannerColumnarTest, TooManyProjectedFieldsDisqualifiesColumnScanEvenWithEligiblePredicates) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.store(2); runQuerySortProj(BSON("a" << 2 << "b" << 3), BSONObj(), BSON("a" << 1 << "b" << 1 << "c" << 1)); @@ -322,7 +324,7 @@ TEST_F(QueryPlannerColumnarTest, } TEST_F(QueryPlannerColumnarTest, TooManyFilteredFieldsDisqualifiesColumnScan) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.store(2); runQuerySortProj(BSON("a" << 2 << "b" << 3 << "c" << 4), @@ -333,7 +335,7 @@ TEST_F(QueryPlannerColumnarTest, TooManyFilteredFieldsDisqualifiesColumnScan) { } TEST_F(QueryPlannerColumnarTest, FilterDependingOnWholeDocumentDisqualifiesColumnScan) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); // The projection only needs 1 field, but the match references '$$ROOT' so needs the whole // document. @@ -345,7 +347,7 @@ TEST_F(QueryPlannerColumnarTest, FilterDependingOnWholeDocumentDisqualifiesColum assertSolutionExists(R"({proj: {spec: {b: 1, _id: 0}, node: {cscan: {dir: 1}}}})"); } TEST_F(QueryPlannerColumnarTest, CombinationOfProjectedAndMatchedFieldsDisqualifiesColumnScan) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); // Neither the match nor the project mentions 4 fields, but together they exceed the threshhold. internalQueryMaxNumberOfFieldsToChooseFilteredColumnScan.store(4); @@ -357,7 +359,7 @@ TEST_F(QueryPlannerColumnarTest, CombinationOfProjectedAndMatchedFieldsDisqualif } TEST_F(QueryPlannerColumnarTest, NumberOfFieldsComputedUsingSetSize) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); // If there are 3 fields referenced in the match and 3 in the projection, but they overlap, we // should be OK to use column scan. @@ -380,7 +382,7 @@ TEST_F(QueryPlannerColumnarTest, NumberOfFieldsComputedUsingSetSize) { })"); } TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitDemo) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); auto complexPredicate = fromjson(R"({ a: {$gte: 0, $lt: 10}, @@ -410,7 +412,7 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitDemo) { } TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitsIntoParts) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); // Same predicate as above, except with exists: false, which disqualifies the whole thing. auto complexPredicate = fromjson(R"({ @@ -445,7 +447,7 @@ TEST_F(QueryPlannerColumnarTest, ComplexPredicateSplitsIntoParts) { } TEST_F(QueryPlannerColumnarTest, EmptyQueryPredicateIsEligible) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); runQuerySortProj(BSONObj(), BSONObj(), BSON("a" << 1 << "_id" << 0)); assertNumSolutions(1U); @@ -458,7 +460,7 @@ TEST_F(QueryPlannerColumnarTest, EmptyQueryPredicateIsEligible) { } TEST_F(QueryPlannerColumnarTest, GroupTest) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); auto pipeline = Pipeline::parse({fromjson("{$group: {_id: '$foo', s: {$sum: '$x'}}}")}, expCtx); @@ -493,7 +495,7 @@ TEST_F(QueryPlannerColumnarTest, GroupTest) { } TEST_F(QueryPlannerColumnarTest, MatchGroupTest) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); auto pipeline = Pipeline::parse({fromjson("{$group: {_id: '$foo', s: {$sum: '$x'}}}")}, expCtx); @@ -524,7 +526,7 @@ TEST_F(QueryPlannerColumnarTest, MatchGroupTest) { } TEST_F(QueryPlannerColumnarTest, MatchGroupWithOverlappingFieldsTest) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); auto pipeline = Pipeline::parse( {fromjson("{$group: {_id: '$foo', s: {$sum: '$x'}, name: {$first: '$name'}}}")}, expCtx); @@ -557,7 +559,7 @@ TEST_F(QueryPlannerColumnarTest, MatchGroupWithOverlappingFieldsTest) { } TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsIncluded) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); params.options |= QueryPlannerParams::INCLUDE_SHARD_FILTER; params.shardKey = BSON("sk1" << 1 << "sk2.nested" << 1); @@ -587,7 +589,7 @@ TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsIncluded) { } TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsCountTowardsFieldLimit) { - addColumnarIndex(); + addColumnarIndexAndEnableFilterSplitting(); params.options |= QueryPlannerParams::INCLUDE_SHARD_FILTER; params.shardKey = BSON("sk1" << 1 << "sk2.nested" << 1); @@ -613,4 +615,28 @@ TEST_F(QueryPlannerColumnarTest, ShardKeyFieldsCountTowardsFieldLimit) { } })"); } + +TEST_F(QueryPlannerColumnarTest, FullPredicateOption) { + params.columnarIndexes.emplace_back(kIndexName); + + // Filter that could be pushed down, but isn't due to the lack of the + // GENERATE_PER_COLUMN_FILTER flag. + auto predicate = fromjson(R"({ + specialAddress: {$exists: true}, + doNotContact: {$exists: true} + })"); + runQuerySortProj(predicate, BSONObj(), BSON("a" << 1 << "_id" << 0)); + assertSolutionExists(R"({ + proj: { + spec: {a: 1, _id: 0}, + node: { + column_scan: { + outputFields: ['a'], + postAssemblyFilter: { + specialAddress: {$exists: true}, + doNotContact: {$exists: true} + }, + matchFields: + ['specialAddress', 'doNotContact']}}}})"); +} } // namespace mongo diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h index 41d096d65d0..af643a632c8 100644 --- a/src/mongo/db/query/query_planner_params.h +++ b/src/mongo/db/query/query_planner_params.h @@ -139,6 +139,11 @@ struct QueryPlannerParams { // Ensure that any plan generated returns data that is "owned." That is, all BSONObjs are // in an "owned" state and are not pointing to data that belongs to the storage engine. RETURN_OWNED_DATA = 1 << 12, + + // When generating column scan queries, splits match expressions so that the filters can be + // applied per-column. This is off by default, since the execution side doesn't support it + // yet. + GENERATE_PER_COLUMN_FILTERS = 1 << 13, }; // See Options enum above. diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index 5ec6d124c29..f1e1e3d6e74 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -870,6 +870,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder "'postAssemblyFilter' to be used instead.", !csn->filter); + tassert(6610251, "Expected no filters by path", csn->filtersByPath.empty()); + PlanStageSlots outputs; auto recordSlot = _slotIdGenerator.generate(); @@ -906,7 +908,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder slotMap[rootStr] = rowStoreSlot; auto abt = builder.generateABT(); auto exprOut = abt ? abtToExpr(*abt, slotMap) : emptyExpr->clone(); - auto stage = std::make_unique<sbe::ColumnScanStage>( + std::unique_ptr<sbe::PlanStage> stage = std::make_unique<sbe::ColumnScanStage>( getCurrentCollection(reqs)->uuid(), csn->indexEntry.catalogName, fieldSlotIds, @@ -919,6 +921,22 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder _yieldPolicy, csn->nodeId()); + // Generate post assembly filter. + if (csn->postAssemblyFilter) { + auto relevantSlots = sbe::makeSV(recordSlot); + if (ridSlot) { + relevantSlots.push_back(*ridSlot); + } + relevantSlots.insert(relevantSlots.end(), fieldSlotIds.begin(), fieldSlotIds.end()); + + auto [_, outputStage] = generateFilter(_state, + csn->postAssemblyFilter.get(), + {std::move(stage), std::move(relevantSlots)}, + recordSlot, + csn->nodeId()); + stage = std::move(outputStage.stage); + } + return {std::move(stage), std::move(outputs)}; } diff --git a/src/mongo/db/storage/column_store.h b/src/mongo/db/storage/column_store.h index 39d3dd384c9..52367496611 100644 --- a/src/mongo/db/storage/column_store.h +++ b/src/mongo/db/storage/column_store.h @@ -380,22 +380,29 @@ struct SplitCellView { bool hasDoubleNestedArrays = false; template <class ValueEncoder> - auto subcellValuesGenerator(ValueEncoder&& valEncoder) const { - struct Cursor { - using Out = typename std::remove_reference_t<ValueEncoder>::Out; - Out nextValue() { - if (elemPtr == end) - return Out(); - - invariant(elemPtr < end); - return decodeAndAdvance(elemPtr, encoder); - } + struct Cursor { + using Out = typename std::remove_reference_t<ValueEncoder>::Out; + Out nextValue() { + if (elemPtr == end) + return Out(); + + invariant(elemPtr < end); + return decodeAndAdvance(elemPtr, encoder); + } + bool hasNext() const { + return elemPtr != end; + } - const char* elemPtr; - const char* end; - ValueEncoder encoder; - }; - return Cursor{firstValuePtr, arrInfo.rawData(), std::forward<ValueEncoder>(valEncoder)}; + const char* elemPtr; + const char* end; + ValueEncoder encoder; + }; + + + template <class ValueEncoder> + auto subcellValuesGenerator(ValueEncoder&& valEncoder) const { + return Cursor<ValueEncoder>{ + firstValuePtr, arrInfo.rawData(), std::forward<ValueEncoder>(valEncoder)}; } static SplitCellView parse(CellView cell) { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp index db5933605ab..9cd9acb83b9 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp @@ -127,7 +127,7 @@ std::string& WiredTigerColumnStore::makeKey(std::string& buffer, PathView path, // If we end up reserving more values, the above check should be changed. buffer += '\0'; } - rid.withFormat([](RecordId::Null) { /* do nothing */ }, + rid.withFormat([](RecordId::Null) { /* Do nothing. */ }, [&](int64_t num) { num = endian::nativeToBig(num); buffer.append(reinterpret_cast<const char*>(&num), sizeof(num)); @@ -234,6 +234,7 @@ void WiredTigerColumnStore::fullValidate(OperationContext* opCtx, int64_t* numKeysOut, IndexValidateResults* fullResults) const { // TODO SERVER-65484: Validation for column indexes. + // uasserted(ErrorCodes::NotImplemented, "WiredTigerColumnStore::fullValidate()"); return; } @@ -241,14 +242,17 @@ class WiredTigerColumnStore::Cursor final : public ColumnStore::Cursor, public WiredTigerIndexCursorGeneric { public: Cursor(OperationContext* opCtx, const WiredTigerColumnStore* idx) - : WiredTigerIndexCursorGeneric(opCtx, true /* forward */), _opCtx(opCtx), _idx(*idx) { + : WiredTigerIndexCursorGeneric(opCtx, true /* forward */), _idx(*idx) { _cursor.emplace(_idx.uri(), _idx._tableId, false, _opCtx); } boost::optional<FullCellView> next() override { - if (_eof) + if (_eof) { return {}; - if (!_lastMoveSkippedKey) + } + if (!_lastMoveSkippedKey) { advanceWTCursor(); + } + return curr(); } boost::optional<FullCellView> seekAtOrPast(PathView path, RecordId rid) override { @@ -303,7 +307,6 @@ public: private: void resetCursor() { WiredTigerIndexCursorGeneric::resetCursor(); - _eof = true; } bool seekWTCursor(bool exactOnly = false) { // Ensure an active transaction is open. @@ -377,8 +380,6 @@ private: // false by any operation that moves the cursor, other than subsequent save/restore pairs. bool _lastMoveSkippedKey = false; - OperationContext* _opCtx; - boost::optional<WiredTigerCursor> _cursor; const WiredTigerColumnStore& _idx; // not owned }; |