diff options
author | Dianna Hohensee <dianna.hohensee@mongodb.com> | 2022-12-21 17:27:52 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-12-21 17:58:18 +0000 |
commit | 42667e3c762bd83d700696ec4635e41fb8471c0f (patch) | |
tree | ce261b77ca80040883658b0e00afa14f515a28c1 | |
parent | 96eca5891fe7e7fa3cdc30950530249e2c109d31 (diff) | |
download | mongo-42667e3c762bd83d700696ec4635e41fb8471c0f.tar.gz |
SERVER-68377 Skip creating a dense column cursor during a column scan when an _id column cursor (also dense) is present
-rw-r--r-- | jstests/core/columnstore_index_correctness.js | 35 | ||||
-rw-r--r-- | jstests/noPassthroughWithMongod/column_scan_explain.js | 63 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.cpp | 85 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.h | 17 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 3 |
5 files changed, 133 insertions, 70 deletions
diff --git a/jstests/core/columnstore_index_correctness.js b/jstests/core/columnstore_index_correctness.js index 3ab0dd5b11a..1466324c98c 100644 --- a/jstests/core/columnstore_index_correctness.js +++ b/jstests/core/columnstore_index_correctness.js @@ -105,6 +105,41 @@ const coll = db.columnstore_index_correctness; } })(); +(function testColumnScanFindsAllDocumentsUsingDenseColumn() { + // Check that column scan projections without filters will return all the documents, leveraging + // dense columns internally regardless of whether _id is included or not. Internally, the dense + // RowId Column will be scanned if the dense _id field is not included in the query. Scanning a + // dense field column, which has a value present for every document in the collection, is + // necessary to find documents missing all projected fields -- in the following queries + // ultimately returning empty documents for such documents, rather than not at all. + + // Store documents where 'x' and 'y' fields are missing in some documents but present in other + // documents. This should cause the dense _id or internal RowId columns to be used to identify + // null field values. + const docs = [ + {_id: 0, x: 1, y: "fee"}, + {_id: 1}, + {_id: 2, x: 1}, + {_id: 3, y: "fii"}, + {_id: 4, x: 1, y: "foo"}, + {_id: 5}, + {_id: 6, x: 1} + ]; + coll.drop(); + assert.commandWorked(coll.insertMany(docs)); + assert.commandWorked(coll.createIndex({"$**": "columnstore"})); + + const findDocsWithoutID = coll.find({}, {_id: 0, "x": 1, "y": 1}).toArray(); + assert.eq(findDocsWithoutID.length, + docs.length, + `Unexpected number of documents: ${tojson(findDocsWithoutID)}`); + + const findDocsWithID = coll.find({}, {_id: 1, "x": 1, "y": 1}).toArray(); + assert.eq(findDocsWithID.length, + docs.length, + `Unexpected number of documents: ${tojson(findDocsWithID)}`); +})(); + // Multiple tests in this file use the same dataset. Intentionally not using _id as the unique // identifier, to avoid getting IDHACK plans when we query by it. const docs = [ diff --git a/jstests/noPassthroughWithMongod/column_scan_explain.js b/jstests/noPassthroughWithMongod/column_scan_explain.js index 97fff6a51ca..52e4d789503 100644 --- a/jstests/noPassthroughWithMongod/column_scan_explain.js +++ b/jstests/noPassthroughWithMongod/column_scan_explain.js @@ -8,8 +8,6 @@ "use strict"; load("jstests/aggregation/extras/utils.js"); // For assertArrayEq -load("jstests/libs/analyze_plan.js"); // For planHasStage. -load("jstests/libs/sbe_util.js"); // For checkSBEEnabled. load("jstests/libs/sbe_explain_helpers.js"); // For getSbePlanStages. load("jstests/libs/columnstore_util.js"); // For setUpServerForColumnStoreIndexTest. @@ -34,7 +32,7 @@ assert.commandWorked(coll.insertMany(docs, {ordered: false})); (function testScanOnTwoColumns() { const explain = coll.find({}, {x: 1, 'y.a': 1}).explain("executionStats"); - // Validate SBE part. + // Validate that the plan was SBE and using a ColumnScan. const columnScanStages = getSbePlanStages(explain, "columnscan"); assert.eq(columnScanStages.length, 1, `Could not find 'columnscan' stage: ${tojson(explain)}`); const columnScan = columnScanStages[0]; @@ -45,18 +43,17 @@ assert.commandWorked(coll.insertMany(docs, {ordered: false})); extraErrorMsg: 'Paths used by column scan stage' }); - // Verifying column fields. + // Verify that the expected number of column fields were scanned. const columns = columnScan.columns; assert.eq( - Object.keys(columns).length, 4, `Should access 4 columns but accessed: ${tojson(columns)}`); + Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`); // We seek into each column once, when setting up the cursors. The dense column is the first // to hit EOF after iterating over all documents so other columns iterate at least one time // less. const expectedColumns = { - "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false}, - "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, - "x": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, + "_id": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": true}, + "x": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": true}, "y.a": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true} }; for (const [columnName, expectedObj] of Object.entries(expectedColumns)) { @@ -79,8 +76,7 @@ assert.commandWorked(coll.insertMany(docs, {ordered: false})); // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all // columns. assert.eq(explain.executionStats.totalKeysExamined, - columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks + - columns["_id"].numNexts + columns["_id"].numSeeks + columns["x"].numNexts + + columns["_id"].numNexts + columns["_id"].numSeeks + columns["x"].numNexts + columns["x"].numSeeks + columns["y.a"].numNexts + columns["y.a"].numSeeks + parentColumns["y"].numNexts + parentColumns["y"].numSeeks, `Mismatch in totalKeysExamined.`); @@ -96,14 +92,15 @@ assert.commandWorked(coll.insertMany(docs, {ordered: false})); {"allFields": ["_id", "x", "y.a"], "extraFieldsPermitted": true}, false /* verbose */, null /* valueComparator */, - ["stage", "planNodeId"])); + ["stage", "planNodeId"]), + `Mismatching column scan plan stage ${tojson(columnScanPlanStages[0])}`); }()); // Test the explain output for a scan on a nonexistent field. (function testNonexistentField() { const explain = coll.find({}, {z: 1}).explain("executionStats"); - // Validate SBE part. + // Validate that the plan was SBE and using a ColumnScan. const columnScanStages = getSbePlanStages(explain, "columnscan"); assert.eq(columnScanStages.length, 1, `Could not find 'columnscan' stage: ${tojson(explain)}`); const columnScan = columnScanStages[0]; @@ -114,13 +111,12 @@ assert.commandWorked(coll.insertMany(docs, {ordered: false})); extraErrorMsg: 'Paths used by column scan stage' }); - // Verifying column fields. + // Verify that the expected number of column fields were scanned. const columns = columnScan.columns; assert.eq( - Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`); + Object.keys(columns).length, 2, `Should access 2 columns but accessed: ${tojson(columns)}`); const expectedColumns = { - "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false}, - "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, + "_id": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": true}, "z": {"numNexts": 0, "numSeeks": 1, "usedInOutput": true}, }; for (const [columnName, expectedObj] of Object.entries(expectedColumns)) { @@ -136,8 +132,7 @@ assert.commandWorked(coll.insertMany(docs, {ordered: false})); // 'totalKeysExamined' should be equal to the sum of "next" and "seek" calls across all // columns. assert.eq(explain.executionStats.totalKeysExamined, - columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks + - columns["_id"].numNexts + columns["_id"].numSeeks + columns["z"].numNexts + + columns["_id"].numNexts + columns["_id"].numSeeks + columns["z"].numNexts + columns["z"].numSeeks, `Mismatch in totalKeysExamined.`); @@ -152,31 +147,32 @@ assert.commandWorked(coll.insertMany(docs, {ordered: false})); {"allFields": ["_id", "z"], "extraFieldsPermitted": false}, false /* verbose */, null /* valueComparator */, - ["stage", "planNodeId"])); + ["stage", "planNodeId"]), + `Mismatching column scan plan stage ${tojson(columnScanPlanStages[0])}`); }()); -// Test the explain output for a scan on a 2-level nested field. +// Test the explain output for a scan on a 2-level nested field; and exclude the _id field to +// exercise explain for the hidden dense RowId column. (function testMultipleNestedColumns() { - const explain = coll.find({}, {'y.b.c': 1}).explain("executionStats"); + const explain = coll.find({}, {'_id': 0, 'y.b.c': 1}).explain("executionStats"); - // Validate SBE part. + // Validate that the plan was SBE and using a ColumnScan. const columnScanStages = getSbePlanStages(explain, "columnscan"); assert.eq(columnScanStages.length, 1, `Could not find 'columnscan' stage: ${tojson(explain)}`); const columnScan = columnScanStages[0]; assertArrayEq({ actual: columnScan.paths, - expected: ["_id", "y.b.c"], + expected: ["y.b.c"], extraErrorMsg: 'Paths used by column scan stage' }); - // Verifying column fields. + // Verify that the expected number of column fields were scanned. const columns = columnScan.columns; assert.eq( - Object.keys(columns).length, 3, `Should access 3 columns but accessed: ${tojson(columns)}`); + Object.keys(columns).length, 2, `Should access 2 columns but accessed: ${tojson(columns)}`); const expectedColumns = { "<<RowId Column>>": {"numNexts": docs.length, "numSeeks": 1, "usedInOutput": false}, - "_id": {"numNexts": docs.length - 1, "numSeeks": 1, "usedInOutput": true}, "y.b.c": {"numNexts": 1, "numSeeks": 1, "usedInOutput": true}, }; for (const [columnName, expectedObj] of Object.entries(expectedColumns)) { @@ -205,10 +201,9 @@ assert.commandWorked(coll.insertMany(docs, {ordered: false})); // columns. assert.eq(explain.executionStats.totalKeysExamined, columns["<<RowId Column>>"].numNexts + columns["<<RowId Column>>"].numSeeks + - columns["_id"].numNexts + columns["_id"].numSeeks + columns["y.b.c"].numNexts + - columns["y.b.c"].numSeeks + parentColumns["y.b"].numNexts + - parentColumns["y.b"].numSeeks + parentColumns["y"].numNexts + - parentColumns["y"].numSeeks, + columns["y.b.c"].numNexts + columns["y.b.c"].numSeeks + + parentColumns["y.b"].numNexts + parentColumns["y.b"].numSeeks + + parentColumns["y"].numNexts + parentColumns["y"].numSeeks, `Mismatch in totalKeysExamined.`); assert.eq(columnScan.numRowStoreFetches, 0, 'Mismatch in numRowStoreFetches'); @@ -219,10 +214,11 @@ assert.commandWorked(coll.insertMany(docs, {ordered: false})); assert.eq( columnScanPlanStages.length, 1, `Could not find 'COLUMN_SCAN' stage: ${tojson(explain)}`); assert(documentEq(columnScanPlanStages[0], - {"allFields": ["_id", "y.b.c"], "extraFieldsPermitted": true}, + {"allFields": ["y.b.c"], "extraFieldsPermitted": true}, false /* verbose */, null /* valueComparator */, - ["stage", "planNodeId"])); + ["stage", "planNodeId"]), + `Mismatching column scan plan stage ${tojson(columnScanPlanStages[0])}`); }()); // Test fallback to the row store. @@ -268,6 +264,7 @@ assert.commandWorked(coll.insertMany(docs, {ordered: false})); }, false /* verbose */, null /* valueComparator */, - ["stage", "planNodeId"])); + ["stage", "planNodeId"]), + `Mismatching column scan plan stage ${tojson(columnScanPlanStages[0])}`); }()); }()); diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp index 602531ee570..c21f16246f6 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.cpp +++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp @@ -37,6 +37,7 @@ namespace sbe { ColumnScanStage::ColumnScanStage(UUID collectionUuid, StringData columnIndexName, std::vector<std::string> paths, + bool densePathIncludedInScan, std::vector<bool> includeInOutput, boost::optional<value::SlotId> recordIdSlot, boost::optional<value::SlotId> reconstuctedRecordSlot, @@ -55,7 +56,8 @@ ColumnScanStage::ColumnScanStage(UUID collectionUuid, _reconstructedRecordSlot(reconstuctedRecordSlot), _rowStoreSlot(rowStoreSlot), _rowStoreExpr(std::move(rowStoreExpr)), - _filteredPaths(std::move(filteredPaths)) { + _filteredPaths(std::move(filteredPaths)), + _densePathIncludedInScan(densePathIncludedInScan) { invariant(_filteredPaths.size() <= _paths.size(), "Filtered paths should be a subset of all paths"); invariant(_paths.size() == _includeInOutput.size()); @@ -69,6 +71,7 @@ std::unique_ptr<PlanStage> ColumnScanStage::clone() const { return std::make_unique<ColumnScanStage>(_collUuid, _columnIndexName, _paths, + _densePathIncludedInScan, _includeInOutput, _recordIdSlot, _reconstructedRecordSlot, @@ -138,9 +141,9 @@ value::SlotAccessor* ColumnScanStage::getAccessor(CompileCtx& ctx, value::SlotId } void ColumnScanStage::doSaveState(bool relinquishCursor) { - if (_denseColumnCursor) { - _denseColumnCursor->makeOwned(); - _denseColumnCursor->cursor().save(); + if (_recordIdColumnCursor) { + _recordIdColumnCursor->makeOwned(); + _recordIdColumnCursor->cursor().save(); } for (auto& cursor : _columnCursors) { @@ -187,8 +190,8 @@ void ColumnScanStage::doRestoreState(bool relinquishCursor) { } } - if (_denseColumnCursor) { - _denseColumnCursor->cursor().restore(); + if (_recordIdColumnCursor) { + _recordIdColumnCursor->cursor().restore(); } for (auto& cursor : _columnCursors) { cursor.cursor().restore(); @@ -202,8 +205,8 @@ void ColumnScanStage::doDetachFromOperationContext() { if (_rowStoreCursor) { _rowStoreCursor->detachFromOperationContext(); } - if (_denseColumnCursor) { - _denseColumnCursor->cursor().detachFromOperationContext(); + if (_recordIdColumnCursor) { + _recordIdColumnCursor->cursor().detachFromOperationContext(); } for (auto& cursor : _columnCursors) { cursor.cursor().detachFromOperationContext(); @@ -217,8 +220,8 @@ void ColumnScanStage::doAttachToOperationContext(OperationContext* opCtx) { if (_rowStoreCursor) { _rowStoreCursor->reattachToOperationContext(opCtx); } - if (_denseColumnCursor) { - _denseColumnCursor->cursor().reattachToOperationContext(opCtx); + if (_recordIdColumnCursor) { + _recordIdColumnCursor->cursor().reattachToOperationContext(opCtx); } for (auto& cursor : _columnCursors) { cursor.cursor().reattachToOperationContext(opCtx); @@ -273,21 +276,22 @@ void ColumnScanStage::open(bool reOpen) { auto iam = static_cast<ColumnStoreAccessMethod*>(entry->accessMethod()); - // The dense _recordId column is only needed if there are no filters (TODO SERVER-68377: - // eventually we can avoid including this column for the cases where a known dense column - // such as _id is being read anyway). - if (_filteredPaths.empty()) { - _denseColumnCursor = std::make_unique<ColumnCursor>( - iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath), - _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(), - false /*includeInOutput*/)); - } for (size_t i = 0; i < _paths.size(); i++) { _columnCursors.emplace_back( iam->storage()->newCursor(_opCtx, _paths[i]), _specificStats.cursorStats.emplace_back(_paths[i], _includeInOutput[i])); } + + // The dense _recordId column is only needed if there are no filters and no dense field is + // being scanned already for the query. + if (_filteredPaths.empty() && !_densePathIncludedInScan) { + _recordIdColumnCursor = std::make_unique<ColumnCursor>( + iam->storage()->newCursor(_opCtx, ColumnStore::kRowIdPath), + _specificStats.cursorStats.emplace_back(ColumnStore::kRowIdPath.toString(), + false /*includeInOutput*/)); + } } + _rowId = ColumnStore::kNullRowId; _open = true; } @@ -430,10 +434,10 @@ RowId ColumnScanStage::findNextRowIdForFilteredColumns() { } RowId ColumnScanStage::findMinRowId() const { - if (_denseColumnCursor) { - // The cursor of the dense column cannot be ahead of any other, so it's always at the - // minimum. - auto& result = _denseColumnCursor->lastCell(); + if (_recordIdColumnCursor) { + // The cursor of the dense _recordId column cannot be ahead of any other (there are no + // filters on it to move it forward arbitrarily), so it's always at the minimum. + auto& result = _recordIdColumnCursor->lastCell(); if (!result) { return ColumnStore::kNullRowId; } @@ -450,10 +454,20 @@ RowId ColumnScanStage::findMinRowId() const { return recordId; } +/** + * When called for the first time, initializes all the column cursors to the beginning of their + * respective columns. On subsequent calls, if path filters are present, forwards all cursors to + * their next filter match. If no filters are present, cursors are stepped forward passed the + * current _rowId, if necessary: there may be gaps in columns, putting one cursor far ahead of the + * others in past cursor advancement. + * + * Returns the lowest RowId across cursors if there are no filtered paths; otherwise the RowId of + * the current cursors' position where all filters match. + */ RowId ColumnScanStage::advanceCursors() { if (_rowId == ColumnStore::kNullRowId) { - if (_denseColumnCursor) { - _denseColumnCursor->seekAtOrPast(ColumnStore::kNullRowId); + if (_recordIdColumnCursor) { + _recordIdColumnCursor->seekAtOrPast(ColumnStore::kNullRowId); } for (auto& columnCursor : _columnCursors) { columnCursor.seekAtOrPast(ColumnStore::kNullRowId); @@ -468,15 +482,17 @@ RowId ColumnScanStage::advanceCursors() { return findNextRowIdForFilteredColumns(); } + /* no filtered paths */ + // In absence of filters all cursors iterate forward on their own. Some of the cursors might - // be ahead of the current '_rowId' because there are gaps in their columns - don't move them - // but only those that are at '_rowId' and therefore their values have been consumed. - // While at it, compute the new min row ID. auto nextRecordId = RecordId(); + // be ahead of the current '_rowId' because there are gaps in their columns: don't move them, + // unless they are at '_rowId' and therefore their values have been consumed. + // While at it, compute the new min row ID. auto nextRowId = ColumnStore::kNullRowId; - if (_denseColumnCursor) { - invariant(_denseColumnCursor->lastCell()->rid == _rowId, - "Dense cursor should always be at the current minimum record ID"); - auto cell = _denseColumnCursor->next(); + if (_recordIdColumnCursor) { + invariant(_recordIdColumnCursor->lastCell()->rid == _rowId, + "The dense _recordId cursor should always be at the current minimum record ID"); + auto cell = _recordIdColumnCursor->next(); if (!cell) { return ColumnStore::kNullRowId; } @@ -491,7 +507,8 @@ RowId ColumnScanStage::advanceCursors() { cell = cursor.next(); } if (cell && (nextRowId == ColumnStore::kNullRowId || cell->rid < nextRowId)) { - invariant(!_denseColumnCursor, "Dense cursor should have the next lowest record ID"); + invariant(!_recordIdColumnCursor, + "The dense _recordId cursor should have the next lowest record ID"); nextRowId = cell->rid; } } @@ -608,7 +625,7 @@ void ColumnScanStage::close() { _coll.reset(); _columnCursors.clear(); _parentPathCursors.clear(); - _denseColumnCursor.reset(); + _recordIdColumnCursor.reset(); _open = false; } diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h index 004c21520ba..5bb185b106a 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.h +++ b/src/mongo/db/exec/sbe/stages/column_scan.h @@ -74,6 +74,7 @@ public: ColumnScanStage(UUID collectionUuid, StringData columnIndexName, std::vector<std::string> paths, + bool densePathIncludedInScan, std::vector<bool> includeInOutput, boost::optional<value::SlotId> recordIdSlot, boost::optional<value::SlotId> reconstructedRecordSlot, @@ -291,9 +292,19 @@ private: // Cursors to simultaneously read from the sections of the index for each path. std::vector<ColumnCursor> _columnCursors; StringMap<std::unique_ptr<ColumnCursor>> _parentPathCursors; - // Dense column contains record ids for all records. It is necessary to support projection - // semantics for missing values on paths. - std::unique_ptr<ColumnCursor> _denseColumnCursor; + + // A dense column contains records for all documents in the collection. It is sometimes + // necessary to support projection semantics for missing values on paths. If a dense path is not + // specified to the constructor, noted in '_densePathIncludedInScan', and there are no pushed + // down filters (_filteredPaths), then a cursor will be implicitly opened against the dense + // _recordId column. + std::unique_ptr<ColumnCursor> _recordIdColumnCursor; + + // 'densePathIncludedInScan' indicates whether there is a path present in 'paths' that is + // expected to be present for every document in the collection. This avoids the extra cost of + // iterating the _recordId dense column to ensure all null values for a column are observed. + bool _densePathIncludedInScan = false; + // Cursor into the associated row store. std::unique_ptr<SeekableRecordCursor> _rowStoreCursor; diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index 0539bce9ef4..c1945066c88 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -765,8 +765,10 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder // column_scan stage defines the order of fields in the reconstructed record). std::vector<std::string> paths; paths.reserve(csn->allFields.size()); + bool densePathIncludeInFields = false; if (csn->allFields.find("_id") != csn->allFields.end()) { paths.push_back("_id"); + densePathIncludeInFields = true; } for (const auto& path : csn->allFields) { if (path != "_id") { @@ -838,6 +840,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder std::make_unique<sbe::ColumnScanStage>(getCurrentCollection(reqs)->uuid(), csn->indexEntry.identifier.catalogName, std::move(paths), + densePathIncludeInFields, std::move(includeInOutput), ridSlot, reconstructedRecordSlot, |