diff options
author | Parker Felix <parker.felix@mongodb.com> | 2022-08-17 21:28:48 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-18 00:46:55 +0000 |
commit | 582fc560a31731a674e8e0bfac78981aff2b9fe8 (patch) | |
tree | e4cd7c8097f8126c748e4ec7c6cb6c197d6439b0 | |
parent | 8ea624563847736c94f0e500d3097557ab4d8315 (diff) | |
download | mongo-582fc560a31731a674e8e0bfac78981aff2b9fe8.tar.gz |
SERVER-67623 Use int64_t instead of RecordId in column index read path
-rw-r--r-- | jstests/noPassthrough/column_store_index_load.js | 2 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.cpp | 91 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.h | 22 | ||||
-rw-r--r-- | src/mongo/db/index/column_store_sorter.cpp | 23 | ||||
-rw-r--r-- | src/mongo/db/index/column_store_sorter.h | 8 | ||||
-rw-r--r-- | src/mongo/db/index/column_store_sorter_test.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/index/columns_access_method.cpp | 35 | ||||
-rw-r--r-- | src/mongo/db/storage/column_store.h | 34 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp | 46 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_column_store.h | 12 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_column_store_test.cpp | 4 |
11 files changed, 145 insertions, 140 deletions
diff --git a/jstests/noPassthrough/column_store_index_load.js b/jstests/noPassthrough/column_store_index_load.js index 92baccbe06e..f0e194e2442 100644 --- a/jstests/noPassthrough/column_store_index_load.js +++ b/jstests/noPassthrough/column_store_index_load.js @@ -81,7 +81,7 @@ const testProjection = { const maxMemUsageBytes = 20000; const numDocs = testDocs.length; -const approxDocSize = 800; +const approxDocSize = 500; const approxMemoryUsage = numDocs * approxDocSize; const expectedSpilledRanges = Math.ceil(approxMemoryUsage / maxMemUsageBytes); diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp index addf5fe25e3..66da5619ceb 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.cpp +++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp @@ -303,7 +303,7 @@ void ColumnScanStage::open(bool reOpen) { _specificStats.cursorStats.emplace_back(_paths[i], _includeInOutput[i])); } } - _recordId = RecordId(); + _rowId = ColumnStore::kNullRowId; _open = true; } @@ -345,7 +345,7 @@ void ColumnScanStage::readParentsIntoObj(StringData path, } boost::optional<SplitCellView> splitCellView; - if (auto optCell = it->second->seekExact(_recordId)) { + if (auto optCell = it->second->seekExact(_rowId)) { splitCellView = SplitCellView::parse(optCell->value); } @@ -468,17 +468,17 @@ bool ColumnScanStage::checkFilter(CellView cell, size_t filterIndex, const PathV return false; } -RecordId ColumnScanStage::findNextRecordIdForFilteredColumns() { +RowId ColumnScanStage::findNextRowIdForFilteredColumns() { invariant(!_filteredPaths.empty()); // Initialize 'targetRecordId' from the filtered cursor we are currently iterating. - RecordId targetRecordId; + RowId targetRowId; { auto& cursor = cursorForFilteredPath(_filteredPaths[_nextUnmatched]); if (!cursor.lastCell()) { - return RecordId(); // Have exhausted one of the columns. + return ColumnStore::kNullRowId; // Have exhausted one of the columns. 
} - targetRecordId = cursor.lastCell()->rid; + targetRowId = cursor.lastCell()->rid; } size_t matchedSinceAdvance = 0; @@ -491,17 +491,17 @@ RecordId ColumnScanStage::findNextRecordIdForFilteredColumns() { // Avoid seeking into the column that we started with. auto& result = cursor.lastCell(); - if (result && result->rid < targetRecordId) { - result = cursor.seekAtOrPast(targetRecordId); + if (result && result->rid < targetRowId) { + result = cursor.seekAtOrPast(targetRowId); } if (!result) { - return RecordId(); + return ColumnStore::kNullRowId; } - if (result->rid > targetRecordId) { + if (result->rid > targetRowId) { // The column skipped ahead - have to restart at this new record ID. matchedSinceAdvance = 0; - targetRecordId = result->rid; + targetRowId = result->rid; } if (!checkFilter(result->value, _nextUnmatched, cursor.path())) { @@ -509,97 +509,97 @@ RecordId ColumnScanStage::findNextRecordIdForFilteredColumns() { do { result = cursor.next(); if (!result) { - return RecordId(); + return ColumnStore::kNullRowId; } } while (!checkFilter(result->value, _nextUnmatched, cursor.path())); matchedSinceAdvance = 0; - invariant(result->rid > targetRecordId); - targetRecordId = result->rid; + invariant(result->rid > targetRowId); + targetRowId = result->rid; } ++matchedSinceAdvance; _nextUnmatched = (_nextUnmatched + 1) % _filteredPaths.size(); } - invariant(!targetRecordId.isNull()); + invariant(targetRowId != ColumnStore::kNullRowId); // Ensure that _all_ cursors have caught up with the filtered record ID. Some of the cursors // might skip ahead, which would mean the column is missing a value for this 'recordId'. 
for (auto& cursor : _columnCursors) { const auto& result = cursor.lastCell(); - if (result && result->rid < targetRecordId) { - cursor.seekAtOrPast(targetRecordId); + if (result && result->rid < targetRowId) { + cursor.seekAtOrPast(targetRowId); } } - return targetRecordId; + return targetRowId; } -RecordId ColumnScanStage::findMinRecordId() const { +RowId ColumnScanStage::findMinRowId() const { if (_denseColumnCursor) { // The cursor of the dense column cannot be ahead of any other, so it's always at the // minimum. auto& result = _denseColumnCursor->lastCell(); if (!result) { - return RecordId(); + return ColumnStore::kNullRowId; } return result->rid; } - auto recordId = RecordId(); + auto recordId = ColumnStore::kNullRowId; for (const auto& cursor : _columnCursors) { const auto& result = cursor.lastCell(); - if (result && (recordId.isNull() || result->rid < recordId)) { + if (result && (recordId == ColumnStore::kNullRowId || result->rid < recordId)) { recordId = result->rid; } } return recordId; } -RecordId ColumnScanStage::advanceCursors() { - if (_recordId.isNull()) { +RowId ColumnScanStage::advanceCursors() { + if (_rowId == ColumnStore::kNullRowId) { if (_denseColumnCursor) { - _denseColumnCursor->seekAtOrPast(RecordId()); + _denseColumnCursor->seekAtOrPast(ColumnStore::kNullRowId); } for (auto& columnCursor : _columnCursors) { - columnCursor.seekAtOrPast(RecordId()); + columnCursor.seekAtOrPast(ColumnStore::kNullRowId); } - return _filteredPaths.empty() ? findMinRecordId() : findNextRecordIdForFilteredColumns(); + return _filteredPaths.empty() ? findMinRowId() : findNextRowIdForFilteredColumns(); } if (!_filteredPaths.empty()) { // Nudge forward the "active" filtered cursor. The remaining ones will be synchronized // by 'findNextRecordIdForFilteredColumns()'. 
cursorForFilteredPath(_filteredPaths[_nextUnmatched]).next(); - return findNextRecordIdForFilteredColumns(); + return findNextRowIdForFilteredColumns(); } // In absence of filters all cursors iterate forward on their own. Some of the cursors might - // be ahead of the current '_recordId' because there are gaps in their columns - don't move - // them but only those that are at '_recordId' and therefore their values have been - // consumed. While at it, compute the new min record ID. - auto nextRecordId = RecordId(); + // be ahead of the current '_rowId' because there are gaps in their columns - don't move them + // but only those that are at '_rowId' and therefore their values have been consumed. + // While at it, compute the new min row ID. + auto nextRowId = ColumnStore::kNullRowId; if (_denseColumnCursor) { - invariant(_denseColumnCursor->lastCell()->rid == _recordId, + invariant(_denseColumnCursor->lastCell()->rid == _rowId, "Dense cursor should always be at the current minimum record ID"); auto cell = _denseColumnCursor->next(); if (!cell) { - return RecordId(); + return ColumnStore::kNullRowId; } - nextRecordId = cell->rid; + nextRowId = cell->rid; } for (auto& cursor : _columnCursors) { auto& cell = cursor.lastCell(); if (!cell) { continue; // this column has been exhausted } - if (cell->rid == _recordId) { + if (cell->rid == _rowId) { cell = cursor.next(); } - if (cell && (nextRecordId.isNull() || cell->rid < nextRecordId)) { + if (cell && (nextRowId == ColumnStore::kNullRowId || cell->rid < nextRowId)) { invariant(!_denseColumnCursor, "Dense cursor should have the next lowest record ID"); - nextRecordId = cell->rid; + nextRowId = cell->rid; } } - return nextRecordId; + return nextRowId; } PlanState ColumnScanStage::getNext() { @@ -612,8 +612,8 @@ PlanState ColumnScanStage::getNext() { checkForInterrupt(_opCtx); - _recordId = advanceCursors(); - if (_recordId.isNull()) { + _rowId = advanceCursors(); + if (_rowId == 
ColumnStore::kNullRowId) { return trackPlanState(PlanState::IS_EOF); } @@ -632,7 +632,7 @@ PlanState ColumnScanStage::getNext() { auto& lastCell = cursor.lastCell(); boost::optional<SplitCellView> splitCellView; - if (lastCell && lastCell->rid == _recordId) { + if (lastCell && lastCell->rid == _rowId) { splitCellView = SplitCellView::parse(lastCell->value); } @@ -658,9 +658,9 @@ PlanState ColumnScanStage::getNext() { if (useRowStore) { ++_specificStats.numRowStoreFetches; - // TODO: In some cases we can avoid calling seek() on the row store cursor, and instead - // do a next() which should be much cheaper. - auto record = _rowStoreCursor->seekExact(_recordId); + // TODO: In some cases we can avoid calling seek() on the row store cursor, and instead do + // a next() which should be much cheaper. + auto record = _rowStoreCursor->seekExact(RecordId(_rowId)); // If there's no record, the index is out of sync with the row store. invariant(record); @@ -684,6 +684,7 @@ PlanState ColumnScanStage::getNext() { } if (_recordIdAccessor) { + _recordId = RecordId(_rowId); _recordIdAccessor->reset( false, value::TypeTags::RecordId, value::bitcastFrom<RecordId*>(&_recordId)); } diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h index 7d30152f46d..71f8489dfb3 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.h +++ b/src/mongo/db/exec/sbe/stages/column_scan.h @@ -38,6 +38,7 @@ namespace mongo { namespace sbe { + /** * A stage that scans provided columnar index. 
* @@ -127,17 +128,17 @@ private: return _lastCell; } - boost::optional<FullCellView>& seekAtOrPast(RecordId id) { + boost::optional<FullCellView>& seekAtOrPast(RowId rid) { _lastCell.reset(); - _lastCell = _cursor->seekAtOrPast(id); + _lastCell = _cursor->seekAtOrPast(rid); clearOwned(); ++_stats.numSeeks; return _lastCell; } - boost::optional<FullCellView>& seekExact(RecordId id) { + boost::optional<FullCellView>& seekExact(RowId rid) { _lastCell.reset(); - _lastCell = _cursor->seekExact(id); + _lastCell = _cursor->seekExact(rid); clearOwned(); ++_stats.numSeeks; return _lastCell; @@ -205,18 +206,18 @@ private: bool checkFilter(CellView cell, size_t filterIndex, const PathValue& path); - // Finds the smallest record ID such that: - // 1) it is greater or equal to the record ID of all filtered columns cursors prior to the call; + // Finds the smallest row ID such that: + // 1) it is greater or equal to the row ID of all filtered columns cursors prior to the call; // 2) the record with this ID passes the filters of all filtered columns. - // Ensures that the cursors are set to this record ID unless it's missing in the column (which + // Ensures that the cursors are set to this row ID unless it's missing in the column (which // is only possible for the non-filtered columns). - RecordId findNextRecordIdForFilteredColumns(); + RowId findNextRowIdForFilteredColumns(); // Finds the lowest record ID across all cursors. Doesn't move any of the cursors. - RecordId findMinRecordId() const; + RowId findMinRowId() const; // Move cursors to the next record to be processed. - RecordId advanceCursors(); + RowId advanceCursors(); // The columnar index this stage is scanning and the associated row store collection. const UUID _collUuid; @@ -237,6 +238,7 @@ private: // The record id in the row store that is used to connect the per-path entries in the columnar // index and to retrieve the full record from the row store, if necessary. 
RecordId _recordId; + RowId _rowId; const boost::optional<value::SlotId> _recordIdSlot; // The object that is equivalent to the record from the associated row store when accessing diff --git a/src/mongo/db/index/column_store_sorter.cpp b/src/mongo/db/index/column_store_sorter.cpp index 8a2886d9b92..50285f00d74 100644 --- a/src/mongo/db/index/column_store_sorter.cpp +++ b/src/mongo/db/index/column_store_sorter.cpp @@ -41,7 +41,9 @@ struct ComparisonForPathAndRid { const std::pair<ColumnStoreSorter::Key, ColumnStoreSorter::Value>& right) const { auto stringComparison = left.first.path.compare(right.first.path); return (stringComparison != 0) ? stringComparison - : left.first.recordId.compare(right.first.recordId); + : ((left.first.rowId == right.first.rowId) + ? 0 + : (left.first.rowId > right.first.rowId ? 1 : -1)); } }; @@ -49,20 +51,20 @@ bool ColumnStoreSorter::Key::operator<(const Key& other) const { if (auto cmp = path.compare(other.path); cmp != 0) { return cmp < 0; } else { - return recordId < other.recordId; + return rowId < other.rowId; } } void ColumnStoreSorter::Key::serializeForSorter(BufBuilder& buf) const { buf.appendStr(path); - recordId.serializeToken(buf); + buf.appendNum(rowId); } ColumnStoreSorter::Key ColumnStoreSorter::Key::deserializeForSorter( BufReader& buf, ColumnStoreSorter::Key::SorterDeserializeSettings) { // Note: unlike function call parameters, the order of evaluation for initializer // parameters is defined. 
- return {buf.readCStr(), RecordId::deserializeToken(buf)}; + return {buf.readCStr(), buf.read<LittleEndian<int64_t>>()}; } void ColumnStoreSorter::Value::serializeForSorter(BufBuilder& buf) const { @@ -117,7 +119,7 @@ ColumnStoreSorter::ColumnStoreSorter(size_t maxMemoryUsageBytes, }); } -void ColumnStoreSorter::add(PathView path, const RecordId& recordId, CellView cellContents) { +void ColumnStoreSorter::add(PathView path, RowId rowId, CellView cellContents) { auto& cellListAtPath = _dataByPath[path]; if (cellListAtPath.empty()) { // Track memory usage of this new path. @@ -127,11 +129,10 @@ void ColumnStoreSorter::add(PathView path, const RecordId& recordId, CellView ce // The sorter assumes that RecordIds are added in sorted order. tassert(6548102, "Out-of-order record during columnar index build", - cellListAtPath.empty() || cellListAtPath.back().first < recordId); + cellListAtPath.empty() || cellListAtPath.back().first < rowId); - cellListAtPath.emplace_back(recordId, CellValue(cellContents.rawData(), cellContents.size())); - _memUsed += cellListAtPath.back().first.memUsage() + sizeof(CellValue) + - cellListAtPath.back().second.size(); + cellListAtPath.emplace_back(rowId, CellValue(cellContents.rawData(), cellContents.size())); + _memUsed += sizeof(RowId) + sizeof(CellValue) + cellListAtPath.back().second.size(); if (_memUsed > _maxMemoryUsageBytes) { spill(); } @@ -184,7 +185,7 @@ void ColumnStoreSorter::spill() { size_t cellVectorSize = std::accumulate( cellVector.begin(), cellVector.end(), 0, [& path = path](size_t sum, auto& ridAndCell) { - return sum + path.size() + ridAndCell.first.memUsage() + ridAndCell.second.size(); + return sum + path.size() + sizeof(RowId) + ridAndCell.second.size(); }); // Add (path, rid, cell) records to the spill file so that the first cell in each contiguous @@ -222,7 +223,7 @@ void ColumnStoreSorter::spill() { } for (auto& ridAndCell : cellVector) { const auto& cell = ridAndCell.second; - currentChunkSize += path.size() + 
ridAndCell.first.memUsage() + cell.size(); + currentChunkSize += path.size() + sizeof(RowId) + cell.size(); writer.addAlreadySorted(Key{path, ridAndCell.first}, Value{CellView{cell.c_str(), cell.size()}}); diff --git a/src/mongo/db/index/column_store_sorter.h b/src/mongo/db/index/column_store_sorter.h index ec9859fdac1..3bdf9f72b07 100644 --- a/src/mongo/db/index/column_store_sorter.h +++ b/src/mongo/db/index/column_store_sorter.h @@ -64,11 +64,11 @@ public: const std::vector<SorterRange>& ranges, SorterTracker* tracker = nullptr); - void add(PathView path, const RecordId& recordId, CellView cellContents); + void add(PathView path, RowId rowId, CellView cellContents); struct Key { PathView path; - RecordId recordId; + RowId rowId; struct SorterDeserializeSettings {}; @@ -80,7 +80,7 @@ public: static Key deserializeForSorter(BufReader& buf, SorterDeserializeSettings); size_t memUsageForSorter() const { - return sizeof(path) + path.size() + recordId.memUsage(); + return sizeof(path) + path.size() + sizeof(rowId); } Key getOwned() const { @@ -130,7 +130,7 @@ private: /** * Mapping from path name to the sorted list of (RecordId, Cell) pairs. */ - using CellVector = std::vector<std::pair<RecordId, CellValue>>; + using CellVector = std::vector<std::pair<RowId, CellValue>>; StringMap<CellVector> _dataByPath; std::shared_ptr<Sorter<Key, Value>::File> _spillFile; diff --git a/src/mongo/db/index/column_store_sorter_test.cpp b/src/mongo/db/index/column_store_sorter_test.cpp index 57534ed4b78..57253023287 100644 --- a/src/mongo/db/index/column_store_sorter_test.cpp +++ b/src/mongo/db/index/column_store_sorter_test.cpp @@ -84,8 +84,8 @@ TEST(ColumnStoreSorter, SortTest) { // that cell contents travel with the (Field name, RecordId) key. The null-byte // delimiter tests that the sorter correctly stores cells with internal null bytes. 
std::string cell = str::stream() << fieldName << "\0" << i; - inMemorySorter->add(fieldName, RecordId(i), cell); - externalSorter->add(fieldName, RecordId(i), cell); + inMemorySorter->add(fieldName, RowId(i), cell); + externalSorter->add(fieldName, RowId(i), cell); } } @@ -100,7 +100,7 @@ TEST(ColumnStoreSorter, SortTest) { auto [columnKey, columnValue] = sortedItInMemory->next(); ASSERT_EQ(expected.first, columnKey.path); - ASSERT_EQ(RecordId(expected.second), columnKey.recordId); + ASSERT_EQ(expected.second, columnKey.rowId); ASSERT_EQ(expectedCell, columnValue.cell); } @@ -109,7 +109,7 @@ TEST(ColumnStoreSorter, SortTest) { auto [columnKey, columnValue] = sortedItExternal->next(); ASSERT_EQ(expected.first, columnKey.path); - ASSERT_EQ(RecordId(expected.second), columnKey.recordId); + ASSERT_EQ(expected.second, columnKey.rowId); ASSERT_EQ(expectedCell, columnValue.cell); } } diff --git a/src/mongo/db/index/columns_access_method.cpp b/src/mongo/db/index/columns_access_method.cpp index 231efe1f447..d1ef1f73b62 100644 --- a/src/mongo/db/index/columns_access_method.cpp +++ b/src/mongo/db/index/columns_access_method.cpp @@ -152,7 +152,8 @@ Status ColumnStoreAccessMethod::BulkBuilder::insert( obj, [&](PathView path, const column_keygen::UnencodedCellView& cell) { _cellBuilder.reset(); writeEncodedCell(cell, &_cellBuilder); - _sorter.add(path, rid, CellView(_cellBuilder.buf(), _cellBuilder.len())); + tassert(6762300, "RecordID cannot be a string for column store indexes", !rid.isStr()); + _sorter.add(path, rid.getLong(), CellView(_cellBuilder.buf(), _cellBuilder.len())); ++_keysInserted; }); @@ -208,7 +209,7 @@ Status ColumnStoreAccessMethod::BulkBuilder::commit(OperationContext* opCtx, auto builder = _columnsAccess->_store->makeBulkBuilder(opCtx); int64_t iterations = 0; - boost::optional<std::pair<PathValue, RecordId>> previousPathAndRecordId; + boost::optional<std::pair<PathValue, RowId>> previousPathAndRowId; std::unique_ptr<ColumnStoreSorter::Iterator> 
it(_sorter.done()); while (it->more()) { opCtx->checkForInterrupt(); @@ -219,29 +220,28 @@ Status ColumnStoreAccessMethod::BulkBuilder::commit(OperationContext* opCtx, // In debug mode only, assert that keys are retrieved from the sorter in strictly increasing // order. if (kDebugBuild) { - if (previousPathAndRecordId && - !(ColumnStoreSorter::Key{previousPathAndRecordId->first, - previousPathAndRecordId->second} < key)) { + if (previousPathAndRowId && + !(ColumnStoreSorter::Key{previousPathAndRowId->first, + previousPathAndRowId->second} < key)) { LOGV2_FATAL_NOTRACE(6548100, "Out-of-order result from sorter for column store bulk loader", - "prevPathName"_attr = previousPathAndRecordId->first, - "prevRecordId"_attr = previousPathAndRecordId->second, + "prevPathName"_attr = previousPathAndRowId->first, + "prevRecordId"_attr = previousPathAndRowId->second, "nextPathName"_attr = key.path, - "nextRecordId"_attr = key.recordId, + "nextRecordId"_attr = key.rowId, "index"_attr = _columnsAccess->_descriptor->indexName()); } // It is not safe to directly store the 'key' object, because it includes a // PathView, which may be invalid the next time we read it. 
- previousPathAndRecordId.emplace(key.path, key.recordId); + previousPathAndRowId.emplace(key.path, key.rowId); } try { writeConflictRetry(opCtx, "addingKey", ns.ns(), [&] { WriteUnitOfWork wunit(opCtx); auto& [columnStoreKey, columnStoreValue] = columnStoreKeyWithValue; - builder->addCell( - columnStoreKey.path, columnStoreKey.recordId, columnStoreValue.cell); + builder->addCell(columnStoreKey.path, columnStoreKey.rowId, columnStoreValue.cell); wunit.commit(); }); } catch (DBException& e) { @@ -288,7 +288,8 @@ Status ColumnStoreAccessMethod::insert(OperationContext* opCtx, buf.reset(); column_keygen::writeEncodedCell(cell, &buf); - cursor->insert(path, rec.id, CellView{buf.buf(), size_t(buf.len())}); + invariant(!rec.id.isStr()); + cursor->insert(path, rec.id.getLong(), CellView{buf.buf(), size_t(buf.len())}); inc(keysInsertedOut); }); @@ -309,7 +310,8 @@ void ColumnStoreAccessMethod::remove(OperationContext* opCtx, CheckRecordId checkRecordId) { auto cursor = _store->newWriteCursor(opCtx); column_keygen::visitPathsForDelete(obj, [&](PathView path) { - cursor->remove(path, rid); + tassert(6762301, "RecordID cannot be a string for column store indexes", !rid.isStr()); + cursor->remove(path, rid.getLong()); inc(keysDeletedOut); }); } @@ -332,7 +334,9 @@ Status ColumnStoreAccessMethod::update(OperationContext* opCtx, StringData path, const column_keygen::UnencodedCellView* cell) { if (diffAction == column_keygen::DiffAction::kDelete) { - cursor->remove(path, rid); + tassert( + 6762302, "RecordID cannot be a string for column store indexes", !rid.isStr()); + cursor->remove(path, rid.getLong()); inc(keysDeletedOut); return; } @@ -346,7 +350,8 @@ Status ColumnStoreAccessMethod::update(OperationContext* opCtx, const auto method = diffAction == column_keygen::DiffAction::kInsert ? 
&ColumnStore::WriteCursor::insert : &ColumnStore::WriteCursor::update; - (cursor.get()->*method)(path, rid, CellView{buf.buf(), size_t(buf.len())}); + tassert(6762303, "RecordID cannot be a string for column store indexes", !rid.isStr()); + (cursor.get()->*method)(path, rid.getLong(), CellView{buf.buf(), size_t(buf.len())}); inc(keysInsertedOut); }); diff --git a/src/mongo/db/storage/column_store.h b/src/mongo/db/storage/column_store.h index 58e3183b24d..19472a633ec 100644 --- a/src/mongo/db/storage/column_store.h +++ b/src/mongo/db/storage/column_store.h @@ -43,15 +43,16 @@ using PathView = StringData; using PathValue = std::string; using CellView = StringData; using CellValue = std::string; +using RowId = int64_t; struct FullCellView { PathView path; - RecordId rid; + RowId rid; CellView value; }; struct CellViewForPath { - RecordId rid; + RowId rid; CellView value; }; @@ -63,9 +64,9 @@ public: class WriteCursor { public: virtual ~WriteCursor() = default; - virtual void insert(PathView, const RecordId&, CellView) = 0; - virtual void remove(PathView, const RecordId&) = 0; - virtual void update(PathView, const RecordId&, CellView) = 0; + virtual void insert(PathView, RowId, CellView) = 0; + virtual void remove(PathView, RowId) = 0; + virtual void update(PathView, RowId, CellView) = 0; }; class CursorForPath { @@ -77,10 +78,10 @@ public: return {}; return handleResult(_cursor->next()); } - boost::optional<FullCellView> seekAtOrPast(const RecordId& rid) { + boost::optional<FullCellView> seekAtOrPast(RowId rid) { return handleResult(_cursor->seekAtOrPast(_path, rid)); } - boost::optional<FullCellView> seekExact(const RecordId& rid) { + boost::optional<FullCellView> seekExact(RowId rid) { return handleResult(_cursor->seekExact(_path, rid)); } @@ -127,7 +128,7 @@ public: class BulkBuilder { public: virtual ~BulkBuilder() = default; - virtual void addCell(PathView, const RecordId&, CellView) = 0; + virtual void addCell(PathView, RowId, CellView) = 0; }; /** @@ -138,6 
+139,9 @@ public: */ static constexpr StringData kRowIdPath = "\xFF"_sd; + // RowId equivalent of a null RecordId + static const RowId kNullRowId = 0; + // This is really just a namespace struct Bytes { static constexpr uint8_t kFirstNonBson = 0x20; @@ -263,9 +267,9 @@ public: // CRUD // virtual std::unique_ptr<WriteCursor> newWriteCursor(OperationContext*) = 0; - virtual void insert(OperationContext*, PathView, const RecordId&, CellView) = 0; - virtual void remove(OperationContext*, PathView, const RecordId&) = 0; - virtual void update(OperationContext*, PathView, const RecordId&, CellView) = 0; + virtual void insert(OperationContext*, PathView, RowId, CellView) = 0; + virtual void remove(OperationContext*, PathView, RowId) = 0; + virtual void update(OperationContext*, PathView, RowId, CellView) = 0; virtual std::unique_ptr<Cursor> newCursor(OperationContext*) const = 0; std::unique_ptr<CursorForPath> newCursor(OperationContext* opCtx, PathView path) const { return std::make_unique<CursorForPath>(path, newCursor(opCtx)); @@ -273,14 +277,14 @@ public: bool haveAnyWithPath(OperationContext* opCtx, PathView path) const { // TODO could avoid extra allocation. May also be more efficient to do a different way. 
- return bool(newCursor(opCtx, path)->seekAtOrPast(RecordId())); + return bool(newCursor(opCtx, path)->seekAtOrPast(kNullRowId)); } std::vector<PathValue> uniquePaths(OperationContext* opCtx) const { std::vector<PathValue> out; PathValue nextPath = ""; auto cursor = newCursor(opCtx); - while (auto next = cursor->seekAtOrPast(nextPath, RecordId())) { + while (auto next = cursor->seekAtOrPast(nextPath, kNullRowId)) { out.push_back(next->path.toString()); nextPath.assign(next->path.rawData(), next->path.size()); nextPath += '\x01'; // next possible path (\0 is not allowed) @@ -354,8 +358,8 @@ protected: public: virtual ~Cursor() = default; virtual boost::optional<FullCellView> next() = 0; - virtual boost::optional<FullCellView> seekAtOrPast(PathView, const RecordId&) = 0; - virtual boost::optional<FullCellView> seekExact(PathView, const RecordId&) = 0; + virtual boost::optional<FullCellView> seekAtOrPast(PathView, RowId) = 0; + virtual boost::optional<FullCellView> seekExact(PathView, RowId) = 0; virtual void save() = 0; virtual void saveUnpositioned() { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp index e9000044126..d21279e0baf 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp @@ -115,26 +115,18 @@ WiredTigerColumnStore::WiredTigerColumnStore(OperationContext* ctx, _desc(desc), _indexName(desc->indexName()) {} -std::string& WiredTigerColumnStore::makeKey(std::string& buffer, - PathView path, - const RecordId& rid) { - const auto ridSize = - rid.withFormat([](RecordId::Null) -> unsigned long { return 0; }, - [](int64_t) -> unsigned long { return sizeof(int64_t); }, - [](const char* data, size_t len) -> unsigned long { MONGO_UNREACHABLE; }); +std::string& WiredTigerColumnStore::makeKey(std::string& buffer, PathView path, RowId rid) { buffer.clear(); - buffer.reserve(path.size() + 1 /*NUL 
byte*/ + ridSize); + buffer.reserve(path.size() + 1 /*NUL byte*/ + sizeof(RowId)); buffer += path; if (path != kRowIdPath) { // If we end up reserving more values, the above check should be changed. buffer += '\0'; } - rid.withFormat([](RecordId::Null) { /* Do nothing. */ }, - [&](int64_t num) { - num = endian::nativeToBig(num); - buffer.append(reinterpret_cast<const char*>(&num), sizeof(num)); - }, - [&](const char* data, size_t len) { MONGO_UNREACHABLE; }); + if (rid > 0) { + RowId num = endian::nativeToBig(rid); + buffer.append(reinterpret_cast<const char*>(&num), sizeof(num)); + } return buffer; } @@ -145,9 +137,9 @@ public: _curwrap.assertInActiveTxn(); } - void insert(PathView, const RecordId&, CellView) override; - void remove(PathView, const RecordId&) override; - void update(PathView, const RecordId&, CellView) override; + void insert(PathView, RowId, CellView) override; + void remove(PathView, RowId) override; + void update(PathView, RowId, CellView) override; WT_CURSOR* c() { return _curwrap.get(); @@ -165,11 +157,11 @@ std::unique_ptr<ColumnStore::WriteCursor> WiredTigerColumnStore::newWriteCursor( void WiredTigerColumnStore::insert(OperationContext* opCtx, PathView path, - const RecordId& rid, + RowId rid, CellView cell) { WriteCursor(opCtx, _uri, _tableId).insert(path, rid, cell); } -void WiredTigerColumnStore::WriteCursor::insert(PathView path, const RecordId& rid, CellView cell) { +void WiredTigerColumnStore::WriteCursor::insert(PathView path, RowId rid, CellView cell) { dassert(_opCtx->lockState()->isWriteLocked()); auto key = makeKey(path, rid); @@ -189,10 +181,10 @@ void WiredTigerColumnStore::WriteCursor::insert(PathView path, const RecordId& r } } -void WiredTigerColumnStore::remove(OperationContext* opCtx, PathView path, const RecordId& rid) { +void WiredTigerColumnStore::remove(OperationContext* opCtx, PathView path, RowId rid) { WriteCursor(opCtx, _uri, _tableId).remove(path, rid); } -void WiredTigerColumnStore::WriteCursor::remove(PathView 
path, const RecordId& rid) { +void WiredTigerColumnStore::WriteCursor::remove(PathView path, RowId rid) { dassert(_opCtx->lockState()->isWriteLocked()); auto key = makeKey(path, rid); @@ -209,11 +201,11 @@ void WiredTigerColumnStore::WriteCursor::remove(PathView path, const RecordId& r } void WiredTigerColumnStore::update(OperationContext* opCtx, PathView path, - const RecordId& rid, + RowId rid, CellView cell) { WriteCursor(opCtx, _uri, _tableId).update(path, rid, cell); } -void WiredTigerColumnStore::WriteCursor::update(PathView path, const RecordId& rid, CellView cell) { +void WiredTigerColumnStore::WriteCursor::update(PathView path, RowId rid, CellView cell) { dassert(_opCtx->lockState()->isWriteLocked()); auto key = makeKey(path, rid); @@ -257,12 +249,12 @@ public: return curr(); } - boost::optional<FullCellView> seekAtOrPast(PathView path, const RecordId& rid) override { + boost::optional<FullCellView> seekAtOrPast(PathView path, RowId rid) override { makeKey(_buffer, path, rid); seekWTCursor(); return curr(); } - boost::optional<FullCellView> seekExact(PathView path, const RecordId& rid) override { + boost::optional<FullCellView> seekExact(PathView path, RowId rid) override { makeKey(_buffer, path, rid); seekWTCursor(/*exactOnly*/ true); return curr(); @@ -371,7 +363,7 @@ private: const auto ridStart = static_cast<const char*>(key.data) + out->path.size() + nullByteSize; invariant(ridSize == 8); - out->rid = RecordId(ConstDataView(ridStart).read<BigEndian<int64_t>>()); + out->rid = ConstDataView(ridStart).read<BigEndian<int64_t>>(); return out; } @@ -395,7 +387,7 @@ public: BulkBuilder(WiredTigerColumnStore* idx, OperationContext* opCtx) : _opCtx(opCtx), _cursor(idx->uri(), opCtx) {} - void addCell(PathView path, const RecordId& rid, CellView cell) override { + void addCell(PathView path, RowId rid, CellView cell) override { const std::string& key = makeKey(_buffer, path, rid); WiredTigerItem keyItem(key.c_str(), key.size()); _cursor->set_key(_cursor.get(), 
keyItem.Get()); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.h b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.h index 887dfea9fbe..9d1bd5e507b 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.h @@ -59,9 +59,9 @@ public: // CRUD // std::unique_ptr<ColumnStore::WriteCursor> newWriteCursor(OperationContext*) override; - void insert(OperationContext*, PathView, const RecordId&, CellView) override; - void remove(OperationContext*, PathView, const RecordId&) override; - void update(OperationContext*, PathView, const RecordId&, CellView) override; + void insert(OperationContext*, PathView, RowId, CellView) override; + void remove(OperationContext*, PathView, RowId) override; + void update(OperationContext*, PathView, RowId, CellView) override; std::unique_ptr<ColumnStore::Cursor> newCursor(OperationContext*) const override; std::unique_ptr<ColumnStore::BulkBuilder> makeBulkBuilder(OperationContext* opCtx) override; @@ -83,7 +83,7 @@ public: bool isEmpty(OperationContext* opCtx) override; - static std::string makeKey_ForTest(PathView path, const RecordId& id) { + static std::string makeKey_ForTest(PathView path, RowId id) { return makeKey(path, id); } @@ -92,8 +92,8 @@ private: return _uri; } - static std::string& makeKey(std::string& buffer, PathView, const RecordId&); - static std::string makeKey(PathView path, const RecordId& rid) { + static std::string& makeKey(std::string& buffer, PathView, RowId); + static std::string makeKey(PathView path, RowId rid) { std::string out; makeKey(out, path, rid); return out; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store_test.cpp index 9cb0901a791..10465796a58 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store_test.cpp @@ -51,7 +51,7 @@ 
namespace { using std::string; TEST(WiredTigerColumnStoreTest, MakeKey) { - std::string out = WiredTigerColumnStore::makeKey_ForTest("a.b", RecordId(66)); + std::string out = WiredTigerColumnStore::makeKey_ForTest("a.b", 66 /* RowId */); // a . b \0 // < Big Endian encoding of the number 66 in uint 64> @@ -60,7 +60,7 @@ TEST(WiredTigerColumnStoreTest, MakeKey) { } TEST(WiredTigerColumnStoreTest, MakeKeyRIDColumn) { - std::string out = WiredTigerColumnStore::makeKey_ForTest("\xFF", RecordId(256)); + std::string out = WiredTigerColumnStore::makeKey_ForTest("\xFF", 256 /* RowId */); // For the special path 0xff, we do not encode a NUL terminator. |