diff options
author | Louis Williams <louis.williams@mongodb.com> | 2021-02-08 12:57:37 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-02-10 17:03:27 +0000 |
commit | e09ce369e4912a945454a5494248046535c70460 (patch) | |
tree | dd445b60779ce625085343de71eaa114afc966bc /src/mongo/db/storage | |
parent | 861023b28a5dc270d2c459194dab8ea72b3bb817 (diff) | |
download | mongo-e09ce369e4912a945454a5494248046535c70460.tar.gz |
SERVER-53989 Generalize RecordId to store small binary strings
Diffstat (limited to 'src/mongo/db/storage')
26 files changed, 134 insertions, 92 deletions
diff --git a/src/mongo/db/storage/devnull/ephemeral_catalog_record_store.cpp b/src/mongo/db/storage/devnull/ephemeral_catalog_record_store.cpp index 30b9914bcaf..fbe23c1fdc9 100644 --- a/src/mongo/db/storage/devnull/ephemeral_catalog_record_store.cpp +++ b/src/mongo/db/storage/devnull/ephemeral_catalog_record_store.cpp @@ -563,7 +563,7 @@ int64_t EphemeralForTestRecordStore::storageSize(OperationContext* opCtx, RecordId EphemeralForTestRecordStore::allocateLoc(WithLock) { RecordId out = RecordId(_data->nextId++); - invariant(out.isNormal()); + invariant(out.isValid()); return out; } diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h index cea85d7ddf4..7f8120c2751 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h @@ -144,7 +144,7 @@ public: virtual Timestamp getAllDurableTimestamp() const override { RecordId id = _visibilityManager->getAllCommittedRecord(); - return Timestamp(id.as<int64_t>()); + return Timestamp(id.asLong()); } boost::optional<Timestamp> getOplogNeededForCrashRecovery() const final { diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp index e757e70ffb5..5783ccaf470 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp @@ -126,7 +126,7 @@ int64_t RecordStore::storageSize(OperationContext* opCtx, bool RecordStore::findRecord(OperationContext* opCtx, const RecordId& loc, RecordData* rd) const { StringStore* workingCopy(RecoveryUnit::get(opCtx)->getHead()); - auto it = workingCopy->find(createKey(_ident, loc.as<int64_t>())); + auto it = workingCopy->find(createKey(_ident, loc.asLong())); if (it == workingCopy->end()) { return false; } @@ -139,7 +139,7 @@ void RecordStore::deleteRecord(OperationContext* opCtx, const RecordId& dl) { auto ru = RecoveryUnit::get(opCtx); StringStore* workingCopy(ru->getHead()); SizeAdjuster adjuster(opCtx, this); - invariant(workingCopy->erase(createKey(_ident, dl.as<int64_t>()))); + invariant(workingCopy->erase(createKey(_ident, dl.asLong()))); ru->makeDirty(); } @@ -165,7 +165,7 @@ Status RecordStore::insertRecords(OperationContext* opCtx, oploghack::extractKey(record.data.data(), record.data.size()); if (!status.isOK()) return status.getStatus(); - thisRecordId = status.getValue().as<int64_t>(); + thisRecordId = status.getValue().asLong(); _visibilityManager->addUncommittedRecord(opCtx, this, RecordId(thisRecordId)); } else { thisRecordId = _nextRecordId(opCtx); @@ -188,7 +188,7 @@ Status RecordStore::updateRecord(OperationContext* opCtx, StringStore* workingCopy(RecoveryUnit::get(opCtx)->getHead()); SizeAdjuster adjuster(opCtx, this); { - std::string key = createKey(_ident, oldLocation.as<int64_t>()); + std::string key = createKey(_ident, oldLocation.asLong()); StringStore::const_iterator it = workingCopy->find(key); invariant(it != workingCopy->end()); workingCopy->update(StringStore::value_type{key, std::string(data, len)}); @@ -254,7 +254,7 @@ void RecordStore::cappedTruncateAfter(OperationContext* opCtx, RecordId end, boo auto ru = RecoveryUnit::get(opCtx); StringStore* workingCopy(ru->getHead()); WriteUnitOfWork wuow(opCtx); - const auto recordKey = createKey(_ident, end.as<int64_t>()); + const auto recordKey = createKey(_ident, end.asLong()); auto recordIt = inclusive ? workingCopy->lower_bound(recordKey) : workingCopy->upper_bound(recordKey); auto endIt = workingCopy->upper_bound(_postfix); @@ -316,7 +316,7 @@ boost::optional<RecordId> RecordStore::oplogStartHack(OperationContext* opCtx, StringStore* workingCopy{RecoveryUnit::get(opCtx)->getHead()}; - std::string key = createKey(_ident, startingPosition.as<int64_t>()); + std::string key = createKey(_ident, startingPosition.asLong()); StringStore::const_reverse_iterator it(workingCopy->upper_bound(key)); if (it == workingCopy->rend()) @@ -353,13 +353,13 @@ void RecordStore::_initHighestIdIfNeeded(OperationContext* opCtx) { return; } - // Need to start at 1 so we are always higher than RecordId::min<int64_t>() + // Need to start at 1 so we are always higher than RecordId::minLong() int64_t nextId = 1; // Find the largest RecordId currently in use. std::unique_ptr<SeekableRecordCursor> cursor = getCursor(opCtx, /*forward=*/false); if (auto record = cursor->next()) { - nextId = record->id.as<int64_t>() + 1; + nextId = record->id.asLong() + 1; } _highestRecordId.store(nextId); @@ -457,7 +457,7 @@ boost::optional<Record> RecordStore::Cursor::seekExact(const RecordId& id) { _savedPosition = boost::none; _lastMoveWasRestore = false; StringStore* workingCopy(RecoveryUnit::get(opCtx)->getHead()); - std::string key = createKey(_rs._ident, id.as<int64_t>()); + std::string key = createKey(_rs._ident, id.asLong()); it = workingCopy->find(key); if (it == workingCopy->end() || !inPrefix(it->first)) @@ -541,7 +541,7 @@ boost::optional<Record> RecordStore::ReverseCursor::seekExact(const RecordId& id _needFirstSeek = false; _savedPosition = boost::none; StringStore* workingCopy(RecoveryUnit::get(opCtx)->getHead()); - std::string key = createKey(_rs._ident, id.as<int64_t>()); + std::string key = createKey(_rs._ident, id.asLong()); StringStore::const_iterator canFind = workingCopy->find(key); if (canFind == workingCopy->end() || !inPrefix(canFind->first)) { it = workingCopy->rend(); diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store_test.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store_test.cpp index af44537dfd1..2bbc8c5860f 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store_test.cpp +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store_test.cpp @@ -86,6 +86,10 @@ public: std::unique_ptr<mongo::RecoveryUnit> newRecoveryUnit() final { return std::make_unique<RecoveryUnit>(&_kvEngine); } + + KVEngine* getEngine() override final { + return &_kvEngine; + } }; std::unique_ptr<mongo::RecordStoreHarnessHelper> makeRecordStoreHarnessHelper() { diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_sorted_impl.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_sorted_impl.cpp index 95a5d38eae8..303740dbeab 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_sorted_impl.cpp +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_sorted_impl.cpp @@ -140,7 +140,7 @@ IndexDataEntry::IndexDataEntry(const std::string& indexDataEntry) : _buffer(reinterpret_cast<const uint8_t*>(indexDataEntry.data())) {} std::string IndexDataEntry::create(RecordId loc, const KeyString::TypeBits& typeBits) { - uint64_t repr = loc.as<int64_t>(); + uint64_t repr = loc.asLong(); uint64_t typebitsSize = typeBits.getSize(); std::string output(sizeof(repr) + sizeof(typebitsSize) + typebitsSize, '\0'); @@ -606,10 +606,10 @@ void CursorBase<CursorImpl>::setEndPosition(const BSONObj& key, bool inclusive) // ident. Otherwise, we use the first as our bound. if (_forward == inclusive) it = workingCopy->upper_bound( - createRadixKeyFromObj(key, RecordId::max<int64_t>(), _prefix, _order)); + createRadixKeyFromObj(key, RecordId::maxLong(), _prefix, _order)); else it = workingCopy->lower_bound( - createRadixKeyFromObj(key, RecordId::min<int64_t>(), _prefix, _order)); + createRadixKeyFromObj(key, RecordId::minLong(), _prefix, _order)); if (_forward) _endPos = it; else @@ -661,10 +661,10 @@ boost::optional<KeyStringEntry> CursorBase<CursorImpl>::seekAfterProcessing( // is also reversed. if (_forward == inclusive) it = _workingCopy->lower_bound( - createRadixKeyFromKSWithoutRecordId(keyStringVal, RecordId::min<int64_t>(), _prefix)); + createRadixKeyFromKSWithoutRecordId(keyStringVal, RecordId::minLong(), _prefix)); else it = _workingCopy->upper_bound( - createRadixKeyFromKSWithoutRecordId(keyStringVal, RecordId::max<int64_t>(), _prefix)); + createRadixKeyFromKSWithoutRecordId(keyStringVal, RecordId::maxLong(), _prefix)); if (_forward) _forwardIt = it; else @@ -897,11 +897,11 @@ bool CursorUnique::checkCursorValid() { // For unique indexes, we need to check if the cursor moved up a position when it // was restored. This isn't required for non-unique indexes because we store the // RecordId in the KeyString and use a "<" comparison instead of "<=" since we know - // that no RecordId will ever reach RecordId::max<int64_t>() so we don't need to + // that no RecordId will ever reach RecordId::maxLong() so we don't need to // check the equal side of things. This assumption doesn't hold for unique index // KeyStrings. std::string endPosKeyString = - createRadixKeyFromObj(*_endPosKey, RecordId::min<int64_t>(), _prefix, _order); + createRadixKeyFromObj(*_endPosKey, RecordId::minLong(), _prefix, _order); if (_forwardIt->first.compare(endPosKeyString) <= 0) return true; @@ -923,7 +923,7 @@ bool CursorUnique::checkCursorValid() { return true; std::string endPosKeyString = - createRadixKeyFromObj(*_endPosKey, RecordId::min<int64_t>(), _prefix, _order); + createRadixKeyFromObj(*_endPosKey, RecordId::minLong(), _prefix, _order); if (_reverseIt->first.compare(endPosKeyString) >= 0) return true; @@ -1464,19 +1464,19 @@ SortedDataInterfaceStandard::SortedDataInterfaceStandard(OperationContext* opCtx // This is the string representation of the KeyString before elements in this ident, which is // ident + \0. This is before all elements in this ident. _KSForIdentStart = createRadixKeyWithLocFromObj( - BSONObj(), RecordId::min<int64_t>(), ident.toString().append(1, '\0'), _ordering); + BSONObj(), RecordId::minLong(), ident.toString().append(1, '\0'), _ordering); // Similarly, this is the string representation of the KeyString for something greater than // all other elements in this ident. _KSForIdentEnd = - createRadixKeyWithLocFromObj(BSONObj(), RecordId::min<int64_t>(), _identEnd, _ordering); + createRadixKeyWithLocFromObj(BSONObj(), RecordId::minLong(), _identEnd, _ordering); } SortedDataInterfaceStandard::SortedDataInterfaceStandard(const Ordering& ordering, StringData ident) : SortedDataInterfaceBase(ordering, ident) { _KSForIdentStart = createRadixKeyWithLocFromObj( - BSONObj(), RecordId::min<int64_t>(), ident.toString().append(1, '\0'), _ordering); + BSONObj(), RecordId::minLong(), ident.toString().append(1, '\0'), _ordering); _KSForIdentEnd = - createRadixKeyWithLocFromObj(BSONObj(), RecordId::min<int64_t>(), _identEnd, _ordering); + createRadixKeyWithLocFromObj(BSONObj(), RecordId::minLong(), _identEnd, _ordering); } Status SortedDataInterfaceStandard::insert(OperationContext* opCtx, diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_visibility_manager.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_visibility_manager.cpp index fb9eadb2841..00677757b1d 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_visibility_manager.cpp +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_visibility_manager.cpp @@ -101,7 +101,7 @@ void VisibilityManager::addUncommittedRecord(OperationContext* opCtx, RecordId VisibilityManager::getAllCommittedRecord() { stdx::lock_guard<Latch> lock(_stateLock); return _uncommittedRecords.empty() ? _highestSeen - : RecordId(_uncommittedRecords.begin()->as<int64_t>() - 1); + : RecordId(_uncommittedRecords.begin()->asLong() - 1); } bool VisibilityManager::isFirstHidden(RecordId rid) { diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp index 173febc1313..58c080dde60 100644 --- a/src/mongo/db/storage/key_string.cpp +++ b/src/mongo/db/storage/key_string.cpp @@ -463,11 +463,11 @@ void BuilderBase<BufferT>::appendRecordId(RecordId loc) { // big-endian order. This does not encode negative RecordIds to give maximum space to // positive RecordIds which are the only ones that are allowed to be stored in an index. - int64_t raw = loc.as<int64_t>(); + int64_t raw = loc.asLong(); if (raw < 0) { - // Note: we encode RecordId::min<int64_t>() and RecordId() the same which is ok, as they + // Note: we encode RecordId::minLong() and RecordId() the same which is ok, as they // are never stored so they will never be compared to each other. - invariant(raw == RecordId::min<int64_t>().as<int64_t>()); + invariant(raw == RecordId::minLong().asLong()); raw = 0; } const uint64_t value = static_cast<uint64_t>(raw); diff --git a/src/mongo/db/storage/key_string_test.cpp b/src/mongo/db/storage/key_string_test.cpp index b40f12f74b4..91d740130e7 100644 --- a/src/mongo/db/storage/key_string_test.cpp +++ b/src/mongo/db/storage/key_string_test.cpp @@ -444,7 +444,7 @@ TEST_F(KeyStringBuilderTest, Array1) { ROUNDTRIP(version, BSON("" << BSON_ARRAY(1 << 2 << 3))); { - KeyString::Builder a(version, emptyArray, ALL_ASCENDING, RecordId::min<int64_t>()); + KeyString::Builder a(version, emptyArray, ALL_ASCENDING, RecordId::minLong()); KeyString::Builder b(version, emptyArray, ALL_ASCENDING, RecordId(5)); ASSERT_LESS_THAN(a, b); } @@ -886,7 +886,7 @@ TEST_F(KeyStringBuilderTest, LotsOfNumbers3) { TEST_F(KeyStringBuilderTest, RecordIdOrder1) { Ordering ordering = Ordering::make(BSON("a" << 1)); - KeyString::Builder a(version, BSON("" << 5), ordering, RecordId::min<int64_t>()); + KeyString::Builder a(version, BSON("" << 5), ordering, RecordId::minLong()); KeyString::Builder b(version, BSON("" << 5), ordering, RecordId(2)); KeyString::Builder c(version, BSON("" << 5), ordering, RecordId(3)); KeyString::Builder d(version, BSON("" << 6), ordering, RecordId()); @@ -901,7 +901,7 @@ TEST_F(KeyStringBuilderTest, RecordIdOrder1) { TEST_F(KeyStringBuilderTest, RecordIdOrder2) { Ordering ordering = Ordering::make(BSON("a" << -1 << "b" << -1)); - KeyString::Builder a(version, BSON("" << 5 << "" << 6), ordering, RecordId::min<int64_t>()); + KeyString::Builder a(version, BSON("" << 5 << "" << 6), ordering, RecordId::minLong()); KeyString::Builder b(version, BSON("" << 5 << "" << 6), ordering, RecordId(5)); KeyString::Builder c(version, BSON("" << 5 << "" << 5), ordering, RecordId(4)); KeyString::Builder d(version, BSON("" << 3 << "" << 4), ordering, RecordId(3)); @@ -917,7 +917,7 @@ TEST_F(KeyStringBuilderTest, RecordIdOrder2) { TEST_F(KeyStringBuilderTest, RecordIdOrder2Double) { Ordering ordering = Ordering::make(BSON("a" << -1 << "b" << -1)); - KeyString::Builder a(version, BSON("" << 5.0 << "" << 6.0), ordering, RecordId::min<int64_t>()); + KeyString::Builder a(version, BSON("" << 5.0 << "" << 6.0), ordering, RecordId::minLong()); KeyString::Builder b(version, BSON("" << 5.0 << "" << 6.0), ordering, RecordId(5)); KeyString::Builder c(version, BSON("" << 3.0 << "" << 4.0), ordering, RecordId(3)); @@ -1555,11 +1555,11 @@ TEST_F(KeyStringBuilderTest, RecordIds) { if (rid.isValid()) { ASSERT_GT(ks, KeyString::Builder(version, RecordId())); - ASSERT_GT(ks, KeyString::Builder(version, RecordId::min<int64_t>())); - ASSERT_LT(ks, KeyString::Builder(version, RecordId::max<int64_t>())); + ASSERT_GT(ks, KeyString::Builder(version, RecordId::minLong())); + ASSERT_LT(ks, KeyString::Builder(version, RecordId::maxLong())); - ASSERT_GT(ks, KeyString::Builder(version, RecordId(rid.as<int64_t>() - 1))); - ASSERT_LT(ks, KeyString::Builder(version, RecordId(rid.as<int64_t>() + 1))); + ASSERT_GT(ks, KeyString::Builder(version, RecordId(rid.asLong() - 1))); + ASSERT_LT(ks, KeyString::Builder(version, RecordId(rid.asLong() + 1))); } } @@ -1579,7 +1579,7 @@ TEST_F(KeyStringBuilderTest, RecordIds) { { // Test concatenating RecordIds like in a unique index. KeyString::Builder ks(version); - ks.appendRecordId(RecordId::max<int64_t>()); // uses all bytes + ks.appendRecordId(RecordId::maxLong()); // uses all bytes ks.appendRecordId(rid); ks.appendRecordId(RecordId(0xDEADBEEF)); // uses some extra bytes ks.appendRecordId(rid); @@ -1591,7 +1591,7 @@ TEST_F(KeyStringBuilderTest, RecordIds) { // forward scan BufReader reader(ks.getBuffer(), ks.getSize()); - ASSERT_EQ(KeyString::decodeRecordId(&reader), RecordId::max<int64_t>()); + ASSERT_EQ(KeyString::decodeRecordId(&reader), RecordId::maxLong()); ASSERT_EQ(KeyString::decodeRecordId(&reader), rid); ASSERT_EQ(KeyString::decodeRecordId(&reader), RecordId(0xDEADBEEF)); ASSERT_EQ(KeyString::decodeRecordId(&reader), rid); diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h index acf5562c1b4..7fbfd14e642 100644 --- a/src/mongo/db/storage/kv/kv_engine.h +++ b/src/mongo/db/storage/kv/kv_engine.h @@ -235,6 +235,14 @@ public: } /** + * Returns true if the storage engine supports collections clustered on _id. That is, + * collections will use _id values as their RecordId and do not need a separate _id index. + */ + virtual bool supportsClusteredIdIndex() const { + return false; + } + + /** * Returns true if storage engine supports --directoryperdb. * See: * http://docs.mongodb.org/manual/reference/program/mongod/#cmdoption--directoryperdb diff --git a/src/mongo/db/storage/kv/kv_engine_test_harness.cpp b/src/mongo/db/storage/kv/kv_engine_test_harness.cpp index 4c28829fb8e..e7129872570 100644 --- a/src/mongo/db/storage/kv/kv_engine_test_harness.cpp +++ b/src/mongo/db/storage/kv/kv_engine_test_harness.cpp @@ -567,7 +567,7 @@ TEST(KVEngineTestHarness, BasicTimestampSingle) { opCtx1.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided, kReadTimestamp); - ASSERT(!rs->findRecord(&opCtx1, RecordId::min<int64_t>(), nullptr)); + ASSERT(!rs->findRecord(&opCtx1, RecordId::minLong(), nullptr)); // Insert a record at a later time. RecordId rid; diff --git a/src/mongo/db/storage/oplog_hack.cpp b/src/mongo/db/storage/oplog_hack.cpp index 69dd1c4d900..dc3ac3e2f5f 100644 --- a/src/mongo/db/storage/oplog_hack.cpp +++ b/src/mongo/db/storage/oplog_hack.cpp @@ -53,9 +53,9 @@ StatusWith<RecordId> keyForOptime(const Timestamp& opTime) { return StatusWith<RecordId>(ErrorCodes::BadValue, "ts inc too high"); const RecordId out = RecordId(opTime.getSecs(), opTime.getInc()); - if (out <= RecordId::min<int64_t>()) + if (out <= RecordId::minLong()) return StatusWith<RecordId>(ErrorCodes::BadValue, "ts too low"); - if (out >= RecordId::max<int64_t>()) + if (out >= RecordId::maxLong()) return StatusWith<RecordId>(ErrorCodes::BadValue, "ts too high"); return StatusWith<RecordId>(out); diff --git a/src/mongo/db/storage/record_id_bm.cpp b/src/mongo/db/storage/record_id_bm.cpp index a246db21e8e..1f28a970f3e 100644 --- a/src/mongo/db/storage/record_id_bm.cpp +++ b/src/mongo/db/storage/record_id_bm.cpp @@ -37,13 +37,13 @@ namespace mongo { namespace { RecordId incInt(RecordId r) { - return RecordId(r.as<int64_t>() + 1); + return RecordId(r.asLong() + 1); } RecordId incOID(RecordId r) { - OID o = r.as<OID>(); + OID o = OID::from(r.strData()); o.setTimestamp(o.getTimestamp() + 1); - return RecordId(o); + return RecordId(o.view().view(), OID::kOIDSize); } void BM_RecordIdCopyLong(benchmark::State& state) { @@ -55,7 +55,7 @@ void BM_RecordIdCopyLong(benchmark::State& state) { } void BM_RecordIdCopyOID(benchmark::State& state) { - RecordId rid(OID::gen()); + RecordId rid(OID::gen().view().view(), OID::kOIDSize); for (auto _ : state) { benchmark::ClobberMemory(); benchmark::DoNotOptimize(rid = incOID(rid)); diff --git a/src/mongo/db/storage/record_store.h b/src/mongo/db/storage/record_store.h index 77fcfdc374c..972ddb9adfa 100644 --- a/src/mongo/db/storage/record_store.h +++ b/src/mongo/db/storage/record_store.h @@ -61,7 +61,7 @@ struct Record { }; /** - * The format of a RecordStore's RecordId keys. + * The data format of a RecordStore's RecordId keys. */ enum class KeyFormat { /** Signed 64-bit integer */ diff --git a/src/mongo/db/storage/record_store_test_harness.cpp b/src/mongo/db/storage/record_store_test_harness.cpp index 7d4fed29240..29db8562890 100644 --- a/src/mongo/db/storage/record_store_test_harness.cpp +++ b/src/mongo/db/storage/record_store_test_harness.cpp @@ -406,15 +406,17 @@ TEST(RecordStoreTestHarness, Cursor1) { } TEST(RecordStoreTestHarness, ClusteredRecordStore) { - const std::string ns = "test.system.buckets.a"; const auto harnessHelper = newRecordStoreHarnessHelper(); + if (!harnessHelper->getEngine()->supportsClusteredIdIndex()) { + // Only WiredTiger supports clustered indexes on _id. + return; + } + + const std::string ns = "test.system.buckets.a"; CollectionOptions options; options.clusteredIndex = ClusteredIndexOptions{}; std::unique_ptr<RecordStore> rs = harnessHelper->newNonCappedRecordStore(ns, options); - if (rs->keyFormat() == KeyFormat::Long) { - // ephemeralForTest does not support clustered indexes. - return; - } + invariant(rs->keyFormat() == KeyFormat::String); auto opCtx = harnessHelper->newOperationContext(); @@ -427,7 +429,7 @@ TEST(RecordStoreTestHarness, ClusteredRecordStore) { RecordData recordData = RecordData(doc.objdata(), doc.objsize()); recordData.makeOwned(); - records.push_back({RecordId(OID::gen()), recordData}); + records.push_back({RecordId(OID::gen().view().view(), OID::kOIDSize), recordData}); } { @@ -474,8 +476,10 @@ TEST(RecordStoreTestHarness, ClusteredRecordStore) { ASSERT_EQ(0, strcmp(records.at(i).data.data(), rd.data())); } - ASSERT_FALSE(rs->findRecord(opCtx.get(), RecordId::min<OID>(), nullptr)); - ASSERT_FALSE(rs->findRecord(opCtx.get(), RecordId::max<OID>(), nullptr)); + RecordId minOid(OID().view().view(), OID::kOIDSize); + RecordId maxOid(OID::max().view().view(), OID::kOIDSize); + ASSERT_FALSE(rs->findRecord(opCtx.get(), minOid, nullptr)); + ASSERT_FALSE(rs->findRecord(opCtx.get(), maxOid, nullptr)); } { diff --git a/src/mongo/db/storage/record_store_test_harness.h b/src/mongo/db/storage/record_store_test_harness.h index 638154c9b03..40e3d0895ed 100644 --- a/src/mongo/db/storage/record_store_test_harness.h +++ b/src/mongo/db/storage/record_store_test_harness.h @@ -35,6 +35,7 @@ #include "mongo/db/catalog/collection_options.h" #include "mongo/db/operation_context_noop.h" #include "mongo/db/service_context.h" +#include "mongo/db/storage/kv/kv_engine.h" #include "mongo/db/storage/test_harness_helper.h" namespace mongo { @@ -60,6 +61,8 @@ public: virtual std::unique_ptr<RecordStore> newCappedRecordStore(const std::string& ns, int64_t cappedSizeBytes, int64_t cappedMaxDocs) = 0; + + virtual KVEngine* getEngine() = 0; }; void registerRecordStoreHarnessHelperFactory( diff --git a/src/mongo/db/storage/sorted_data_interface_test_bulkbuilder.cpp b/src/mongo/db/storage/sorted_data_interface_test_bulkbuilder.cpp index 9a863745d6c..6ae7a83205d 100644 --- a/src/mongo/db/storage/sorted_data_interface_test_bulkbuilder.cpp +++ b/src/mongo/db/storage/sorted_data_interface_test_bulkbuilder.cpp @@ -112,7 +112,7 @@ TEST(SortedDataInterface, BuilderAddKeyWithReservedRecordId) { RecordId reservedLoc( RecordId::reservedIdFor<int64_t>(RecordId::Reservation::kWildcardMultikeyMetadataId)); - ASSERT(reservedLoc.isReserved()); + invariant(RecordId::isReserved<int64_t>(reservedLoc)); WriteUnitOfWork wuow(opCtx.get()); ASSERT_OK(builder->addKey(makeKeyString(sorted.get(), key1, reservedLoc))); diff --git a/src/mongo/db/storage/sorted_data_interface_test_insert.cpp b/src/mongo/db/storage/sorted_data_interface_test_insert.cpp index 32b674cae0e..eede384528a 100644 --- a/src/mongo/db/storage/sorted_data_interface_test_insert.cpp +++ b/src/mongo/db/storage/sorted_data_interface_test_insert.cpp @@ -709,7 +709,7 @@ TEST(SortedDataInterface, InsertReservedRecordId) { WriteUnitOfWork uow(opCtx.get()); RecordId reservedLoc( RecordId::reservedIdFor<int64_t>(RecordId::Reservation::kWildcardMultikeyMetadataId)); - ASSERT(reservedLoc.isReserved()); + invariant(RecordId::isReserved<int64_t>(reservedLoc)); ASSERT_OK(sorted->insert(opCtx.get(), makeKeyString(sorted.get(), key1, reservedLoc), /*dupsAllowed*/ true)); diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h index 1c43ea90235..7e22bb34661 100644 --- a/src/mongo/db/storage/storage_engine.h +++ b/src/mongo/db/storage/storage_engine.h @@ -421,6 +421,12 @@ public: virtual void setJournalListener(JournalListener* jl) = 0; /** + * Returns true if the storage engine supports collections clustered on _id. That is, + * collections will use _id values as their RecordId and do not need a separate _id index. + */ + virtual bool supportsClusteredIdIndex() const = 0; + + /** * Returns whether the storage engine supports "recover to stable timestamp". Returns true * if the storage engine supports "recover to stable timestamp" but does not currently have * a stable timestamp. In that case StorageEngine::recoverToStableTimestamp() will return diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp index 9ba689a9d7f..0e8414cb147 100644 --- a/src/mongo/db/storage/storage_engine_impl.cpp +++ b/src/mongo/db/storage/storage_engine_impl.cpp @@ -1012,6 +1012,10 @@ boost::optional<Timestamp> StorageEngineImpl::getLastStableRecoveryTimestamp() c return _engine->getLastStableRecoveryTimestamp(); } +bool StorageEngineImpl::supportsClusteredIdIndex() const { + return _engine->supportsClusteredIdIndex(); +} + bool StorageEngineImpl::supportsReadConcernSnapshot() const { return _engine->supportsReadConcernSnapshot(); } diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h index cc6b0b4f84e..27cdda2796e 100644 --- a/src/mongo/db/storage/storage_engine_impl.h +++ b/src/mongo/db/storage/storage_engine_impl.h @@ -153,6 +153,8 @@ public: boost::optional<Timestamp> getOplogNeededForCrashRecovery() const final; + bool supportsClusteredIdIndex() const final; + bool supportsReadConcernSnapshot() const final; bool supportsReadConcernMajority() const final; diff --git a/src/mongo/db/storage/storage_engine_mock.h b/src/mongo/db/storage/storage_engine_mock.h index 11f350ee9fb..37e44d497cf 100644 --- a/src/mongo/db/storage/storage_engine_mock.h +++ b/src/mongo/db/storage/storage_engine_mock.h @@ -106,6 +106,9 @@ public: return nullptr; } void setJournalListener(JournalListener* jl) final {} + bool supportsClusteredIdIndex() const final { + return false; + } bool supportsRecoverToStableTimestamp() const final { return false; } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h index b9f6e1fb635..5619449784d 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h @@ -255,6 +255,10 @@ public: Timestamp getAllDurableTimestamp() const override; + bool supportsClusteredIdIndex() const final override { + return true; + } + bool supportsReadConcernSnapshot() const final override; bool supportsOplogStones() const final override; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp index e55281db60f..4c4cb283643 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp @@ -63,7 +63,7 @@ void WiredTigerOplogManager::startVisibilityThread(OperationContext* opCtx, // event of a secondary crashing, replication recovery will truncate the oplog, resetting // visibility to the truncate point. In the event of a primary crashing, it will perform // rollback before servicing oplog reads. - auto topOfOplogTimestamp = Timestamp(lastRecord->id.as<int64_t>()); + auto topOfOplogTimestamp = Timestamp(lastRecord->id.asLong()); setOplogReadTimestamp(topOfOplogTimestamp); LOGV2_DEBUG(22368, 1, @@ -174,7 +174,7 @@ void WiredTigerOplogManager::waitForAllEarlierOplogWritesToBeVisible( LOGV2_DEBUG(22371, 2, "Operation is waiting for an entry to become visible in the oplog.", - "awaitedOplogEntryTimestamp"_attr = Timestamp(waitingFor.as<int64_t>()), + "awaitedOplogEntryTimestamp"_attr = Timestamp(waitingFor.asLong()), "currentLatestVisibleOplogEntryTimestamp"_attr = Timestamp(currentLatestVisibleTimestamp)); } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp index 124a3711896..33765cb8da0 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp @@ -262,7 +262,7 @@ void WiredTigerRecordStore::OplogStones::awaitHasExcessStonesOrDead() { "wallTime"_attr = stone.wallTime, "pinnedOplog"_attr = _rs->getPinnedOplog()); - if (static_cast<std::uint64_t>(stone.lastRecord.as<int64_t>()) < + if (static_cast<std::uint64_t>(stone.lastRecord.asLong()) < _rs->getPinnedOplog().asULL()) { break; } @@ -519,7 +519,7 @@ void WiredTigerRecordStore::OplogStones::_calculateStonesBySampling(OperationCon _calculateStonesByScanning(opCtx); return; } - earliestOpTime = Timestamp(record->id.as<int64_t>()); + earliestOpTime = Timestamp(record->id.asLong()); } { @@ -534,7 +534,7 @@ void WiredTigerRecordStore::OplogStones::_calculateStonesBySampling(OperationCon _calculateStonesByScanning(opCtx); return; } - latestOpTime = Timestamp(record->id.as<int64_t>()); + latestOpTime = Timestamp(record->id.asLong()); } LOGV2(22389, @@ -686,9 +686,9 @@ public: RecordId id; if (_rs->keyFormat() == KeyFormat::String) { - const char* oidBytes; - invariantWTOK(_cursor->get_key(_cursor, &oidBytes)); - id = RecordId(OID::from(oidBytes)); + const char* data; + invariantWTOK(_cursor->get_key(_cursor, &data)); + id = RecordId(data, RecordId::kSmallStrSize); } else { int64_t key; invariantWTOK(_cursor->get_key(_cursor, &key)); @@ -804,6 +804,7 @@ StatusWith<std::string> WiredTigerRecordStore::generateCreateString( // for correct behavior of the server. if (options.clusteredIndex) { // If the RecordId format is a String, assume a 12-byte fix-length string key format. + invariant(RecordId::kSmallStrSize == 12); ss << "key_format=12s"; } else { // All other collections use an int64_t as their table keys. @@ -1435,8 +1436,7 @@ void WiredTigerRecordStore::reclaimOplog(OperationContext* opCtx, Timestamp mayT while (auto stone = _oplogStones->peekOldestStoneIfNeeded()) { invariant(stone->lastRecord.isValid()); - if (static_cast<std::uint64_t>(stone->lastRecord.as<int64_t>()) >= - mayTruncateUpTo.asULL()) { + if (static_cast<std::uint64_t>(stone->lastRecord.asLong()) >= mayTruncateUpTo.asULL()) { // Do not truncate oplogs needed for replication recovery. return; } @@ -1486,12 +1486,12 @@ void WiredTigerRecordStore::reclaimOplog(OperationContext* opCtx, Timestamp mayT } invariantWTOK(ret); RecordId nextRecord = getKey(cursor); - if (static_cast<std::uint64_t>(nextRecord.as<int64_t>()) > mayTruncateUpTo.asULL()) { + if (static_cast<std::uint64_t>(nextRecord.asLong()) > mayTruncateUpTo.asULL()) { LOGV2_DEBUG(5140901, 0, "Cannot truncate as there are no oplog entries after the stone but " "before the truncate-up-to point", - "nextRecord"_attr = Timestamp(nextRecord.as<int64_t>()), + "nextRecord"_attr = Timestamp(nextRecord.asLong()), "mayTruncateUpTo"_attr = mayTruncateUpTo); return; } @@ -1587,7 +1587,7 @@ Status WiredTigerRecordStore::_insertRecords(OperationContext* opCtx, // flush. Because these are direct writes into the oplog, the machinery to trigger a // journal flush is bypassed. A followup oplog read will require a fresh visibility // value to make progress. - ts = Timestamp(record.id.as<int64_t>()); + ts = Timestamp(record.id.asLong()); opCtx->recoveryUnit()->setOrderedCommit(false); } else { ts = timestamps[i]; @@ -1626,10 +1626,10 @@ Status WiredTigerRecordStore::_insertRecords(OperationContext* opCtx, bool WiredTigerRecordStore::isOpHidden_forTest(const RecordId& id) const { invariant(_isOplog); - invariant(id.as<int64_t>() > 0); + invariant(id.asLong() > 0); invariant(_kvEngine->getOplogManager()->isRunning()); return _kvEngine->getOplogManager()->getOplogReadTimestamp() < - static_cast<std::uint64_t>(id.as<int64_t>()); + static_cast<std::uint64_t>(id.asLong()); } bool WiredTigerRecordStore::haveCappedWaiters() { @@ -1666,7 +1666,7 @@ StatusWith<Timestamp> WiredTigerRecordStore::getLatestOplogTimestamp( RecordId recordId = getKey(cursor); - return {Timestamp(static_cast<unsigned long long>(recordId.as<int64_t>()))}; + return {Timestamp(static_cast<unsigned long long>(recordId.asLong()))}; } StatusWith<Timestamp> WiredTigerRecordStore::getEarliestOplogTimestamp(OperationContext* opCtx) { @@ -1694,7 +1694,7 @@ StatusWith<Timestamp> WiredTigerRecordStore::getEarliestOplogTimestamp(Operation _cappedFirstRecord = getKey(cursor); } - return {Timestamp(static_cast<unsigned long long>(_cappedFirstRecord.as<int64_t>()))}; + return {Timestamp(static_cast<unsigned long long>(_cappedFirstRecord.asLong()))}; } Status WiredTigerRecordStore::updateRecord(OperationContext* opCtx, @@ -1996,7 +1996,7 @@ boost::optional<RecordId> WiredTigerRecordStore::oplogStartHack( RecordId searchFor = startingPosition; auto visibilityTs = wtRu->getOplogVisibilityTs(); - if (visibilityTs && searchFor.as<int64_t>() > *visibilityTs) { + if (visibilityTs && searchFor.asLong() > *visibilityTs) { searchFor = RecordId(*visibilityTs); } @@ -2050,13 +2050,13 @@ void WiredTigerRecordStore::_initNextIdIfNeeded(OperationContext* opCtx) { return; } - // Need to start at 1 so we are always higher than RecordId::min<int64_t>() + // Need to start at 1 so we are always higher than RecordId::minLong() int64_t nextId = 1; // Find the largest RecordId currently in use. std::unique_ptr<SeekableRecordCursor> cursor = getCursor(opCtx, /*forward=*/false); if (auto record = cursor->next()) { - nextId = record->id.as<int64_t>() + 1; + nextId = record->id.asLong() + 1; } _nextIdNum.store(nextId); @@ -2069,7 +2069,7 @@ RecordId WiredTigerRecordStore::_nextId(OperationContext* opCtx) { invariant(!_isOplog); _initNextIdIfNeeded(opCtx); RecordId out = RecordId(_nextIdNum.fetchAndAdd(1)); - invariant(out.isNormal()); + invariant(out.isValid()); return out; } @@ -2228,7 +2228,7 @@ void WiredTigerRecordStore::cappedTruncateAfter(OperationContext* opCtx, if (_isOplog) { // Immediately rewind visibility to our truncation point, to prevent new // transactions from appearing. - Timestamp truncTs(lastKeptId.as<int64_t>()); + Timestamp truncTs(lastKeptId.asLong()); if (!serverGlobalParams.enableMajorityReadConcern && _kvEngine->getOldestTimestamp() > truncTs) { @@ -2321,7 +2321,7 @@ boost::optional<Record> WiredTigerRecordStoreCursorBase::next() { id = getKey(c); } - if (_forward && _oplogVisibleTs && id.as<int64_t>() > *_oplogVisibleTs) { + if (_forward && _oplogVisibleTs && id.asLong() > *_oplogVisibleTs) { _eof = true; return {}; } @@ -2354,7 +2354,7 @@ boost::optional<Record> WiredTigerRecordStoreCursorBase::next() { boost::optional<Record> WiredTigerRecordStoreCursorBase::seekExact(const RecordId& id) { invariant(_hasRestored); - if (_forward && _oplogVisibleTs && id.as<int64_t>() > *_oplogVisibleTs) { + if (_forward && _oplogVisibleTs && id.asLong() > *_oplogVisibleTs) { _eof = true; return {}; } @@ -2485,9 +2485,9 @@ StandardWiredTigerRecordStore::StandardWiredTigerRecordStore(WiredTigerKVEngine* RecordId StandardWiredTigerRecordStore::getKey(WT_CURSOR* cursor) const { if (_keyFormat == KeyFormat::String) { - const char* oidBytes; - invariantWTOK(cursor->get_key(cursor, &oidBytes)); - return RecordId(OID::from(oidBytes)); + const char* data; + invariantWTOK(cursor->get_key(cursor, &data)); + return RecordId(data, RecordId::kSmallStrSize); } else { std::int64_t recordId; invariantWTOK(cursor->get_key(cursor, &recordId)); @@ -2497,9 +2497,9 @@ RecordId StandardWiredTigerRecordStore::getKey(WT_CURSOR* cursor) const { void StandardWiredTigerRecordStore::setKey(WT_CURSOR* cursor, RecordId id) const { if (_keyFormat == KeyFormat::String) { - cursor->set_key(cursor, id.as<OID>().view().view()); + cursor->set_key(cursor, id.strData()); } else { - cursor->set_key(cursor, id.as<int64_t>()); + cursor->set_key(cursor, id.asLong()); } } @@ -2529,17 +2529,17 @@ WiredTigerRecordStoreStandardCursor::WiredTigerRecordStoreStandardCursor( void WiredTigerRecordStoreStandardCursor::setKey(WT_CURSOR* cursor, RecordId id) const { if (_rs.keyFormat() == KeyFormat::String) { - cursor->set_key(cursor, id.as<OID>().view().view()); + cursor->set_key(cursor, id.strData()); } else { - cursor->set_key(cursor, id.as<int64_t>()); + cursor->set_key(cursor, id.asLong()); } } RecordId WiredTigerRecordStoreStandardCursor::getKey(WT_CURSOR* cursor) const { if (_rs.keyFormat() == KeyFormat::String) { - const char* oidBytes; - invariantWTOK(cursor->get_key(cursor, &oidBytes)); - return RecordId(OID::from(oidBytes)); + const char* data; + invariantWTOK(cursor->get_key(cursor, &data)); + return RecordId(data, RecordId::kSmallStrSize); } else { std::int64_t recordId; invariantWTOK(cursor->get_key(cursor, &recordId)); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp index fba746efab0..e8ae6f73835 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp @@ -951,7 +951,7 @@ TEST(WiredTigerRecordStoreTest, GetLatestOplogTest) { ServiceContext::UniqueOperationContext op1(harnessHelper->newOperationContext()); op1->recoveryUnit()->beginUnitOfWork(op1.get()); Timestamp tsOne = Timestamp( - static_cast<unsigned long long>(_oplogOrderInsertOplog(op1.get(), rs, 1).as<int64_t>())); + static_cast<unsigned long long>(_oplogOrderInsertOplog(op1.get(), rs, 1).asLong())); op1->recoveryUnit()->commitUnitOfWork(); // Asserting on a recovery unit without a snapshot. ASSERT_EQ(tsOne, wtrs->getLatestOplogTimestamp(op1.get())); @@ -970,7 +970,7 @@ TEST(WiredTigerRecordStoreTest, GetLatestOplogTest) { ServiceContext::UniqueOperationContext op2(harnessHelper->newOperationContext()); op2->recoveryUnit()->beginUnitOfWork(op2.get()); Timestamp tsThree = Timestamp( - static_cast<unsigned long long>(_oplogOrderInsertOplog(op2.get(), rs, 3).as<int64_t>())); + static_cast<unsigned long long>(_oplogOrderInsertOplog(op2.get(), rs, 3).asLong())); // Before committing, the query still only sees timestamp "1". ASSERT_EQ(tsOne, wtrs->getLatestOplogTimestamp(op2.get())); op2->recoveryUnit()->commitUnitOfWork(); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_standard_record_store_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_standard_record_store_test.cpp index 92b2848b80c..9959470a730 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_standard_record_store_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_standard_record_store_test.cpp @@ -191,6 +191,10 @@ public: return _engine.getConnection(); } + KVEngine* getEngine() override final { + return &_engine; + } + private: unittest::TempDir _dbpath; ClockSourceMock _cs; |