summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorYuhong Zhang <yuhong.zhang@mongodb.com>2022-10-06 21:25:47 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-10-06 23:15:00 +0000
commit9179d0a7109efb34631a6e19a87e055ed4e8c730 (patch)
tree876ebf658d94753fa6ed1f158c88a0bb1c5dd6ee /src/mongo/db
parent08986dc909c370494348a08bf11b51cb03778759 (diff)
downloadmongo-9179d0a7109efb34631a6e19a87e055ed4e8c730.tar.gz
SERVER-68689 Switch background validation to use checkpoint cursors again
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/catalog/collection_validation.cpp12
-rw-r--r--src/mongo/db/catalog/collection_validation_test.cpp17
-rw-r--r--src/mongo/db/catalog/validate_state.cpp107
-rw-r--r--src/mongo/db/catalog/validate_state.h6
-rw-r--r--src/mongo/db/catalog/validate_state_test.cpp77
-rw-r--r--src/mongo/db/storage/durable_catalog.h4
-rw-r--r--src/mongo/db/storage/durable_catalog_impl.cpp15
-rw-r--r--src/mongo/db/storage/durable_catalog_impl.h2
-rw-r--r--src/mongo/db/storage/recovery_unit.h5
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_cursor.cpp7
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp4
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp15
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h2
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp11
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.cpp4
15 files changed, 206 insertions, 82 deletions
diff --git a/src/mongo/db/catalog/collection_validation.cpp b/src/mongo/db/catalog/collection_validation.cpp
index a587b935bdb..62963db1410 100644
--- a/src/mongo/db/catalog/collection_validation.cpp
+++ b/src/mongo/db/catalog/collection_validation.cpp
@@ -625,6 +625,18 @@ Status validate(OperationContext* opCtx,
"corruption found",
logAttrs(validateState.nss()),
logAttrs(validateState.uuid()));
+ } catch (ExceptionFor<ErrorCodes::CursorNotFound>&) {
+ invariant(validateState.isBackground());
+ string warning = str::stream()
+ << "Collection validation with {background: true} validates"
+ << " the latest checkpoint (data in a snapshot written to disk in a consistent"
+ << " way across all data files). During this validation, some tables have not yet been"
+ << " checkpointed.";
+ results->warnings.push_back(warning);
+
+ // Nothing to validate, so it must be valid.
+ results->valid = true;
+ return Status::OK();
} catch (const DBException& e) {
if (!opCtx->checkForInterruptNoAssert().isOK() || e.code() == ErrorCodes::Interrupted) {
LOGV2_OPTIONS(5160301,
diff --git a/src/mongo/db/catalog/collection_validation_test.cpp b/src/mongo/db/catalog/collection_validation_test.cpp
index f0cfa7842c8..15f8f475aec 100644
--- a/src/mongo/db/catalog/collection_validation_test.cpp
+++ b/src/mongo/db/catalog/collection_validation_test.cpp
@@ -45,6 +45,9 @@ namespace {
const NamespaceString kNss = NamespaceString("test.t");
class CollectionValidationTest : public CatalogTestFixture {
+protected:
+ CollectionValidationTest(Options options = {}) : CatalogTestFixture(std::move(options)) {}
+
private:
void setUp() override {
CatalogTestFixture::setUp();
@@ -56,6 +59,12 @@ private:
};
};
+// Background validation opens checkpoint cursors which requires reading from the disk.
+class CollectionValidationDiskTest : public CollectionValidationTest {
+protected:
+ CollectionValidationDiskTest() : CollectionValidationTest(Options{}.ephemeral(false)) {}
+};
+
/**
* Calls validate on collection kNss with both kValidateFull and kValidateNormal validation levels
* and verifies the results.
@@ -209,7 +218,7 @@ TEST_F(CollectionValidationTest, ValidateEmpty) {
/*numInvalidDocuments*/ 0,
/*numErrors*/ 0);
}
-TEST_F(CollectionValidationTest, BackgroundValidateEmpty) {
+TEST_F(CollectionValidationDiskTest, BackgroundValidateEmpty) {
backgroundValidate(operationContext(),
/*valid*/ true,
/*numRecords*/ 0,
@@ -227,7 +236,7 @@ TEST_F(CollectionValidationTest, Validate) {
/*numInvalidDocuments*/ 0,
/*numErrors*/ 0);
}
-TEST_F(CollectionValidationTest, BackgroundValidate) {
+TEST_F(CollectionValidationDiskTest, BackgroundValidate) {
auto opCtx = operationContext();
backgroundValidate(opCtx,
/*valid*/ true,
@@ -246,7 +255,7 @@ TEST_F(CollectionValidationTest, ValidateError) {
/*numInvalidDocuments*/ 1,
/*numErrors*/ 1);
}
-TEST_F(CollectionValidationTest, BackgroundValidateError) {
+TEST_F(CollectionValidationDiskTest, BackgroundValidateError) {
auto opCtx = operationContext();
backgroundValidate(opCtx,
/*valid*/ false,
@@ -280,7 +289,7 @@ void waitUntilValidateFailpointHasBeenReached() {
ASSERT(CollectionValidation::getIsValidationPausedForTest());
}
-TEST_F(CollectionValidationTest, BackgroundValidateRunsConcurrentlyWithWrites) {
+TEST_F(CollectionValidationDiskTest, BackgroundValidateRunsConcurrentlyWithWrites) {
auto opCtx = operationContext();
auto serviceContext = opCtx->getServiceContext();
diff --git a/src/mongo/db/catalog/validate_state.cpp b/src/mongo/db/catalog/validate_state.cpp
index 2fb6505ce1b..862eff71fac 100644
--- a/src/mongo/db/catalog/validate_state.cpp
+++ b/src/mongo/db/catalog/validate_state.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/db_raii.h"
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/operation_context.h"
+#include "mongo/db/storage/durable_catalog.h"
#include "mongo/logv2/log.h"
#include "mongo/util/fail_point.h"
@@ -69,7 +70,6 @@ ValidateState::ValidateState(OperationContext* opCtx,
// being validated.
_noPBWM.emplace(opCtx->lockState());
- _globalLock.emplace(opCtx, MODE_IS);
_databaseLock.emplace(opCtx, _nss.db(), MODE_IS);
_collectionLock.emplace(opCtx, _nss, MODE_IS);
} else {
@@ -196,11 +196,6 @@ void ValidateState::_yieldCursors(OperationContext* opCtx) {
_traverseRecordStoreCursor->save();
_seekRecordStoreCursor->save();
- if (isBackground() && _validateTs) {
- // Reset snapshot to help ameliorate WiredTiger cache pressure.
- opCtx->recoveryUnit()->refreshSnapshot();
- }
-
// Restore all the cursors.
for (const auto& indexCursor : _indexCursors) {
indexCursor.second->restore();
@@ -218,25 +213,12 @@ void ValidateState::initializeCursors(OperationContext* opCtx) {
invariant(!_traverseRecordStoreCursor && !_seekRecordStoreCursor && _indexCursors.size() == 0 &&
_indexes.size() == 0);
- // Background validation (on replica sets) will read from a snapshot opened on the kNoOverlap
- // read source, which is the minimum of the last applied and all durable timestamps, instead of
- // the latest data. Using the kNoOverlap read source prevents us from having to take the PBWM
- // lock, which blocks replication. We cannot solely rely on the all durable timestamp as it can
- // be set while we're in the middle of applying a batch on secondary nodes.
- // Background validation on standalones uses the kNoTimestamp read source because standalones
- // have no timestamps to use for maintaining a consistent snapshot.
+ // Background validation reads from the last stable checkpoint instead of the latest data. This
+ // allows concurrent writes to go ahead without interfering with validation's view of the data.
RecoveryUnit::ReadSource rs = RecoveryUnit::ReadSource::kNoTimestamp;
if (isBackground()) {
opCtx->recoveryUnit()->abandonSnapshot();
- // Background validation is expecting to read from the no overlap timestamp, but
- // standalones do not support timestamps. Therefore, if this process is currently running as
- // a standalone, don't use a timestamp.
-
- if (repl::ReplicationCoordinator::get(opCtx)->isReplEnabled()) {
- rs = RecoveryUnit::ReadSource::kNoOverlap;
- } else {
- rs = RecoveryUnit::ReadSource::kNoTimestamp;
- }
+ rs = RecoveryUnit::ReadSource::kCheckpoint;
opCtx->recoveryUnit()->setTimestampReadSource(rs);
}
@@ -247,15 +229,28 @@ void ValidateState::initializeCursors(OperationContext* opCtx) {
_dataThrottle.turnThrottlingOff();
}
- _traverseRecordStoreCursor = std::make_unique<SeekableRecordThrottleCursor>(
- opCtx, _collection->getRecordStore(), &_dataThrottle);
- _seekRecordStoreCursor = std::make_unique<SeekableRecordThrottleCursor>(
- opCtx, _collection->getRecordStore(), &_dataThrottle);
-
- if (rs != RecoveryUnit::ReadSource::kNoTimestamp) {
- invariant(rs == RecoveryUnit::ReadSource::kNoOverlap);
+ boost::optional<Lock::ResourceLock> checkpointLock;
+ if (isBackground()) {
+ // Acquires a resource mutex to prevent taking a checkpoint during opening the checkpoint
+ // cursors to make sure all cursors are reading from the same point in time.
+ checkpointLock.emplace(opCtx, ResourceId(RESOURCE_MUTEX, "checkpoint"), MODE_X);
+ }
+ StringSet readyDurableIndexes;
+ try {
+ _traverseRecordStoreCursor = std::make_unique<SeekableRecordThrottleCursor>(
+ opCtx, _collection->getRecordStore(), &_dataThrottle);
+ _seekRecordStoreCursor = std::make_unique<SeekableRecordThrottleCursor>(
+ opCtx, _collection->getRecordStore(), &_dataThrottle);
+ DurableCatalog::get(opCtx)->getReadyIndexes(
+ opCtx, _collection->getCatalogId(), &readyDurableIndexes);
+ } catch (const ExceptionFor<ErrorCodes::CursorNotFound>& ex) {
invariant(isBackground());
- _validateTs = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx);
+ // End the validation if we can't open a checkpoint cursor on the collection.
+ LOGV2(6868900,
+ "Skipping background validation because the collection is not yet in a checkpoint",
+ "nss"_attr = _nss,
+ "ex"_attr = ex);
+ throw;
}
const IndexCatalog* indexCatalog = _collection->getIndexCatalog();
@@ -266,6 +261,36 @@ void ValidateState::initializeCursors(OperationContext* opCtx) {
const IndexCatalogEntry* entry = it->next();
const IndexDescriptor* desc = entry->descriptor();
+ // Filter out any in-memory index in the collection that is not in our PIT view of the
+ // MDB catalog. This is only important when background:true because we are then reading
+ // from the checkpoint's view of the MDB catalog and data.
+ if (isBackground() &&
+ readyDurableIndexes.find(desc->indexName()) == readyDurableIndexes.end()) {
+ LOGV2(6868901,
+ "Skipping background validation on the index because the index is not yet in a "
+ "checkpoint.",
+ "desc_indexName"_attr = desc->indexName(),
+ "nss"_attr = _nss);
+ continue;
+ }
+
+ // Read the index's ident from disk (the checkpoint if background:true). If it does not
+ // match the in-memory ident saved in the IndexCatalogEntry, then our PIT view of the
+ // index is old and the index has been dropped and recreated. In this case we will skip
+ // it since there is no utility in checking a dropped index (we also cannot currently
+ // access it because its in-memory representation is gone).
+ auto diskIndexIdent =
+ opCtx->getServiceContext()->getStorageEngine()->getCatalog()->getIndexIdent(
+ opCtx, _collection->getCatalogId(), desc->indexName());
+ if (entry->getIdent() != diskIndexIdent) {
+ LOGV2(6868902,
+ "Skipping validation on the index because the index was recreated and is not "
+ "yet in a checkpoint.",
+ "desc_indexName"_attr = desc->indexName(),
+ "nss"_attr = _nss);
+ continue;
+ }
+
auto iam = entry->accessMethod()->asSortedData();
if (!iam)
continue;
@@ -274,9 +299,31 @@ void ValidateState::initializeCursors(OperationContext* opCtx) {
desc->indexName(),
std::make_unique<SortedDataInterfaceThrottleCursor>(opCtx, iam, &_dataThrottle));
+ // Skip any newly created indexes that, because they were built with a WT bulk loader,
+ // are checkpoint'ed but not yet consistent with the rest of checkpoint's PIT view of
+ // the data.
+ if (isBackground() &&
+ opCtx->getServiceContext()->getStorageEngine()->isInIndividuallyCheckpointedIndexes(
+ diskIndexIdent)) {
+ _indexCursors.erase(desc->indexName());
+ LOGV2(6868903,
+ "Skipping background validation on the index because the index data is not yet "
+ "consistent in the checkpoint.",
+ "desc_indexName"_attr = desc->indexName(),
+ "nss"_attr = _nss);
+ continue;
+ }
+
_indexes.push_back(indexCatalog->getEntryShared(desc));
}
+ if (rs != RecoveryUnit::ReadSource::kNoTimestamp) {
+ invariant(rs == RecoveryUnit::ReadSource::kCheckpoint);
+ invariant(isBackground());
+ auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
+ _validateTs = storageEngine->getLastStableRecoveryTimestamp();
+ }
+
// Because SeekableRecordCursors don't have a method to reset to the start, we save and then
// use a seek to the first RecordId to reset the cursor (and reuse it) as needed. When
// iterating through a Record Store cursor, we initialize the loop (and obtain the first
diff --git a/src/mongo/db/catalog/validate_state.h b/src/mongo/db/catalog/validate_state.h
index 140a4a24fb1..c508a6b5850 100644
--- a/src/mongo/db/catalog/validate_state.h
+++ b/src/mongo/db/catalog/validate_state.h
@@ -177,8 +177,7 @@ public:
/**
* Initializes all the cursors to be used during validation and moves the traversal record
- * store cursor to the first record. For background validation, this should be called while
- * holding the checkpoint lock when performing a background validation.
+ * store cursor to the first record.
*/
void initializeCursors(OperationContext* opCtx);
@@ -224,8 +223,7 @@ private:
/**
* Saves and restores the open cursors to release snapshots and minimize cache pressure for
- * validation. For background validation, also refreshes the snapshot by starting a new storage
- * transaction.
+ * validation.
*/
void _yieldCursors(OperationContext* opCtx);
diff --git a/src/mongo/db/catalog/validate_state_test.cpp b/src/mongo/db/catalog/validate_state_test.cpp
index a3ccd77ff58..caababd402a 100644
--- a/src/mongo/db/catalog/validate_state_test.cpp
+++ b/src/mongo/db/catalog/validate_state_test.cpp
@@ -36,6 +36,7 @@
#include "mongo/db/op_observer/op_observer_registry.h"
#include "mongo/db/op_observer/oplog_writer_mock.h"
#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/storage/durable_catalog.h"
#include "mongo/db/storage/snapshot_manager.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_global_options.h"
#include "mongo/util/fail_point.h"
@@ -50,7 +51,9 @@ namespace {
const NamespaceString kNss = NamespaceString("fooDB.fooColl");
class ValidateStateTest : public CatalogTestFixture {
-public:
+protected:
+ ValidateStateTest(Options options = {}) : CatalogTestFixture(std::move(options)) {}
+
/**
* Create collection 'nss'. It will possess a default _id index.
*/
@@ -65,6 +68,12 @@ private:
void setUp() override;
};
+// Background validation opens checkpoint cursors which requires reading from the disk.
+class ValidateStateDiskTest : public ValidateStateTest {
+protected:
+ ValidateStateDiskTest() : ValidateStateTest(Options{}.ephemeral(false)) {}
+};
+
void ValidateStateTest::createCollection(OperationContext* opCtx, const NamespaceString& nss) {
// Create collection.
CollectionOptions defaultCollectionOptions;
@@ -170,7 +179,8 @@ TEST_F(ValidateStateTest, NonExistentCollectionShouldThrowNamespaceNotFoundError
ErrorCodes::NamespaceNotFound);
}
-TEST_F(ValidateStateTest, UncheckpointedCollectionShouldBeAbleToInitializeCursors) {
+// Validate with {background:true} should fail to find an uncheckpoint'ed collection.
+TEST_F(ValidateStateDiskTest, UncheckpointedCollectionShouldThrowCursorNotFoundError) {
auto opCtx = operationContext();
// Disable periodic checkpoint'ing thread so we can control when checkpoints occur.
@@ -179,16 +189,16 @@ TEST_F(ValidateStateTest, UncheckpointedCollectionShouldBeAbleToInitializeCursor
// Checkpoint of all of the data.
opCtx->recoveryUnit()->waitUntilUnjournaledWritesDurable(opCtx, /*stableCheckpoint*/ false);
+ // Create the collection, which will not be in the checkpoint, and check that a CursorNotFound
+ // error is thrown when attempting to open cursors.
createCollectionAndPopulateIt(opCtx, kNss);
CollectionValidation::ValidateState validateState(
opCtx,
kNss,
CollectionValidation::ValidateMode::kBackground,
CollectionValidation::RepairMode::kNone);
- // Assert that cursors are able to created on the new collection.
- validateState.initializeCursors(opCtx);
- // There should only be a first record id if cursors were initialized successfully.
- ASSERT(!validateState.getFirstRecordId().isNull());
+ ASSERT_THROWS_CODE(
+ validateState.initializeCursors(opCtx), AssertionException, ErrorCodes::CursorNotFound);
}
// Basic test with {background:false} to open cursors against all collection indexes.
@@ -234,8 +244,8 @@ TEST_F(ValidateStateTest, OpenCursorsOnAllIndexes) {
ASSERT_EQ(validateState.getIndexes().size(), 5);
}
-// Open cursors against all indexes with {background:true}.
-TEST_F(ValidateStateTest, OpenCursorsOnAllIndexesWithBackground) {
+// Open cursors against checkpoint'ed indexes with {background:true}.
+TEST_F(ValidateStateDiskTest, OpenCursorsOnCheckpointedIndexes) {
auto opCtx = operationContext();
createCollectionAndPopulateIt(opCtx, kNss);
@@ -259,14 +269,57 @@ TEST_F(ValidateStateTest, OpenCursorsOnAllIndexesWithBackground) {
CollectionValidation::RepairMode::kNone);
validateState.initializeCursors(opCtx);
- // We should be able to open a cursor on each index.
- // (Note the _id index was create with collection creation, so we have 5 indexes.)
- ASSERT_EQ(validateState.getIndexes().size(), 5);
+ // Make sure the uncheckpoint'ed indexes are not found.
+ // (Note the _id index was create with collection creation, so we have 3 indexes.)
+ ASSERT_EQ(validateState.getIndexes().size(), 3);
+}
+
+// Only open cursors against indexes that are consistent with the rest of the checkpoint'ed data.
+TEST_F(ValidateStateDiskTest, OpenCursorsOnConsistentlyCheckpointedIndexes) {
+ auto opCtx = operationContext();
+ createCollectionAndPopulateIt(opCtx, kNss);
+
+ // Disable periodic checkpoint'ing thread so we can control when checkpoints occur.
+ FailPointEnableBlock failPoint("pauseCheckpointThread");
+
+ // Create several indexes.
+ createIndex(opCtx, kNss, BSON("a" << 1));
+ createIndex(opCtx, kNss, BSON("b" << 1));
+ createIndex(opCtx, kNss, BSON("c" << 1));
+ createIndex(opCtx, kNss, BSON("d" << 1));
+
+ // Checkpoint the indexes.
+ opCtx->recoveryUnit()->waitUntilUnjournaledWritesDurable(opCtx, /*stableCheckpoint*/ false);
+
+ {
+ // Artificially set two indexes as inconsistent with the checkpoint.
+ AutoGetCollection autoColl(opCtx, kNss, MODE_IS);
+ auto indexIdentA =
+ opCtx->getServiceContext()->getStorageEngine()->getCatalog()->getIndexIdent(
+ opCtx, autoColl.getCollection()->getCatalogId(), "a_1");
+ auto indexIdentB =
+ opCtx->getServiceContext()->getStorageEngine()->getCatalog()->getIndexIdent(
+ opCtx, autoColl.getCollection()->getCatalogId(), "b_1");
+ opCtx->getServiceContext()->getStorageEngine()->addIndividuallyCheckpointedIndex(
+ indexIdentA);
+ opCtx->getServiceContext()->getStorageEngine()->addIndividuallyCheckpointedIndex(
+ indexIdentB);
+ }
+
+ // The two inconsistent indexes should not be found.
+ // (Note the _id index was create with collection creation, so we have 3 indexes.)
+ CollectionValidation::ValidateState validateState(
+ opCtx,
+ kNss,
+ CollectionValidation::ValidateMode::kBackground,
+ CollectionValidation::RepairMode::kNone);
+ validateState.initializeCursors(opCtx);
+ ASSERT_EQ(validateState.getIndexes().size(), 3);
}
// Indexes in the checkpoint that were dropped in the present should not have cursors opened against
// them.
-TEST_F(ValidateStateTest, CursorsAreNotOpenedAgainstCheckpointedIndexesThatWereLaterDropped) {
+TEST_F(ValidateStateDiskTest, CursorsAreNotOpenedAgainstCheckpointedIndexesThatWereLaterDropped) {
auto opCtx = operationContext();
createCollectionAndPopulateIt(opCtx, kNss);
diff --git a/src/mongo/db/storage/durable_catalog.h b/src/mongo/db/storage/durable_catalog.h
index 3ed725b8a2d..73515f34f8f 100644
--- a/src/mongo/db/storage/durable_catalog.h
+++ b/src/mongo/db/storage/durable_catalog.h
@@ -238,6 +238,10 @@ public:
virtual int getTotalIndexCount(OperationContext* opCtx, const RecordId& catalogId) const = 0;
+ virtual void getReadyIndexes(OperationContext* opCtx,
+ RecordId catalogId,
+ StringSet* names) const = 0;
+
virtual bool isIndexPresent(OperationContext* opCtx,
const RecordId& catalogId,
StringData indexName) const = 0;
diff --git a/src/mongo/db/storage/durable_catalog_impl.cpp b/src/mongo/db/storage/durable_catalog_impl.cpp
index d7a3acbaead..c8d33e99b30 100644
--- a/src/mongo/db/storage/durable_catalog_impl.cpp
+++ b/src/mongo/db/storage/durable_catalog_impl.cpp
@@ -854,6 +854,21 @@ int DurableCatalogImpl::getTotalIndexCount(OperationContext* opCtx,
return md->getTotalIndexCount();
}
+void DurableCatalogImpl::getReadyIndexes(OperationContext* opCtx,
+ RecordId catalogId,
+ StringSet* names) const {
+ auto md = getMetaData(opCtx, catalogId);
+
+ if (!md) {
+ return;
+ }
+
+ for (const auto& index : md->indexes) {
+ if (index.ready)
+ names->insert(index.spec["name"].String());
+ }
+}
+
bool DurableCatalogImpl::isIndexPresent(OperationContext* opCtx,
const RecordId& catalogId,
StringData indexName) const {
diff --git a/src/mongo/db/storage/durable_catalog_impl.h b/src/mongo/db/storage/durable_catalog_impl.h
index cc45ef07c2b..df909525c97 100644
--- a/src/mongo/db/storage/durable_catalog_impl.h
+++ b/src/mongo/db/storage/durable_catalog_impl.h
@@ -152,6 +152,8 @@ public:
int getTotalIndexCount(OperationContext* opCtx, const RecordId& catalogId) const;
+ void getReadyIndexes(OperationContext* opCtx, RecordId catalogId, StringSet* names) const;
+
bool isIndexPresent(OperationContext* opCtx,
const RecordId& catalogId,
StringData indexName) const;
diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h
index 028a978bb2a..20c49c637d3 100644
--- a/src/mongo/db/storage/recovery_unit.h
+++ b/src/mongo/db/storage/recovery_unit.h
@@ -428,11 +428,6 @@ public:
}
/**
- * Refreshes a read transaction by resetting the snapshot in use
- */
- virtual void refreshSnapshot() {}
-
- /**
* The ReadSource indicates which external or provided timestamp to read from for future
* transactions.
*/
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_cursor.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_cursor.cpp
index a1c6924a4cd..49346bfc671 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_cursor.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_cursor.cpp
@@ -93,7 +93,12 @@ WiredTigerCursor::WiredTigerCursor(const std::string& uri,
}
WiredTigerCursor::~WiredTigerCursor() {
- _session->releaseCursor(_tableID, _cursor, _config);
+ if (_isCheckpoint) {
+ // Closes the checkpoint cursor to avoid outdated data view when opening a new one.
+ _session->closeCursor(_cursor);
+ } else {
+ _session->releaseCursor(_tableID, _cursor, _config);
+ }
}
void WiredTigerCursor::reset() {
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index d704fa0b4d1..c7b922ad0b6 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -1935,12 +1935,12 @@ void WiredTigerKVEngine::_checkpoint(OperationContext* opCtx, WT_SESSION* sessio
if (initialDataTimestamp.asULL() <= 1) {
Lock::ResourceLock checkpointLock{
opCtx, ResourceId(RESOURCE_MUTEX, "checkpoint"), MODE_X};
- clearIndividuallyCheckpointedIndexes();
invariantWTOK(session->checkpoint(session, "use_timestamp=false"), session);
LOGV2_FOR_RECOVERY(5576602,
2,
"Completed unstable checkpoint.",
"initialDataTimestamp"_attr = initialDataTimestamp.toString());
+ clearIndividuallyCheckpointedIndexes();
} else if (stableTimestamp < initialDataTimestamp) {
LOGV2_FOR_RECOVERY(
23985,
@@ -1960,8 +1960,8 @@ void WiredTigerKVEngine::_checkpoint(OperationContext* opCtx, WT_SESSION* sessio
{
Lock::ResourceLock checkpointLock{
opCtx, ResourceId(RESOURCE_MUTEX, "checkpoint"), MODE_X};
- clearIndividuallyCheckpointedIndexes();
invariantWTOK(session->checkpoint(session, "use_timestamp=true"), session);
+ clearIndividuallyCheckpointedIndexes();
}
if (oplogNeededForRollback.isOK()) {
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
index e9888b40e0e..9d3995562c1 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
@@ -372,21 +372,6 @@ void WiredTigerRecoveryUnit::preallocateSnapshotForOplogRead() {
preallocateSnapshot();
}
-void WiredTigerRecoveryUnit::refreshSnapshot() {
- // Currently, this code only works for kNoOverlap or kNoTimestamp.
- invariant(_timestampReadSource == ReadSource::kNoOverlap ||
- _timestampReadSource == ReadSource::kNoTimestamp);
- invariant(_isActive());
- invariant(!_inUnitOfWork());
- invariant(!_noEvictionAfterRollback);
- invariant(_abandonSnapshotMode == AbandonSnapshotMode::kAbort);
-
- auto session = _session->getSession();
- invariantWTOK(session->reset_snapshot(session), session);
- LOGV2_DEBUG(
- 6235000, 3, "WT refreshed snapshot", "snapshotId"_attr = getSnapshotId().toNumber());
-}
-
void WiredTigerRecoveryUnit::_txnClose(bool commit) {
invariant(_isActive(), toString(_getState()));
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
index 5397b91fb91..26102686dbd 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h
@@ -181,8 +181,6 @@ public:
std::shared_ptr<StorageStats> getOperationStatistics() const override;
- void refreshSnapshot() override;
-
void ignoreAllMultiTimestampConstraints() {
_multiTimestampConstraintTracker.ignoreAllMultiTimestampConstraints = true;
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
index efbce9ac0ad..35e6b0a4302 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
@@ -626,7 +626,7 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, CommitTimestampAfterSetTimestampOnAbor
ASSERT(!commitTs);
}
-TEST_F(WiredTigerRecoveryUnitTestFixture, CheckpointCursorsCached) {
+TEST_F(WiredTigerRecoveryUnitTestFixture, CheckpointCursorsAreNotCached) {
auto opCtx = clientAndCtx1.second.get();
// Hold the global lock throughout the test to avoid having the global lock destructor
@@ -666,7 +666,8 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, CheckpointCursorsCached) {
// Force a checkpoint.
engine->flushAllFiles(opCtx, /*callerHoldsReadLock*/ false);
- // Test 2: Checkpoint cursors will be released into the cache.
+ // Test 2: Checkpoint cursors are not expected to be cached, they
+ // should be immediately closed when destructed.
ru->setTimestampReadSource(WiredTigerRecoveryUnit::ReadSource::kCheckpoint);
// Close any cached cursors to establish a new 'before' state.
@@ -676,9 +677,9 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, CheckpointCursorsCached) {
// Will search the checkpoint cursor for the record, then release the checkpoint cursor.
ASSERT_TRUE(rs->findRecord(opCtx, s.getValue(), &rd));
- // A new cursor should have been released into the cache, along with a metadata cursor that is
- // opened to determine if the table is LSM. Metadata cursors are cached.
- ASSERT_GT(ru->getSession()->cachedCursors(), cachedCursorsBefore + 1);
+ // No new cursors should have been released into the cache, with the exception of a metadata
+ // cursor that is opened to determine if the table is LSM. Metadata cursors are cached.
+ ASSERT_EQ(ru->getSession()->cachedCursors(), cachedCursorsBefore + 1);
// All opened cursors are closed.
ASSERT_EQ(0, ru->getSession()->cursorsOut());
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.cpp
index c3cfaf7b478..5e8337030d1 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_session_cache.cpp
@@ -298,8 +298,8 @@ void WiredTigerSessionCache::waitUntilDurable(OperationContext* opCtx,
{
Lock::ResourceLock checkpointLock{
opCtx, ResourceId(RESOURCE_MUTEX, "checkpoint"), MODE_X};
- _engine->clearIndividuallyCheckpointedIndexes();
invariantWTOK(s->checkpoint(s, config), s);
+ _engine->clearIndividuallyCheckpointedIndexes();
}
if (token) {
@@ -353,10 +353,10 @@ void WiredTigerSessionCache::waitUntilDurable(OperationContext* opCtx,
LOGV2_DEBUG(22419, 4, "flushed journal");
} else {
Lock::ResourceLock checkpointLock{opCtx, ResourceId(RESOURCE_MUTEX, "checkpoint"), MODE_X};
- _engine->clearIndividuallyCheckpointedIndexes();
invariantWTOK(_waitUntilDurableSession->checkpoint(_waitUntilDurableSession, nullptr),
_waitUntilDurableSession);
LOGV2_DEBUG(22420, 4, "created checkpoint");
+ _engine->clearIndividuallyCheckpointedIndexes();
}
if (token) {