summary | refs | log | tree | commit | diff
path: root/src/mongo/db/catalog/validate_state.cpp
diff options
context:
space:
mode:
author: Yuhong Zhang <yuhong.zhang@mongodb.com> 2022-09-06 18:56:16 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-09-06 20:33:38 +0000
commit: 1a8f5ae47b2cd941b96cf8795fa45a167ac4fbeb (patch)
tree: 4fcbf5c206427d070d918f3376cd4572cc7a801d /src/mongo/db/catalog/validate_state.cpp
parent: d7926ae6e17f1a6b5075ef53b0daf6a562576499 (diff)
download: mongo-1a8f5ae47b2cd941b96cf8795fa45a167ac4fbeb.tar.gz
SERVER-68689 Switch background validation to use checkpoint cursors again
Diffstat (limited to 'src/mongo/db/catalog/validate_state.cpp')
-rw-r--r-- src/mongo/db/catalog/validate_state.cpp | 149
1 files changed, 104 insertions, 45 deletions
diff --git a/src/mongo/db/catalog/validate_state.cpp b/src/mongo/db/catalog/validate_state.cpp
index 2fb6505ce1b..0f9301c31f8 100644
--- a/src/mongo/db/catalog/validate_state.cpp
+++ b/src/mongo/db/catalog/validate_state.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/db_raii.h"
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/operation_context.h"
+#include "mongo/db/storage/durable_catalog.h"
#include "mongo/logv2/log.h"
#include "mongo/util/fail_point.h"
@@ -69,7 +70,6 @@ ValidateState::ValidateState(OperationContext* opCtx,
// being validated.
_noPBWM.emplace(opCtx->lockState());
- _globalLock.emplace(opCtx, MODE_IS);
_databaseLock.emplace(opCtx, _nss.db(), MODE_IS);
_collectionLock.emplace(opCtx, _nss, MODE_IS);
} else {
@@ -196,11 +196,6 @@ void ValidateState::_yieldCursors(OperationContext* opCtx) {
_traverseRecordStoreCursor->save();
_seekRecordStoreCursor->save();
- if (isBackground() && _validateTs) {
- // Reset snapshot to help ameliorate WiredTiger cache pressure.
- opCtx->recoveryUnit()->refreshSnapshot();
- }
-
// Restore all the cursors.
for (const auto& indexCursor : _indexCursors) {
indexCursor.second->restore();
@@ -218,25 +213,12 @@ void ValidateState::initializeCursors(OperationContext* opCtx) {
invariant(!_traverseRecordStoreCursor && !_seekRecordStoreCursor && _indexCursors.size() == 0 &&
_indexes.size() == 0);
- // Background validation (on replica sets) will read from a snapshot opened on the kNoOverlap
- // read source, which is the minimum of the last applied and all durable timestamps, instead of
- // the latest data. Using the kNoOverlap read source prevents us from having to take the PBWM
- // lock, which blocks replication. We cannot solely rely on the all durable timestamp as it can
- // be set while we're in the middle of applying a batch on secondary nodes.
- // Background validation on standalones uses the kNoTimestamp read source because standalones
- // have no timestamps to use for maintaining a consistent snapshot.
+ // Background validation reads from the last stable checkpoint instead of the latest data. This
+ // allows concurrent writes to go ahead without interfering with validation's view of the data.
RecoveryUnit::ReadSource rs = RecoveryUnit::ReadSource::kNoTimestamp;
if (isBackground()) {
opCtx->recoveryUnit()->abandonSnapshot();
- // Background validation is expecting to read from the no overlap timestamp, but
- // standalones do not support timestamps. Therefore, if this process is currently running as
- // a standalone, don't use a timestamp.
-
- if (repl::ReplicationCoordinator::get(opCtx)->isReplEnabled()) {
- rs = RecoveryUnit::ReadSource::kNoOverlap;
- } else {
- rs = RecoveryUnit::ReadSource::kNoTimestamp;
- }
+ rs = RecoveryUnit::ReadSource::kCheckpoint;
opCtx->recoveryUnit()->setTimestampReadSource(rs);
}
@@ -247,34 +229,111 @@ void ValidateState::initializeCursors(OperationContext* opCtx) {
_dataThrottle.turnThrottlingOff();
}
- _traverseRecordStoreCursor = std::make_unique<SeekableRecordThrottleCursor>(
- opCtx, _collection->getRecordStore(), &_dataThrottle);
- _seekRecordStoreCursor = std::make_unique<SeekableRecordThrottleCursor>(
- opCtx, _collection->getRecordStore(), &_dataThrottle);
+ // Capture the checkpointTimestamp before and after opening the cursors. If it has moved, the
+ // cursors are out of sync.
+ auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
+ boost::optional<Timestamp> checkpointTimestamp = boost::none;
+ boost::optional<Timestamp> currCheckpointTimestamp = boost::none;
+ do {
+ _indexCursors.clear();
+ _indexes.clear();
+ checkpointTimestamp = storageEngine->getLastStableRecoveryTimestamp();
+ StringSet readyDurableIndexes;
+ try {
+ _traverseRecordStoreCursor = std::make_unique<SeekableRecordThrottleCursor>(
+ opCtx, _collection->getRecordStore(), &_dataThrottle);
+ _seekRecordStoreCursor = std::make_unique<SeekableRecordThrottleCursor>(
+ opCtx, _collection->getRecordStore(), &_dataThrottle);
+ DurableCatalog::get(opCtx)->getReadyIndexes(
+ opCtx, _collection->getCatalogId(), &readyDurableIndexes);
+ } catch (const ExceptionFor<ErrorCodes::CursorNotFound>& ex) {
+ invariant(isBackground());
+ // End the validation if we can't open a checkpoint cursor on the collection.
+ LOGV2(
+ 6868900,
+ "Skipping background validation because the collection is not yet in a checkpoint",
+ "nss"_attr = _nss,
+ "ex"_attr = ex);
+ throw;
+ }
- if (rs != RecoveryUnit::ReadSource::kNoTimestamp) {
- invariant(rs == RecoveryUnit::ReadSource::kNoOverlap);
- invariant(isBackground());
- _validateTs = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx);
- }
+ const IndexCatalog* indexCatalog = _collection->getIndexCatalog();
+ // The index iterator for ready indexes is timestamp-aware and will only return indexes that
+ // are visible at our read time.
+ const auto it =
+ indexCatalog->getIndexIterator(opCtx, IndexCatalog::InclusionPolicy::kReady);
+ while (it->more()) {
+ const IndexCatalogEntry* entry = it->next();
+ const IndexDescriptor* desc = entry->descriptor();
+
+ // Filter out any in-memory index in the collection that is not in our PIT view of the
+ // MDB catalog. This is only important when background:true because we are then reading
+ // from the checkpoint's view of the MDB catalog and data.
+ if (isBackground() &&
+ readyDurableIndexes.find(desc->indexName()) == readyDurableIndexes.end()) {
+ LOGV2(
+ 6868901,
+ "Skipping background validation on the index because the index is not yet in a "
+ "checkpoint.",
+ "desc_indexName"_attr = desc->indexName(),
+ "nss"_attr = _nss);
+ continue;
+ }
- const IndexCatalog* indexCatalog = _collection->getIndexCatalog();
- // The index iterator for ready indexes is timestamp-aware and will only return indexes that
- // are visible at our read time.
- const auto it = indexCatalog->getIndexIterator(opCtx, IndexCatalog::InclusionPolicy::kReady);
- while (it->more()) {
- const IndexCatalogEntry* entry = it->next();
- const IndexDescriptor* desc = entry->descriptor();
+ // Read the index's ident from disk (the checkpoint if background:true). If it does not
+ // match the in-memory ident saved in the IndexCatalogEntry, then our PIT view of the
+ // index is old and the index has been dropped and recreated. In this case we will skip
+ // it since there is no utility in checking a dropped index (we also cannot currently
+ // access it because its in-memory representation is gone).
+ auto diskIndexIdent =
+ opCtx->getServiceContext()->getStorageEngine()->getCatalog()->getIndexIdent(
+ opCtx, _collection->getCatalogId(), desc->indexName());
+ if (entry->getIdent() != diskIndexIdent) {
+ LOGV2(6868902,
+ "Skipping validation on the index because the index was recreated and is not "
+ "yet in a checkpoint.",
+ "desc_indexName"_attr = desc->indexName(),
+ "nss"_attr = _nss);
+ continue;
+ }
- auto iam = entry->accessMethod()->asSortedData();
- if (!iam)
- continue;
+ auto iam = entry->accessMethod()->asSortedData();
+ if (!iam)
+ continue;
+
+ _indexCursors.emplace(
+ desc->indexName(),
+ std::make_unique<SortedDataInterfaceThrottleCursor>(opCtx, iam, &_dataThrottle));
+
+ // Skip any newly created indexes that, because they were built with a WT bulk loader,
+ // are checkpoint'ed but not yet consistent with the rest of checkpoint's PIT view of
+ // the data.
+ if (isBackground() &&
+ opCtx->getServiceContext()->getStorageEngine()->isInIndividuallyCheckpointedIndexes(
+ diskIndexIdent)) {
+ _indexCursors.erase(desc->indexName());
+ LOGV2(
+ 6868903,
+ "Skipping background validation on the index because the index data is not yet "
+ "consistent in the checkpoint.",
+ "desc_indexName"_attr = desc->indexName(),
+ "nss"_attr = _nss);
+ continue;
+ }
- _indexCursors.emplace(
- desc->indexName(),
- std::make_unique<SortedDataInterfaceThrottleCursor>(opCtx, iam, &_dataThrottle));
+ _indexes.push_back(indexCatalog->getEntryShared(desc));
+ }
+ currCheckpointTimestamp =
+ isBackground() ? storageEngine->getLastStableRecoveryTimestamp() : checkpointTimestamp;
+ // We will retry if a checkpoint happens during opening the cursors or break out of the loop
+ // for foreground validation. Due to the limited number of indexes a collection can have, it
+ // is expected to have at most one retry.
+ } while (currCheckpointTimestamp != checkpointTimestamp);
- _indexes.push_back(indexCatalog->getEntryShared(desc));
+ if (rs != RecoveryUnit::ReadSource::kNoTimestamp) {
+ invariant(rs == RecoveryUnit::ReadSource::kCheckpoint);
+ invariant(isBackground());
+ _validateTs = checkpointTimestamp;
}
// Because SeekableRecordCursors don't have a method to reset to the start, we save and then