From ba467f46cc1bc49965e1d72b541eff0cf1d7b22e Mon Sep 17 00:00:00 2001
From: Louis Williams
Date: Thu, 3 Nov 2022 14:56:46 +0000
Subject: SERVER-67695 Remove write-blocking dbCheck

This removes the snapshotRead:false option to dbCheck, which blocked
concurrent writes.
---
 .../dbCheck_snapshotRead.js       |  83 ++++++++++
 jstests/replsets/dbcheck.js       |  43 +----
 src/mongo/db/commands/dbcheck.cpp | 177 ++++++++-------------
 src/mongo/db/repl/dbcheck.cpp     |  24 ++-
 src/mongo/db/repl/dbcheck.idl     |  36 -----
 5 files changed, 172 insertions(+), 191 deletions(-)
 create mode 100644 jstests/multiVersion/targetedTestsLastLtsFeatures/dbCheck_snapshotRead.js

diff --git a/jstests/multiVersion/targetedTestsLastLtsFeatures/dbCheck_snapshotRead.js b/jstests/multiVersion/targetedTestsLastLtsFeatures/dbCheck_snapshotRead.js
new file mode 100644
index 00000000000..729289bd9a5
--- /dev/null
+++ b/jstests/multiVersion/targetedTestsLastLtsFeatures/dbCheck_snapshotRead.js
@@ -0,0 +1,83 @@
+/**
+ * Ensure that a 'latest' node does not crash when a 6.0 node replicates a dbCheck oplog entry
+ * that uses the removed snapshotRead:false option.
+ *
+ * @tags: [
+ *   requires_replication,
+ * ]
+ */
+(function() {
+"use strict";
+
+load('jstests/multiVersion/libs/multi_rs.js');
+
+const nodes = {
+    // We want the 6.0 node to be the primary.
+    n1: {binVersion: "6.0", rsConfig: {priority: 1}},
+    n2: {binVersion: "latest", rsConfig: {priority: 0}},
+};
+
+const rst = new ReplSetTest({nodes: nodes});
+rst.startSet();
+rst.initiate();
+
+const dbName = "test";
+const collName = jsTestName();
+
+const primary = rst.getPrimary();
+const primaryDB = primary.getDB(dbName);
+const coll = primaryDB[collName];
+
+assert.commandWorked(coll.insert({a: 1}));
+
+// The 6.0 node will replicate the dbCheck oplog entry with the 'snapshotRead:false' option. This
+// is not supported in recent versions and should be ignored, but it must not crash the node.
+assert.commandWorked(primaryDB.runCommand({"dbCheck": 1, snapshotRead: false}));
+
+rst.awaitReplication();
+
+function dbCheckCompleted(db) {
+    return db.currentOp().inprog.filter(x => x["desc"] == "dbCheck")[0] === undefined;
+}
+
+function forEachNode(f) {
+    f(rst.getPrimary());
+    f(rst.getSecondary());
+}
+
+function awaitDbCheckCompletion(db) {
+    assert.soon(() => dbCheckCompleted(db), "dbCheck timed out");
+    rst.awaitSecondaryNodes();
+    rst.awaitReplication();
+
+    forEachNode(function(node) {
+        const healthlog = node.getDB('local').system.healthlog;
+        assert.soon(function() {
+            return (healthlog.find({"operation": "dbCheckStop"}).itcount() == 1);
+        }, "dbCheck command didn't complete");
+    });
+}
+
+awaitDbCheckCompletion(primaryDB);
+
+{
+    // The 6.0 primary should not report any errors.
+    const healthlog = primary.getDB('local').system.healthlog;
+    assert.eq(0, healthlog.find({severity: "error"}).itcount());
+    assert.eq(0, healthlog.find({severity: "warning"}).itcount());
+}
+
+{
+    // The latest secondary should log an error in the health log.
+    const secondary = rst.getSecondary();
+    const healthlog = secondary.getDB('local').system.healthlog;
+    assert.eq(1, healthlog.find({severity: "error"}).itcount());
+    assert.eq(0, healthlog.find({severity: "warning"}).itcount());
+    const errorEntry = healthlog.findOne({severity: "error"});
+    assert(errorEntry.hasOwnProperty('data'), tojson(errorEntry));
+    assert.eq(false, errorEntry.data.success, tojson(errorEntry));
+    assert(errorEntry.data.error.startsWith("Location6769502"), tojson(errorEntry));
+}
+
+rst.stopSet();
+})();
diff --git a/jstests/replsets/dbcheck.js b/jstests/replsets/dbcheck.js
index e3cab92b03e..710d986d535 100644
--- a/jstests/replsets/dbcheck.js
+++ b/jstests/replsets/dbcheck.js
@@ -40,10 +40,6 @@ function clearLog() {
     forEachNode(conn => conn.getDB("local").system.healthlog.drop());
 }
 
-function addEnoughForMultipleBatches(collection) {
-    collection.insertMany([...Array(10000).keys()].map(x => ({_id: x})), {ordered: false});
-}
-
 // Name for a collection which takes multiple batches to check and which shouldn't be modified
 // by any of the tests.
 const multiBatchSimpleCollName = "dbcheck-simple-collection";
@@ -55,29 +51,6 @@ function dbCheckCompleted(db) {
     return db.currentOp().inprog.filter(x => x["desc"] == "dbCheck")[0] === undefined;
 }
 
-// Wait for DeferredWriter writes to local.system.healthlog to eventually complete.
-// Requires clearLog() before the test case is run.
-// TODO SERVER-61765 remove this function altoghether when healthlogging becomes
-// synchronous.
-function dbCheckHealthLogCompleted(db, coll, maxKey, maxSize, maxCount) {
-    let query = {"namespace": coll.getFullName(), "operation": "dbCheckBatch"};
-    if (maxSize === undefined && maxCount === undefined && maxKey === undefined) {
-        query['data.maxKey'] = {"$type": "maxKey"};
-    }
-    if (maxCount !== undefined) {
-        query['data.count'] = maxCount;
-    } else {
-        if (maxSize !== undefined) {
-            query['data.bytes'] = maxSize;
-        } else {
-            if (maxKey !== undefined) {
-                query['data.maxKey'] = maxKey;
-            }
-        }
-    }
-    return db.getSiblingDB("local").system.healthlog.find(query).itcount() === 1;
-}
-
 // Wait for dbCheck to complete (on both primaries and secondaries). Fails an assertion if
 // dbCheck takes longer than maxMs.
 function awaitDbCheckCompletion(db, collName, maxKey, maxSize, maxCount) {
@@ -232,17 +205,11 @@ function simpleTestNonSnapshot() {
     assert.neq(primary, undefined);
     let db = primary.getDB(dbName);
 
-    assert.commandWorked(db.runCommand({"dbCheck": multiBatchSimpleCollName, snapshotRead: false}));
-
-    awaitDbCheckCompletion(db, multiBatchSimpleCollName);
-
-    checkLogAllConsistent(primary);
-    checkTotalCounts(primary, db[multiBatchSimpleCollName]);
-
-    forEachSecondary(function(secondary) {
-        checkLogAllConsistent(secondary);
-        checkTotalCounts(secondary, secondary.getDB(dbName)[multiBatchSimpleCollName]);
-    });
+    // "dbCheck no longer supports snapshotRead:false"
+    assert.commandFailedWithCode(
+        db.runCommand({"dbCheck": multiBatchSimpleCollName, snapshotRead: false}), 6769500);
+    // "dbCheck no longer supports snapshotRead:false"
+    assert.commandFailedWithCode(db.runCommand({"dbCheck": 1, snapshotRead: false}), 6769501);
 }
 
 // Same thing, but now with concurrent updates.
diff --git a/src/mongo/db/commands/dbcheck.cpp b/src/mongo/db/commands/dbcheck.cpp
index 166ee91e097..f8aafcae484 100644
--- a/src/mongo/db/commands/dbcheck.cpp
+++ b/src/mongo/db/commands/dbcheck.cpp
@@ -143,7 +143,6 @@ struct DbCheckCollectionInfo {
     int64_t maxDocsPerBatch;
    int64_t maxBytesPerBatch;
     int64_t maxBatchTimeMillis;
-    bool snapshotRead;
     WriteConcernOptions writeConcern;
 };
 
@@ -166,6 +165,8 @@ std::unique_ptr<DbCheckRun> singleCollectionRun(OperationContext* opCtx,
             "Cannot run dbCheck on " + nss.toString() + " because it is not replicated",
             nss.isReplicated());
 
+    uassert(6769500, "dbCheck no longer supports snapshotRead:false", invocation.getSnapshotRead());
+
     const auto start = invocation.getMinKey();
     const auto end = invocation.getMaxKey();
     const auto maxCount = invocation.getMaxCount();
@@ -183,7 +184,6 @@ std::unique_ptr<DbCheckRun> singleCollectionRun(OperationContext* opCtx,
                                  maxDocsPerBatch,
                                  maxBytesPerBatch,
                                  maxBatchTimeMillis,
-                                 invocation.getSnapshotRead(),
                                  invocation.getBatchWriteConcern()};
     auto result = std::make_unique<DbCheckRun>();
     result->push_back(info);
@@ -200,6 +200,8 @@ std::unique_ptr<DbCheckRun> fullDatabaseRun(OperationContext* opCtx,
     AutoGetDb agd(opCtx, dbName, MODE_IS);
     uassert(ErrorCodes::NamespaceNotFound, "Database " + dbName.db() + " not found", agd.getDb());
 
+    uassert(6769501, "dbCheck no longer supports snapshotRead:false", invocation.getSnapshotRead());
+
     const int64_t max = std::numeric_limits<int64_t>::max();
     const auto rate = invocation.getMaxCountPerSecond();
     const auto maxDocsPerBatch = invocation.getMaxDocsPerBatch();
@@ -219,7 +221,6 @@ std::unique_ptr<DbCheckRun> fullDatabaseRun(OperationContext* opCtx,
                                      maxDocsPerBatch,
                                      maxBytesPerBatch,
                                      maxBatchTimeMillis,
-                                     invocation.getSnapshotRead(),
                                      invocation.getBatchWriteConcern()};
         result->push_back(info);
         return true;
@@ -492,122 +493,77 @@ private:
                            const BSONKey& first,
                            int64_t batchDocs,
                            int64_t batchBytes) {
-        auto lockMode = MODE_S;
-        if (info.snapshotRead) {
-            // Each batch will read at the latest no-overlap point, which is the all_durable
-            // timestamp on primaries. We assume that the history window on secondaries is always
-            // longer than the time it takes between starting and replicating a batch on the
-            // primary. Otherwise, the readTimestamp will not be available on a secondary by the
-            // time it processes the oplog entry.
-            lockMode = MODE_IS;
-            opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap);
+        // Each batch will read at the latest no-overlap point, which is the all_durable timestamp
+        // on primaries. We assume that the history window on secondaries is always longer than the
+        // time it takes between starting and replicating a batch on the primary. Otherwise, the
+        // readTimestamp will not be available on a secondary by the time it processes the oplog
+        // entry.
+        opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap);
+
+        // dbCheck writes to the oplog, so we need to take an IX lock. We don't need to write to the
+        // collection, however, so we only take an intent lock on it.
+        Lock::GlobalLock glob(opCtx, MODE_IX);
+        AutoGetCollection collection(opCtx, info.nss, MODE_IS);
+
+        if (_stepdownHasOccurred(opCtx, info.nss)) {
+            _done = true;
+            return Status(ErrorCodes::PrimarySteppedDown, "dbCheck terminated due to stepdown");
         }
-        BatchStats result;
-        auto timeoutMs = Milliseconds(gDbCheckCollectionTryLockTimeoutMillis.load());
-        const auto initialBackoffMs =
-            Milliseconds(gDbCheckCollectionTryLockMinBackoffMillis.load());
-        auto backoffMs = initialBackoffMs;
-        for (int attempt = 1;; attempt++) {
-            try {
-                // Try to acquire collection lock with increasing timeout and bounded exponential
-                // backoff.
-                auto const lockDeadline = Date_t::now() + timeoutMs;
-                timeoutMs *= 2;
-
-                AutoGetCollection agc(
-                    opCtx, info.nss, lockMode, AutoGetCollection::Options{}.deadline(lockDeadline));
-
-                if (_stepdownHasOccurred(opCtx, info.nss)) {
-                    _done = true;
-                    return Status(ErrorCodes::PrimarySteppedDown,
-                                  "dbCheck terminated due to stepdown");
-                }
+        if (!collection) {
+            const auto msg = "Collection under dbCheck no longer exists";
+            return {ErrorCodes::NamespaceNotFound, msg};
+        }
 
-                const auto& collection =
-                    CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, info.nss);
-                if (!collection) {
-                    const auto msg = "Collection under dbCheck no longer exists";
-                    return {ErrorCodes::NamespaceNotFound, msg};
-                }
+        auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx);
+        uassert(ErrorCodes::SnapshotUnavailable,
+                "No snapshot available yet for dbCheck",
+                readTimestamp);
+        auto minVisible = collection->getMinimumVisibleSnapshot();
+        if (minVisible && *readTimestamp < *collection->getMinimumVisibleSnapshot()) {
+            return {ErrorCodes::SnapshotUnavailable,
+                    str::stream() << "Unable to read from collection " << info.nss
+                                  << " due to pending catalog changes"};
+        }
 
-                auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx);
-                auto minVisible = collection->getMinimumVisibleSnapshot();
-                if (readTimestamp && minVisible &&
-                    *readTimestamp < *collection->getMinimumVisibleSnapshot()) {
-                    return {ErrorCodes::SnapshotUnavailable,
-                            str::stream() << "Unable to read from collection " << info.nss
-                                          << " due to pending catalog changes"};
-                }
+        boost::optional<DbCheckHasher> hasher;
+        try {
+            hasher.emplace(opCtx,
+                           *collection,
+                           first,
+                           info.end,
+                           std::min(batchDocs, info.maxCount),
+                           std::min(batchBytes, info.maxSize));
+        } catch (const DBException& e) {
+            return e.toStatus();
+        }
 
-                boost::optional<DbCheckHasher> hasher;
-                try {
-                    hasher.emplace(opCtx,
-                                   collection,
-                                   first,
-                                   info.end,
-                                   std::min(batchDocs, info.maxCount),
-                                   std::min(batchBytes, info.maxSize));
-                } catch (const DBException& e) {
-                    return e.toStatus();
-                }
+        const auto batchDeadline = Date_t::now() + Milliseconds(info.maxBatchTimeMillis);
+        Status status = hasher->hashAll(opCtx, batchDeadline);
 
-                const auto batchDeadline = Date_t::now() + Milliseconds(info.maxBatchTimeMillis);
-                Status status = hasher->hashAll(opCtx, batchDeadline);
+        if (!status.isOK()) {
+            return status;
+        }
 
-                if (!status.isOK()) {
-                    return status;
-                }
+        std::string md5 = hasher->total();
 
-                std::string md5 = hasher->total();
-
-                DbCheckOplogBatch batch;
-                batch.setType(OplogEntriesEnum::Batch);
-                batch.setNss(info.nss);
-                batch.setMd5(md5);
-                batch.setMinKey(first);
-                batch.setMaxKey(BSONKey(hasher->lastKey()));
-                batch.setReadTimestamp(readTimestamp);
-
-                // Send information on this batch over the oplog.
-                result.time = _logOp(opCtx, info.nss, collection->uuid(), batch.toBSON());
-                result.readTimestamp = readTimestamp;
-
-                result.nDocs = hasher->docsSeen();
-                result.nBytes = hasher->bytesSeen();
-                result.lastKey = hasher->lastKey();
-                result.md5 = md5;
-
-                break;
-            } catch (const ExceptionFor<ErrorCodes::LockTimeout>& e) {
-                if (attempt > gDbCheckCollectionTryLockMaxAttempts.load()) {
-                    return StatusWith<BatchStats>(e.code(),
-                                                  "Unable to acquire the collection lock");
-                }
+        DbCheckOplogBatch batch;
+        batch.setType(OplogEntriesEnum::Batch);
+        batch.setNss(info.nss);
+        batch.setMd5(md5);
+        batch.setMinKey(first);
+        batch.setMaxKey(BSONKey(hasher->lastKey()));
+        batch.setReadTimestamp(readTimestamp);
 
-                // Bounded exponential backoff between tryLocks.
-                opCtx->sleepFor(backoffMs);
-                const auto maxBackoffMillis =
-                    Milliseconds(gDbCheckCollectionTryLockMaxBackoffMillis.load());
-                if (backoffMs < maxBackoffMillis) {
-                    auto backoff = durationCount<Milliseconds>(backoffMs);
-                    auto initialBackoff = durationCount<Milliseconds>(initialBackoffMs);
-                    backoff *= initialBackoff;
-                    backoffMs = Milliseconds(backoff);
-                }
-                if (backoffMs > maxBackoffMillis) {
-                    backoffMs = maxBackoffMillis;
-                }
-                LOGV2_DEBUG(6175700,
-                            1,
-                            "Could not acquire collection lock, retrying",
-                            "ns"_attr = info.nss.ns(),
-                            "batchRangeMin"_attr = info.start.obj(),
-                            "batchRangeMax"_attr = info.end.obj(),
-                            "attempt"_attr = attempt,
-                            "backoff"_attr = backoffMs);
-            }
-        }
+        // Send information on this batch over the oplog.
+        BatchStats result;
+        result.time = _logOp(opCtx, info.nss, collection->uuid(), batch.toBSON());
+        result.readTimestamp = readTimestamp;
+
+        result.nDocs = hasher->docsSeen();
+        result.nBytes = hasher->bytesSeen();
+        result.lastKey = hasher->lastKey();
+        result.md5 = md5;
 
         return result;
     }
@@ -669,7 +625,6 @@ public:
                " maxDocsPerBatch: \n"
                " maxBytesPerBatch: \n"
                " maxBatchTimeMillis: \n"
-               " readTimestamp: }\n"
                "to check a collection.\n"
                "Invoke with {dbCheck: 1} to check all collections in the database.";
     }
diff --git a/src/mongo/db/repl/dbcheck.cpp b/src/mongo/db/repl/dbcheck.cpp
index 3062ec42733..0b0be849a5d 100644
--- a/src/mongo/db/repl/dbcheck.cpp
+++ b/src/mongo/db/repl/dbcheck.cpp
@@ -364,14 +364,26 @@ Status dbCheckBatchOnSecondary(OperationContext* opCtx,
 
     // Set up the hasher,
     boost::optional<DbCheckHasher> hasher;
     try {
-        auto lockMode = MODE_S;
-        if (entry.getReadTimestamp()) {
-            lockMode = MODE_IS;
-            opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
-                                                          entry.getReadTimestamp());
+        // We may not have a read timestamp if the dbCheck command was run on an older version of
+        // the server with snapshotRead:false. That option is no longer supported, so we log an
+        // error about skipping the batch to ensure an operator notices.
+        if (!entry.getReadTimestamp().has_value()) {
+            auto logEntry =
+                dbCheckErrorHealthLogEntry(entry.getNss(),
+                                           "dbCheck failed",
+                                           OplogEntriesEnum::Batch,
+                                           Status{ErrorCodes::Error(6769502),
+                                                  "no readTimestamp in oplog entry. Ensure dbCheck "
+                                                  "command is not using snapshotRead:false"},
+                                           entry.toBSON());
+            HealthLog::get(opCtx).log(*logEntry);
+            return Status::OK();
         }
 
-        AutoGetCollection coll(opCtx, entry.getNss(), lockMode);
+        opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
+                                                      entry.getReadTimestamp());
+
+        AutoGetCollection coll(opCtx, entry.getNss(), MODE_IS);
         const auto& collection = coll.getCollection();
 
         if (!collection) {
diff --git a/src/mongo/db/repl/dbcheck.idl b/src/mongo/db/repl/dbcheck.idl
index 9d3b20b68f5..5ded9ce6daf 100644
--- a/src/mongo/db/repl/dbcheck.idl
+++ b/src/mongo/db/repl/dbcheck.idl
@@ -38,42 +38,6 @@ imports:
     - "mongo/db/write_concern_options.idl"
 
 server_parameters:
-    dbCheckCollectionTryLockTimeoutMillis:
-        description: 'Timeout to acquire the collection for processing a dbCheck batch. Each subsequent attempt doubles the timeout'
-        set_at: [ startup, runtime ]
-        cpp_vartype: 'AtomicWord<int64_t>'
-        cpp_varname: gDbCheckCollectionTryLockTimeoutMillis
-        default: 10
-        validator:
-            gte: 1
-            lte: 10000
-    dbCheckCollectionTryLockMaxAttempts:
-        description: 'Maximum number of attempts with backoff to acquire the collection lock for processing a dbCheck batch'
-        set_at: [ startup, runtime ]
-        cpp_vartype: 'AtomicWord<int64_t>'
-        cpp_varname: gDbCheckCollectionTryLockMaxAttempts
-        default: 5
-        validator:
-            gte: 1
-            lte: 20
-    dbCheckCollectionTryLockMinBackoffMillis:
-        description: 'Initial backoff on failure to acquire the collection lock for processing a dbCheck batch. Grows exponentially'
-        set_at: [ startup, runtime ]
-        cpp_vartype: 'AtomicWord<int64_t>'
-        cpp_varname: gDbCheckCollectionTryLockMinBackoffMillis
-        default: 10
-        validator:
-            gte: 2
-            lte: 60000
-    dbCheckCollectionTryLockMaxBackoffMillis:
-        description: 'Maximum exponential backoff on failure to acquire the collection lock for processing a dbCheck batch.'
-        set_at: [ startup, runtime ]
-        cpp_vartype: 'AtomicWord<int64_t>'
-        cpp_varname: gDbCheckCollectionTryLockMaxBackoffMillis
-        default: 60000
-        validator:
-            gte: 20
-            lte: 120000
     dbCheckHealthLogEveryNBatches:
         description: 'Emit an info-severity health log batch every N batches processed'
         set_at: [ startup, runtime ]
-- 
cgit v1.2.1