From 0e0bdb41cf80cfa3e151b3aba3a5ebb77714c06e Mon Sep 17 00:00:00 2001 From: Louis Williams Date: Wed, 22 Dec 2021 19:08:59 +0000 Subject: SERVER-62212 Support writeConcern for dbCheck batches The dbCheck command accepts a `batchWriteConcern` option in the form of a writeConcern document. The default behavior is w:1 with no wtimeout. --- jstests/replsets/dbcheck_write_concern.js | 181 ++++++++++++++++++++++++++++++ src/mongo/db/commands/dbcheck.cpp | 59 ++++++---- src/mongo/db/repl/dbcheck.idl | 9 ++ 3 files changed, 226 insertions(+), 23 deletions(-) create mode 100644 jstests/replsets/dbcheck_write_concern.js diff --git a/jstests/replsets/dbcheck_write_concern.js b/jstests/replsets/dbcheck_write_concern.js new file mode 100644 index 00000000000..42d22452254 --- /dev/null +++ b/jstests/replsets/dbcheck_write_concern.js @@ -0,0 +1,181 @@ +/** + * Test the behavior of per-batch writeConcern in dbCheck. + * + * @tags: [ + * # We need persistence as we temporarily restart nodes. + * requires_persistence, + * assumes_against_mongod_not_mongos, + * ] + */ +(function() { +"use strict"; + +const replSet = new ReplSetTest({ + name: "dbCheckWriteConcern", + nodes: 2, + nodeOptions: {setParameter: {dbCheckHealthLogEveryNBatches: 1}} +}); +replSet.startSet(); +replSet.initiate(); + +function forEachSecondary(f) { + for (let secondary of replSet.getSecondaries()) { + f(secondary); + } +} + +function forEachNode(f) { + f(replSet.getPrimary()); + forEachSecondary(f); +} + +// Drop local.system.healthlog on every node so each test starts with an empty health log. +function clearLog() { + forEachNode(conn => conn.getDB("local").system.healthlog.drop()); +} + +const dbName = "dbCheck-writeConcern"; +const collName = "test"; +const primary = replSet.getPrimary(); +const db = primary.getDB(dbName); +const coll = db[collName]; +const healthlog = db.getSiblingDB('local').system.healthlog; + +// Validate that, with both nodes up, w:majority batches complete without warnings or errors. 
+(function testWMajority() { + clearLog(); + coll.drop(); + + // Insert 1000 docs and check them in batches of 100 so that dbCheck waits for write concern + // after every batch. + const nDocs = 1000; + const maxDocsPerBatch = 100; + assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x})))); + assert.commandWorked(db.runCommand({ + dbCheck: coll.getName(), + maxDocsPerBatch: maxDocsPerBatch, + batchWriteConcern: {w: 'majority'}, + })); + + // Confirm dbCheck logs the expected number of batches. + assert.soon(function() { + return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() == + nDocs / maxDocsPerBatch); + }, "dbCheck doesn't seem to complete", 60 * 1000); + + // Confirm there are no warnings or errors. + assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount(), 0); + assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0); +})(); + +// Validate that, with both nodes up, w:2 batches complete without warnings or errors. +(function testW2() { + clearLog(); + coll.drop(); + + // Insert 1000 docs and check them in batches of 100 so that dbCheck waits for write concern + // after every batch. + const nDocs = 1000; + const maxDocsPerBatch = 100; + assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x})))); + assert.commandWorked(db.runCommand({ + dbCheck: coll.getName(), + maxDocsPerBatch: maxDocsPerBatch, + batchWriteConcern: {w: 2}, + })); + + // Confirm dbCheck logs the expected number of batches. + assert.soon(function() { + return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() == + nDocs / maxDocsPerBatch); + }, "dbCheck doesn't seem to complete", 60 * 1000); + + // Confirm there are no warnings or errors. 
+ assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount(), 0); + assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0); +})(); + +// Validate that dbCheck completes with w:majority even when the secondary is down and a wtimeout is +// specified. +(function testWMajorityUnavailable() { + clearLog(); + coll.drop(); + + // Insert 1000 docs and check them in batches of 100 so that dbCheck waits for write concern + // after every batch. + const nDocs = 1000; + const maxDocsPerBatch = 100; + assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x})))); + replSet.awaitReplication(); + + // Stop the secondary and expect that the dbCheck batches still complete on the primary. + const secondaryConn = replSet.getSecondary(); + const secondaryNodeId = replSet.getNodeId(secondaryConn); + replSet.stop(secondaryNodeId, {forRestart: true /* preserve dbPath */}); + + assert.commandWorked(db.runCommand({ + dbCheck: coll.getName(), + maxDocsPerBatch: maxDocsPerBatch, + batchWriteConcern: {w: 'majority', wtimeout: 10}, + })); + + // Confirm dbCheck logs the expected number of batches. + assert.soon(function() { + return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() == + nDocs / maxDocsPerBatch); + }, "dbCheck doesn't seem to complete", 60 * 1000); + + // Confirm dbCheck logs a warning for every batch. + assert.soon(function() { + return (healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount() == + nDocs / maxDocsPerBatch); + }, "dbCheck did not log writeConcern warnings", 60 * 1000); + // There should be no errors. + assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0); + + replSet.start(secondaryNodeId, {}, true /*restart*/); + replSet.awaitNodesAgreeOnPrimaryNoAuth(); + replSet.awaitReplication(); +})(); + +// Validate that an unsatisfiable 'w' (3 on a 2-node set) still allows dbCheck to succeed when +// given a wtimeout. 
+(function testW3Unavailable() { + clearLog(); + coll.drop(); + + // Insert 1000 docs and check them in batches of 100 so that dbCheck waits for write concern + // after every batch. + const nDocs = 1000; + const maxDocsPerBatch = 100; + assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x})))); + replSet.awaitReplication(); + + // Stop the secondary and expect that the dbCheck batches still complete on the primary. + const secondaryConn = replSet.getSecondary(); + const secondaryNodeId = replSet.getNodeId(secondaryConn); + replSet.stop(secondaryNodeId, {forRestart: true /* preserve dbPath */}); + + assert.commandWorked(db.runCommand({ + dbCheck: coll.getName(), + maxDocsPerBatch: maxDocsPerBatch, + batchWriteConcern: {w: 3, wtimeout: 10}, + })); + + // Confirm dbCheck logs the expected number of batches. + assert.soon(function() { + return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() == + nDocs / maxDocsPerBatch); + }, "dbCheck doesn't seem to complete", 60 * 1000); + + // Confirm dbCheck logs a warning for every batch. + assert.soon(function() { + return (healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount() == + nDocs / maxDocsPerBatch); + }, "dbCheck did not log writeConcern warnings", 60 * 1000); + // There should be no errors. 
+ assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0); +})(); + +replSet.stopSet(); +})(); diff --git a/src/mongo/db/commands/dbcheck.cpp b/src/mongo/db/commands/dbcheck.cpp index 145c792b808..a1e44d16ff4 100644 --- a/src/mongo/db/commands/dbcheck.cpp +++ b/src/mongo/db/commands/dbcheck.cpp @@ -46,6 +46,7 @@ #include "mongo/db/repl/dbcheck.h" #include "mongo/db/repl/oplog.h" #include "mongo/db/repl/optime.h" +#include "mongo/db/write_concern_options.h" #include "mongo/idl/command_generic_argument.h" #include "mongo/util/background.h" @@ -142,6 +143,7 @@ struct DbCheckCollectionInfo { int64_t maxBytesPerBatch; int64_t maxBatchTimeMillis; bool snapshotRead; + WriteConcernOptions writeConcern; }; /** @@ -180,7 +182,8 @@ std::unique_ptr singleCollectionRun(OperationContext* opCtx, maxDocsPerBatch, maxBytesPerBatch, maxBatchTimeMillis, - invocation.getSnapshotRead()}; + invocation.getSnapshotRead(), + invocation.getBatchWriteConcern()}; auto result = std::make_unique(); result->push_back(info); return result; @@ -214,7 +217,8 @@ std::unique_ptr fullDatabaseRun(OperationContext* opCtx, maxDocsPerBatch, maxBytesPerBatch, maxBatchTimeMillis, - invocation.getSnapshotRead()}; + invocation.getSnapshotRead(), + invocation.getBatchWriteConcern()}; result->push_back(info); return true; }; @@ -351,10 +355,10 @@ private: return; } - std::unique_ptr entry; if (!result.isOK()) { bool retryable = false; + std::unique_ptr entry; const auto code = result.getStatus().code(); if (code == ErrorCodes::LockTimeout) { @@ -395,32 +399,41 @@ private: OplogEntriesEnum::Batch, result.getStatus()); } - HealthLog::get(Client::getCurrent()->getServiceContext()).log(*entry); + HealthLog::get(opCtx).log(*entry); if (retryable) { continue; } return; - } else { - _batchesProcessed++; - auto stats = result.getValue(); - entry = dbCheckBatchEntry(info.nss, - stats.nDocs, - stats.nBytes, - stats.md5, - stats.md5, - start, - stats.lastKey, - stats.readTimestamp, - 
stats.time); - if (kDebugBuild || entry->getSeverity() != SeverityEnum::Info || - (_batchesProcessed % gDbCheckHealthLogEveryNBatches.load() == 0)) { - // On debug builds, health-log every batch result; on release builds, health-log - // every N batches. - HealthLog::get(Client::getCurrent()->getServiceContext()).log(*entry); - } } - auto stats = result.getValue(); + const auto stats = result.getValue(); + + _batchesProcessed++; + auto entry = dbCheckBatchEntry(info.nss, + stats.nDocs, + stats.nBytes, + stats.md5, + stats.md5, + start, + stats.lastKey, + stats.readTimestamp, + stats.time); + if (kDebugBuild || entry->getSeverity() != SeverityEnum::Info || + (_batchesProcessed % gDbCheckHealthLogEveryNBatches.load() == 0)) { + // On debug builds, health-log every batch result; on release builds, health-log + // every N batches (entries whose severity is not Info are always logged). + HealthLog::get(opCtx).log(*entry); + } + + WriteConcernResult unused; + auto status = waitForWriteConcern(opCtx, stats.time, info.writeConcern, &unused); + if (!status.isOK()) { + auto entry = dbCheckWarningHealthLogEntry(info.nss, + "dbCheck failed waiting for writeConcern", + OplogEntriesEnum::Batch, + status); + HealthLog::get(opCtx).log(*entry); + } start = stats.lastKey; diff --git a/src/mongo/db/repl/dbcheck.idl b/src/mongo/db/repl/dbcheck.idl index 48e71f32c31..8246c46e70d 100644 --- a/src/mongo/db/repl/dbcheck.idl +++ b/src/mongo/db/repl/dbcheck.idl @@ -35,6 +35,7 @@ global: imports: - "mongo/idl/basic_types.idl" + - "mongo/db/write_concern_options.idl" server_parameters: dbCheckCollectionTryLockTimeoutMillis: @@ -144,6 +145,10 @@ structs: snapshotRead: type: safeBool default: true + batchWriteConcern: + description: Wait for this writeConcern at the end of every batch. Default is w:1 with no timeout. 
+ type: WriteConcern + default: WriteConcernOptions() DbCheckAllInvocation: description: "Command object for database-wide form of dbCheck invocation" @@ -175,6 +180,10 @@ structs: snapshotRead: type: safeBool default: true + batchWriteConcern: + description: Wait for this writeConcern at the end of every batch. Default is w:1 with no timeout. + type: WriteConcern + default: WriteConcernOptions() DbCheckOplogBatch: description: "Oplog entry for a dbCheck batch" -- cgit v1.2.1