 etc/backports_required_for_multiversion_tests.yml |   2 +
 jstests/replsets/dbcheck_write_concern.js         | 181 ++++++++++++++
 src/mongo/db/commands/dbcheck.cpp                 |  59 ++++---
 src/mongo/db/repl/dbcheck.idl                     |   9 ++
 4 files changed, 229 insertions(+), 22 deletions(-)
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 208d341c46d..9604bccbed2 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -191,6 +191,8 @@ all:
     test_file: jstests/replsets/dbcheck.js
   - ticket: SERVER-61955
     test_file: jstests/auth/dbcheck.js
+  - ticket: SERVER-62212
+    test_file: jstests/replsets/dbcheck_write_concern.js
 
 suites:
diff --git a/jstests/replsets/dbcheck_write_concern.js b/jstests/replsets/dbcheck_write_concern.js
new file mode 100644
index 00000000000..7495b508d0f
--- /dev/null
+++ b/jstests/replsets/dbcheck_write_concern.js
@@ -0,0 +1,181 @@
+/**
+ * Test the behavior of per-batch writeConcern in dbCheck.
+ *
+ * @tags: [
+ *   # We need persistence as we temporarily restart nodes.
+ *   requires_persistence,
+ *   assumes_against_mongod_not_mongos,
+ * ]
+ */
+(function() {
+"use strict";
+
+const replSet = new ReplSetTest({
+    name: "dbCheckWriteConcern",
+    nodes: 2,
+    nodeOptions: {setParameter: {dbCheckHealthLogEveryNBatches: 1}}
+});
+replSet.startSet();
+replSet.initiate();
+
+function forEachSecondary(f) {
+    for (let secondary of replSet.getSecondaries()) {
+        f(secondary);
+    }
+}
+
+function forEachNode(f) {
+    f(replSet.getPrimary());
+    forEachSecondary(f);
+}
+
+// Clear local.system.healthlog.
+function clearLog() {
+    forEachNode(conn => conn.getDB("local").system.healthlog.drop());
+}
+
+const dbName = "dbCheck-writeConcern";
+const collName = "test";
+const primary = replSet.getPrimary();
+const db = primary.getDB(dbName);
+const coll = db[collName];
+const healthlog = db.getSiblingDB('local').system.healthlog;
+
+// Validate that w:majority behaves normally.
+(function testWMajority() {
+    clearLog();
+    coll.drop();
+
+    // Insert 1000 docs and run a few small batches to ensure we wait for write concern between
+    // each one.
+    const nDocs = 1000;
+    const maxDocsPerBatch = 100;
+    assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x}))));
+    assert.commandWorked(db.runCommand({
+        dbCheck: coll.getName(),
+        maxDocsPerBatch: maxDocsPerBatch,
+        batchWriteConcern: {w: 'majority'},
+    }));
+
+    // Confirm dbCheck logs the expected number of batches.
+    assert.soon(function() {
+        return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() ==
+                nDocs / maxDocsPerBatch);
+    }, "dbCheck doesn't seem to complete", 60 * 1000);
+
+    // Confirm there are no warnings or errors.
+    assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount(), 0);
+    assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0);
+})();
+
+// Validate that w:2 behaves normally.
+(function testW2() {
+    clearLog();
+    coll.drop();
+
+    // Insert 1000 docs and run a few small batches to ensure we wait for write concern between
+    // each one.
+    const nDocs = 1000;
+    const maxDocsPerBatch = 100;
+    assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x}))));
+    assert.commandWorked(db.runCommand({
+        dbCheck: coll.getName(),
+        maxDocsPerBatch: maxDocsPerBatch,
+        batchWriteConcern: {w: 2},
+    }));
+
+    // Confirm dbCheck logs the expected number of batches.
+    assert.soon(function() {
+        return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() ==
+                nDocs / maxDocsPerBatch);
+    }, "dbCheck doesn't seem to complete", 60 * 1000);
+
+    // Confirm there are no warnings or errors.
+    assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount(), 0);
+    assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0);
+})();
+
+// Validate that dbCheck completes with w:majority even when the secondary is down and a
+// wtimeout is specified.
+(function testWMajorityUnavailable() {
+    clearLog();
+    coll.drop();
+
+    // Insert 1000 docs and run a few small batches to ensure we wait for write concern between
+    // each one.
+    const nDocs = 1000;
+    const maxDocsPerBatch = 100;
+    assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x}))));
+    replSet.awaitReplication();
+
+    // Stop the secondary and expect that the dbCheck batches still complete on the primary.
+    const secondaryConn = replSet.getSecondary();
+    const secondaryNodeId = replSet.getNodeId(secondaryConn);
+    replSet.stop(secondaryNodeId, {forRestart: true /* preserve dbPath */});
+
+    assert.commandWorked(db.runCommand({
+        dbCheck: coll.getName(),
+        maxDocsPerBatch: maxDocsPerBatch,
+        batchWriteConcern: {w: 'majority', wtimeout: 10},
+    }));
+
+    // Confirm dbCheck logs the expected number of batches.
+    assert.soon(function() {
+        return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() ==
+                nDocs / maxDocsPerBatch);
+    }, "dbCheck doesn't seem to complete", 60 * 1000);
+
+    // Confirm dbCheck logs a warning for every batch.
+    assert.soon(function() {
+        return (healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount() ==
+                nDocs / maxDocsPerBatch);
+    }, "dbCheck did not log writeConcern warnings", 60 * 1000);
+    // There should be no errors.
+    assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0);
+
+    replSet.start(secondaryNodeId, {}, true /*restart*/);
+    replSet.awaitNodesAgreeOnPrimary();
+    replSet.awaitReplication();
+})();
+
+// Validate that an unsatisfiable 'w' setting (w:3 on a two-node set) still allows dbCheck to
+// succeed when paired with a wtimeout.
+(function testW3Unavailable() {
+    clearLog();
+    coll.drop();
+
+    // Insert 1000 docs and run a few small batches to ensure we wait for write concern between
+    // each one.
+    const nDocs = 1000;
+    const maxDocsPerBatch = 100;
+    assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x}))));
+    replSet.awaitReplication();
+
+    // Stop the secondary and expect that the dbCheck batches still complete on the primary.
+    const secondaryConn = replSet.getSecondary();
+    const secondaryNodeId = replSet.getNodeId(secondaryConn);
+    replSet.stop(secondaryNodeId, {forRestart: true /* preserve dbPath */});
+
+    assert.commandWorked(db.runCommand({
+        dbCheck: coll.getName(),
+        maxDocsPerBatch: maxDocsPerBatch,
+        batchWriteConcern: {w: 3, wtimeout: 10},
+    }));
+
+    // Confirm dbCheck logs the expected number of batches.
+    assert.soon(function() {
+        return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() ==
+                nDocs / maxDocsPerBatch);
+    }, "dbCheck doesn't seem to complete", 60 * 1000);
+
+    // Confirm dbCheck logs a warning for every batch.
+    assert.soon(function() {
+        return (healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount() ==
+                nDocs / maxDocsPerBatch);
+    }, "dbCheck did not log writeConcern warnings", 60 * 1000);
+    // There should be no errors.
+    assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0);
+})();
+
+replSet.stopSet();
+})();
diff --git a/src/mongo/db/commands/dbcheck.cpp b/src/mongo/db/commands/dbcheck.cpp
index f53d8b77ce7..70940c0e5ff 100644
--- a/src/mongo/db/commands/dbcheck.cpp
+++ b/src/mongo/db/commands/dbcheck.cpp
@@ -46,6 +46,7 @@
 #include "mongo/db/repl/dbcheck.h"
 #include "mongo/db/repl/oplog.h"
 #include "mongo/db/repl/optime.h"
+#include "mongo/db/write_concern_options.h"
 #include "mongo/util/background.h"
 
 #include "mongo/logv2/log.h"
@@ -141,6 +142,7 @@ struct DbCheckCollectionInfo {
     int64_t maxBytesPerBatch;
     int64_t maxBatchTimeMillis;
     bool snapshotRead;
+    WriteConcernOptions writeConcern;
 };
 
 /**
@@ -179,7 +181,8 @@ std::unique_ptr<DbCheckRun> singleCollectionRun(OperationContext* opCtx,
                                  maxDocsPerBatch,
                                  maxBytesPerBatch,
                                  maxBatchTimeMillis,
-                                 invocation.getSnapshotRead()};
+                                 invocation.getSnapshotRead(),
+                                 invocation.getBatchWriteConcern()};
     auto result = std::make_unique<DbCheckRun>();
     result->push_back(info);
     return result;
@@ -213,7 +216,8 @@ std::unique_ptr<DbCheckRun> fullDatabaseRun(OperationContext* opCtx,
                                      maxDocsPerBatch,
                                      maxBytesPerBatch,
                                      maxBatchTimeMillis,
-                                     invocation.getSnapshotRead()};
+                                     invocation.getSnapshotRead(),
+                                     invocation.getBatchWriteConcern()};
         result->push_back(info);
         return true;
     };
@@ -394,32 +398,43 @@ private:
                                                       OplogEntriesEnum::Batch,
                                                       result.getStatus());
                 }
-                HealthLog::get(Client::getCurrent()->getServiceContext()).log(*entry);
+                HealthLog::get(opCtx).log(*entry);
                 if (retryable) {
                     continue;
                 }
                 return;
-            } else {
-                _batchesProcessed++;
-                auto stats = result.getValue();
-                auto entry = dbCheckBatchEntry(info.nss,
-                                               stats.nDocs,
-                                               stats.nBytes,
-                                               stats.md5,
-                                               stats.md5,
-                                               start,
-                                               stats.lastKey,
-                                               stats.readTimestamp,
-                                               stats.time);
-                if (kDebugBuild || entry->getSeverity() != SeverityEnum::Info ||
-                    (_batchesProcessed % gDbCheckHealthLogEveryNBatches.load() == 0)) {
-                    // On debug builds, health-log every batch result; on release builds,
-                    // health-log every N batches.
-                    HealthLog::get(Client::getCurrent()->getServiceContext()).log(*entry);
-                }
             }
-            auto stats = result.getValue();
+
+            _batchesProcessed++;
+
+            const auto stats = result.getValue();
+            auto entry = dbCheckBatchEntry(info.nss,
+                                           stats.nDocs,
+                                           stats.nBytes,
+                                           stats.md5,
+                                           stats.md5,
+                                           start,
+                                           stats.lastKey,
+                                           stats.readTimestamp,
+                                           stats.time);
+            if (kDebugBuild || entry->getSeverity() != SeverityEnum::Info ||
+                (_batchesProcessed % gDbCheckHealthLogEveryNBatches.load() == 0)) {
+                // On debug builds, health-log every batch result; on release builds, health-log
+                // every N batches.
+                HealthLog::get(opCtx).log(*entry);
+            }
+
+            WriteConcernResult unused;
+            auto status = waitForWriteConcern(opCtx, stats.time, info.writeConcern, &unused);
+            if (!status.isOK()) {
+                auto entry = dbCheckWarningHealthLogEntry(info.nss,
+                                                          "dbCheck failed waiting for writeConcern",
+                                                          OplogEntriesEnum::Batch,
+                                                          status);
+                HealthLog::get(opCtx).log(*entry);
+            }
+
             start = stats.lastKey;
 
             // Update our running totals.
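Note on the server-side change above: a write-concern timeout after a batch downgrades to a warning entry in the node-local health log rather than aborting the check, so the effect is only observable by reading local.system.healthlog. A minimal shell sketch for inspecting those entries, assuming the query shape used by the jstest above (the variable names here are illustrative):

// Inspect the write-concern warnings dbCheck recorded in the node-local
// health log. Each batch that timed out waiting for the requested
// batchWriteConcern produces one warning entry.
const healthlog = db.getSiblingDB("local").system.healthlog;
healthlog.find({operation: "dbCheckBatch", severity: "warning"})
    .forEach(entry => printjson(entry));
// Error entries would indicate actual data inconsistencies, not
// write-concern waits, so none are expected in this scenario.
assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0);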
diff --git a/src/mongo/db/repl/dbcheck.idl b/src/mongo/db/repl/dbcheck.idl index 48e71f32c31..8246c46e70d 100644 --- a/src/mongo/db/repl/dbcheck.idl +++ b/src/mongo/db/repl/dbcheck.idl @@ -35,6 +35,7 @@ global: imports: - "mongo/idl/basic_types.idl" + - "mongo/db/write_concern_options.idl" server_parameters: dbCheckCollectionTryLockTimeoutMillis: @@ -144,6 +145,10 @@ structs: snapshotRead: type: safeBool default: true + batchWriteConcern: + description: Wait for this writeConcern at the end of every batch. Default is w:1 with no timeout. + type: WriteConcern + default: WriteConcernOptions() DbCheckAllInvocation: description: "Command object for database-wide form of dbCheck invocation" @@ -175,6 +180,10 @@ structs: snapshotRead: type: safeBool default: true + batchWriteConcern: + description: Wait for this writeConcern at the end of every batch. Default is w:1 with no timeout. + type: WriteConcern + default: WriteConcernOptions() DbCheckOplogBatch: description: "Oplog entry for a dbCheck batch" |
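With the IDL change, batchWriteConcern becomes an optional parameter on both forms of the dbCheck invocation, defaulting to w:1 with no timeout. A hedged usage sketch from the shell (the database and collection names are illustrative, not taken from this patch):

// Run dbCheck in 100-document batches, waiting for majority acknowledgement
// (bounded by a 5-second timeout) after each batch. If a wait times out, the
// run continues and a warning is written to local.system.healthlog.
db.getSiblingDB("test").runCommand({
    dbCheck: "coll",
    maxDocsPerBatch: 100,
    batchWriteConcern: {w: "majority", wtimeout: 5000},
});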