author     Louis Williams <louis.williams@mongodb.com>       2021-12-22 19:08:59 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2021-12-22 19:38:30 +0000
commit     0e0bdb41cf80cfa3e151b3aba3a5ebb77714c06e (patch)
tree       c7c6d15cc635fd873252ecc7d6a991787a372c57
parent     f114b2857bac3830436df4c319fd2cb562a1ff1b (diff)
download   mongo-0e0bdb41cf80cfa3e151b3aba3a5ebb77714c06e.tar.gz
SERVER-62212 Support writeConcern for dbCheck batches
The dbCheck command accepts a `batchWriteConcern` option in the form of a writeConcern document. The default behavior is w:1 with no wtimeout.
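For illustration, the option takes the same shape as a standard writeConcern document. A minimal shell invocation, mirroring the calls in the new test (the collection name and values here are examples only):

// Illustrative values: wait for w:majority with a 1-second wtimeout after
// each dbCheck batch of at most 100 documents.
db.runCommand({
    dbCheck: "coll",
    maxDocsPerBatch: 100,
    batchWriteConcern: {w: "majority", wtimeout: 1000},
});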
-rw-r--r--  jstests/replsets/dbcheck_write_concern.js  181
-rw-r--r--  src/mongo/db/commands/dbcheck.cpp            59
-rw-r--r--  src/mongo/db/repl/dbcheck.idl                 9
3 files changed, 226 insertions, 23 deletions
diff --git a/jstests/replsets/dbcheck_write_concern.js b/jstests/replsets/dbcheck_write_concern.js
new file mode 100644
index 00000000000..42d22452254
--- /dev/null
+++ b/jstests/replsets/dbcheck_write_concern.js
@@ -0,0 +1,181 @@
+/**
+ * Test the behavior of per-batch writeConcern in dbCheck.
+ *
+ * @tags: [
+ * # We need persistence as we temporarily restart nodes.
+ * requires_persistence,
+ * assumes_against_mongod_not_mongos,
+ * ]
+ */
+(function() {
+"use strict";
+
+const replSet = new ReplSetTest({
+ name: "dbCheckWriteConcern",
+ nodes: 2,
+ nodeOptions: {setParameter: {dbCheckHealthLogEveryNBatches: 1}}
+});
+replSet.startSet();
+replSet.initiate();
+
+function forEachSecondary(f) {
+ for (let secondary of replSet.getSecondaries()) {
+ f(secondary);
+ }
+}
+
+function forEachNode(f) {
+ f(replSet.getPrimary());
+ forEachSecondary(f);
+}
+
+// Clear local.system.healthlog.
+function clearLog() {
+ forEachNode(conn => conn.getDB("local").system.healthlog.drop());
+}
+
+const dbName = "dbCheck-writeConcern";
+const collName = "test";
+const primary = replSet.getPrimary();
+const db = primary.getDB(dbName);
+const coll = db[collName];
+const healthlog = db.getSiblingDB('local').system.healthlog;
+
+// Validate that w:majority behaves normally.
+(function testWMajority() {
+ clearLog();
+ coll.drop();
+
+ // Insert 1000 docs and run a few small batches to ensure we wait for write concern between
+ // each one.
+ const nDocs = 1000;
+ const maxDocsPerBatch = 100;
+ assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x}))));
+ assert.commandWorked(db.runCommand({
+ dbCheck: coll.getName(),
+ maxDocsPerBatch: maxDocsPerBatch,
+ batchWriteConcern: {w: 'majority'},
+ }));
+
+ // Confirm dbCheck logs the expected number of batches.
+ assert.soon(function() {
+ return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() ==
+ nDocs / maxDocsPerBatch);
+ }, "dbCheck doesn't seem to complete", 60 * 1000);
+
+ // Confirm there are no warnings or errors.
+ assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount(), 0);
+ assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0);
+})();
+
+// Validate that w:2 behaves normally.
+(function testW2() {
+ clearLog();
+ coll.drop();
+
+ // Insert 1000 docs and run a few small batches to ensure we wait for write concern between
+ // each one.
+ const nDocs = 1000;
+ const maxDocsPerBatch = 100;
+ assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x}))));
+ assert.commandWorked(db.runCommand({
+ dbCheck: coll.getName(),
+ maxDocsPerBatch: maxDocsPerBatch,
+ batchWriteConcern: {w: 2},
+ }));
+
+ // Confirm dbCheck logs the expected number of batches.
+ assert.soon(function() {
+ return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() ==
+ nDocs / maxDocsPerBatch);
+ }, "dbCheck doesn't seem to complete", 60 * 1000);
+
+ // Confirm there are no warnings or errors.
+ assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount(), 0);
+ assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0);
+})();
+
+// Validate that dbCheck completes with w:majority even when the secondary is down and a wtimeout is
+// specified.
+(function testWMajorityUnavailable() {
+ clearLog();
+ coll.drop();
+
+ // Insert 1000 docs and run a few small batches to ensure we wait for write concern between
+ // each one.
+ const nDocs = 1000;
+ const maxDocsPerBatch = 100;
+ assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x}))));
+ replSet.awaitReplication();
+
+ // Stop the secondary and expect that the dbCheck batches still complete on the primary.
+ const secondaryConn = replSet.getSecondary();
+ const secondaryNodeId = replSet.getNodeId(secondaryConn);
+ replSet.stop(secondaryNodeId, {forRestart: true /* preserve dbPath */});
+
+ assert.commandWorked(db.runCommand({
+ dbCheck: coll.getName(),
+ maxDocsPerBatch: maxDocsPerBatch,
+ batchWriteConcern: {w: 'majority', wtimeout: 10},
+ }));
+
+ // Confirm dbCheck logs the expected number of batches.
+ assert.soon(function() {
+ return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() ==
+ nDocs / maxDocsPerBatch);
+ }, "dbCheck doesn't seem to complete", 60 * 1000);
+
+ // Confirm dbCheck logs a warning for every batch.
+ assert.soon(function() {
+ return (healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount() ==
+ nDocs / maxDocsPerBatch);
+ }, "dbCheck did not log writeConcern warnings", 60 * 1000);
+ // There should be no errors.
+ assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0);
+
+ replSet.start(secondaryNodeId, {}, true /*restart*/);
+ replSet.awaitNodesAgreeOnPrimaryNoAuth();
+ replSet.awaitReplication();
+})();
+
+// Validate that an unsatisfiable 'w' setting (w:3 in a two-node set with one node down) still
+// allows dbCheck to complete when a wtimeout is specified.
+(function testW3Unavailable() {
+ clearLog();
+ coll.drop();
+
+ // Insert 1000 docs and run a few small batches to ensure we wait for write concern between
+ // each one.
+ const nDocs = 1000;
+ const maxDocsPerBatch = 100;
+ assert.commandWorked(coll.insertMany([...Array(nDocs).keys()].map(x => ({a: x}))));
+ replSet.awaitReplication();
+
+ // Stop the secondary and expect that the dbCheck batches still complete on the primary.
+ const secondaryConn = replSet.getSecondary();
+ const secondaryNodeId = replSet.getNodeId(secondaryConn);
+ replSet.stop(secondaryNodeId, {forRestart: true /* preserve dbPath */});
+
+ assert.commandWorked(db.runCommand({
+ dbCheck: coll.getName(),
+ maxDocsPerBatch: maxDocsPerBatch,
+ batchWriteConcern: {w: 3, wtimeout: 10},
+ }));
+
+ // Confirm dbCheck logs the expected number of batches.
+ assert.soon(function() {
+ return (healthlog.find({operation: "dbCheckBatch", severity: "info"}).itcount() ==
+ nDocs / maxDocsPerBatch);
+ }, "dbCheck doesn't seem to complete", 60 * 1000);
+
+ // Confirm dbCheck logs a warning for every batch.
+ assert.soon(function() {
+ return (healthlog.find({operation: "dbCheckBatch", severity: "warning"}).itcount() ==
+ nDocs / maxDocsPerBatch);
+ }, "dbCheck did not log writeConcern warnings", 60 * 1000);
+ // There should be no errors.
+ assert.eq(healthlog.find({operation: "dbCheckBatch", severity: "error"}).itcount(), 0);
+})();
+
+replSet.stopSet();
+})();
diff --git a/src/mongo/db/commands/dbcheck.cpp b/src/mongo/db/commands/dbcheck.cpp
index 145c792b808..a1e44d16ff4 100644
--- a/src/mongo/db/commands/dbcheck.cpp
+++ b/src/mongo/db/commands/dbcheck.cpp
@@ -46,6 +46,7 @@
#include "mongo/db/repl/dbcheck.h"
#include "mongo/db/repl/oplog.h"
#include "mongo/db/repl/optime.h"
+#include "mongo/db/write_concern_options.h"
#include "mongo/idl/command_generic_argument.h"
#include "mongo/util/background.h"
@@ -142,6 +143,7 @@ struct DbCheckCollectionInfo {
int64_t maxBytesPerBatch;
int64_t maxBatchTimeMillis;
bool snapshotRead;
+ WriteConcernOptions writeConcern;
};
/**
@@ -180,7 +182,8 @@ std::unique_ptr<DbCheckRun> singleCollectionRun(OperationContext* opCtx,
maxDocsPerBatch,
maxBytesPerBatch,
maxBatchTimeMillis,
- invocation.getSnapshotRead()};
+ invocation.getSnapshotRead(),
+ invocation.getBatchWriteConcern()};
auto result = std::make_unique<DbCheckRun>();
result->push_back(info);
return result;
@@ -214,7 +217,8 @@ std::unique_ptr<DbCheckRun> fullDatabaseRun(OperationContext* opCtx,
maxDocsPerBatch,
maxBytesPerBatch,
maxBatchTimeMillis,
- invocation.getSnapshotRead()};
+ invocation.getSnapshotRead(),
+ invocation.getBatchWriteConcern()};
result->push_back(info);
return true;
};
@@ -351,10 +355,10 @@ private:
return;
}
- std::unique_ptr<HealthLogEntry> entry;
if (!result.isOK()) {
bool retryable = false;
+ std::unique_ptr<HealthLogEntry> entry;
const auto code = result.getStatus().code();
if (code == ErrorCodes::LockTimeout) {
@@ -395,32 +399,41 @@ private:
OplogEntriesEnum::Batch,
result.getStatus());
}
- HealthLog::get(Client::getCurrent()->getServiceContext()).log(*entry);
+ HealthLog::get(opCtx).log(*entry);
if (retryable) {
continue;
}
return;
- } else {
- _batchesProcessed++;
- auto stats = result.getValue();
- entry = dbCheckBatchEntry(info.nss,
- stats.nDocs,
- stats.nBytes,
- stats.md5,
- stats.md5,
- start,
- stats.lastKey,
- stats.readTimestamp,
- stats.time);
- if (kDebugBuild || entry->getSeverity() != SeverityEnum::Info ||
- (_batchesProcessed % gDbCheckHealthLogEveryNBatches.load() == 0)) {
- // On debug builds, health-log every batch result; on release builds, health-log
- // every N batches.
- HealthLog::get(Client::getCurrent()->getServiceContext()).log(*entry);
- }
}
- auto stats = result.getValue();
+ const auto stats = result.getValue();
+
+ _batchesProcessed++;
+ auto entry = dbCheckBatchEntry(info.nss,
+ stats.nDocs,
+ stats.nBytes,
+ stats.md5,
+ stats.md5,
+ start,
+ stats.lastKey,
+ stats.readTimestamp,
+ stats.time);
+ if (kDebugBuild || entry->getSeverity() != SeverityEnum::Info ||
+ (_batchesProcessed % gDbCheckHealthLogEveryNBatches.load() == 0)) {
+ // On debug builds, health-log every batch result; on release builds, health-log
+ // every N batches.
+ HealthLog::get(opCtx).log(*entry);
+ }
+
+ WriteConcernResult unused;
+ auto status = waitForWriteConcern(opCtx, stats.time, info.writeConcern, &unused);
+ if (!status.isOK()) {
+ auto entry = dbCheckWarningHealthLogEntry(info.nss,
+ "dbCheck failed waiting for writeConcern",
+ OplogEntriesEnum::Batch,
+ status);
+ HealthLog::get(opCtx).log(*entry);
+ }
start = stats.lastKey;
diff --git a/src/mongo/db/repl/dbcheck.idl b/src/mongo/db/repl/dbcheck.idl
index 48e71f32c31..8246c46e70d 100644
--- a/src/mongo/db/repl/dbcheck.idl
+++ b/src/mongo/db/repl/dbcheck.idl
@@ -35,6 +35,7 @@ global:
imports:
- "mongo/idl/basic_types.idl"
+ - "mongo/db/write_concern_options.idl"
server_parameters:
dbCheckCollectionTryLockTimeoutMillis:
@@ -144,6 +145,10 @@ structs:
snapshotRead:
type: safeBool
default: true
+ batchWriteConcern:
+ description: Wait for this writeConcern at the end of every batch. Default is w:1 with no timeout.
+ type: WriteConcern
+ default: WriteConcernOptions()
DbCheckAllInvocation:
description: "Command object for database-wide form of dbCheck invocation"
@@ -175,6 +180,10 @@ structs:
snapshotRead:
type: safeBool
default: true
+ batchWriteConcern:
+ description: Wait for this writeConcern at the end of every batch. Default is w:1 with no timeout.
+ type: WriteConcern
+ default: WriteConcernOptions()
DbCheckOplogBatch:
description: "Oplog entry for a dbCheck batch"