summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLouis Williams <louis.williams@mongodb.com>2022-11-03 14:56:46 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-11-03 15:45:38 +0000
commitba467f46cc1bc49965e1d72b541eff0cf1d7b22e (patch)
treeb4a7b214d8f41d3f73cb022d183b8b8290d6dc79
parentc73e2cf8985997ceabbe7882597ce655a11c4487 (diff)
downloadmongo-ba467f46cc1bc49965e1d72b541eff0cf1d7b22e.tar.gz
SERVER-67695 Remove write-blocking dbCheck
This removes the snapshotRead:false option to dbCheck which blocked concurrent writes
-rw-r--r--jstests/multiVersion/targetedTestsLastLtsFeatures/dbCheck_snapshotRead.js83
-rw-r--r--jstests/replsets/dbcheck.js43
-rw-r--r--src/mongo/db/commands/dbcheck.cpp177
-rw-r--r--src/mongo/db/repl/dbcheck.cpp24
-rw-r--r--src/mongo/db/repl/dbcheck.idl36
5 files changed, 172 insertions, 191 deletions
diff --git a/jstests/multiVersion/targetedTestsLastLtsFeatures/dbCheck_snapshotRead.js b/jstests/multiVersion/targetedTestsLastLtsFeatures/dbCheck_snapshotRead.js
new file mode 100644
index 00000000000..729289bd9a5
--- /dev/null
+++ b/jstests/multiVersion/targetedTestsLastLtsFeatures/dbCheck_snapshotRead.js
@@ -0,0 +1,83 @@
+/**
+ * Ensure that a 6.0 version replicating a dbCheck oplog entry with the removed snapshotRead:false
+ * option does not crash when a 'latest' version receives the entry.
+ *
+ * @tags: [
+ * requires_replication,
+ * ]
+ */
+(function() {
+"use strict";
+
+load('jstests/multiVersion/libs/multi_rs.js');
+
+const nodes = {
+ // We want the 6.0 node to be the primary.
+ n1: {binVersion: "6.0", rsConfig: {priority: 1}},
+ n2: {binVersion: "latest", rsConfig: {priority: 0}},
+};
+
+const rst = new ReplSetTest({nodes: nodes});
+rst.startSet();
+rst.initiate();
+
+const dbName = "test";
+const collName = jsTestName();
+
+const primary = rst.getPrimary();
+const primaryDB = primary.getDB(dbName);
+const coll = primaryDB[collName];
+
+assert.commandWorked(coll.insert({a: 1}));
+
+// The 6.0 node will replicate the dbCheck oplog entry with the 'snapshotRead:false' option. This is
+// not supported in recent versions and should be ignored, but not cause the node to crash.
+assert.commandWorked(primaryDB.runCommand({"dbCheck": 1, snapshotRead: false}));
+
+rst.awaitReplication();
+
+function dbCheckCompleted(db) {
+ return db.currentOp().inprog.filter(x => x["desc"] == "dbCheck")[0] === undefined;
+}
+
+function forEachNode(f) {
+ f(rst.getPrimary());
+ f(rst.getSecondary());
+}
+
+function awaitDbCheckCompletion(db) {
+ assert.soon(() => dbCheckCompleted(db), "dbCheck timed out");
+ rst.awaitSecondaryNodes();
+ rst.awaitReplication();
+
+ forEachNode(function(node) {
+ const healthlog = node.getDB('local').system.healthlog;
+ assert.soon(function() {
+ return (healthlog.find({"operation": "dbCheckStop"}).itcount() == 1);
+ }, "dbCheck command didn't complete");
+ });
+}
+
+awaitDbCheckCompletion(primaryDB);
+
+{
+ // The 6.0 primary should not report any errors.
+ const healthlog = primary.getDB('local').system.healthlog;
+ assert.eq(0, healthlog.find({severity: "error"}).itcount());
+ assert.eq(0, healthlog.find({severity: "warning"}).itcount());
+}
+
+{
+ // The latest secondary should log an error in the health log.
+ const secondary = rst.getSecondary();
+ const healthlog = secondary.getDB('local').system.healthlog;
+ assert.eq(1, healthlog.find({severity: "error"}).itcount());
+ assert.eq(0, healthlog.find({severity: "warning"}).itcount());
+ const errorEntry = healthlog.findOne({severity: "error"});
+ assert(errorEntry.hasOwnProperty('data'), tojson(errorEntry));
+ assert.eq(false, errorEntry.data.success, tojson(errorEntry));
+ assert(errorEntry.data.error.startsWith("Location6769502"), tojson(errorEntry));
+}
+
+rst.stopSet();
+})();
diff --git a/jstests/replsets/dbcheck.js b/jstests/replsets/dbcheck.js
index e3cab92b03e..710d986d535 100644
--- a/jstests/replsets/dbcheck.js
+++ b/jstests/replsets/dbcheck.js
@@ -40,10 +40,6 @@ function clearLog() {
forEachNode(conn => conn.getDB("local").system.healthlog.drop());
}
-function addEnoughForMultipleBatches(collection) {
- collection.insertMany([...Array(10000).keys()].map(x => ({_id: x})), {ordered: false});
-}
-
// Name for a collection which takes multiple batches to check and which shouldn't be modified
// by any of the tests.
const multiBatchSimpleCollName = "dbcheck-simple-collection";
@@ -55,29 +51,6 @@ function dbCheckCompleted(db) {
return db.currentOp().inprog.filter(x => x["desc"] == "dbCheck")[0] === undefined;
}
-// Wait for DeferredWriter writes to local.system.healthlog to eventually complete.
-// Requires clearLog() before the test case is run.
-// TODO SERVER-61765 remove this function altoghether when healthlogging becomes
-// synchronous.
-function dbCheckHealthLogCompleted(db, coll, maxKey, maxSize, maxCount) {
- let query = {"namespace": coll.getFullName(), "operation": "dbCheckBatch"};
- if (maxSize === undefined && maxCount === undefined && maxKey === undefined) {
- query['data.maxKey'] = {"$type": "maxKey"};
- }
- if (maxCount !== undefined) {
- query['data.count'] = maxCount;
- } else {
- if (maxSize !== undefined) {
- query['data.bytes'] = maxSize;
- } else {
- if (maxKey !== undefined) {
- query['data.maxKey'] = maxKey;
- }
- }
- }
- return db.getSiblingDB("local").system.healthlog.find(query).itcount() === 1;
-}
-
// Wait for dbCheck to complete (on both primaries and secondaries). Fails an assertion if
// dbCheck takes longer than maxMs.
function awaitDbCheckCompletion(db, collName, maxKey, maxSize, maxCount) {
@@ -232,17 +205,11 @@ function simpleTestNonSnapshot() {
assert.neq(primary, undefined);
let db = primary.getDB(dbName);
- assert.commandWorked(db.runCommand({"dbCheck": multiBatchSimpleCollName, snapshotRead: false}));
-
- awaitDbCheckCompletion(db, multiBatchSimpleCollName);
-
- checkLogAllConsistent(primary);
- checkTotalCounts(primary, db[multiBatchSimpleCollName]);
-
- forEachSecondary(function(secondary) {
- checkLogAllConsistent(secondary);
- checkTotalCounts(secondary, secondary.getDB(dbName)[multiBatchSimpleCollName]);
- });
+ // "dbCheck no longer supports snapshotRead:false"
+ assert.commandFailedWithCode(
+ db.runCommand({"dbCheck": multiBatchSimpleCollName, snapshotRead: false}), 6769500);
+ // "dbCheck no longer supports snapshotRead:false"
+ assert.commandFailedWithCode(db.runCommand({"dbCheck": 1, snapshotRead: false}), 6769501);
}
// Same thing, but now with concurrent updates.
diff --git a/src/mongo/db/commands/dbcheck.cpp b/src/mongo/db/commands/dbcheck.cpp
index 166ee91e097..f8aafcae484 100644
--- a/src/mongo/db/commands/dbcheck.cpp
+++ b/src/mongo/db/commands/dbcheck.cpp
@@ -143,7 +143,6 @@ struct DbCheckCollectionInfo {
int64_t maxDocsPerBatch;
int64_t maxBytesPerBatch;
int64_t maxBatchTimeMillis;
- bool snapshotRead;
WriteConcernOptions writeConcern;
};
@@ -166,6 +165,8 @@ std::unique_ptr<DbCheckRun> singleCollectionRun(OperationContext* opCtx,
"Cannot run dbCheck on " + nss.toString() + " because it is not replicated",
nss.isReplicated());
+ uassert(6769500, "dbCheck no longer supports snapshotRead:false", invocation.getSnapshotRead());
+
const auto start = invocation.getMinKey();
const auto end = invocation.getMaxKey();
const auto maxCount = invocation.getMaxCount();
@@ -183,7 +184,6 @@ std::unique_ptr<DbCheckRun> singleCollectionRun(OperationContext* opCtx,
maxDocsPerBatch,
maxBytesPerBatch,
maxBatchTimeMillis,
- invocation.getSnapshotRead(),
invocation.getBatchWriteConcern()};
auto result = std::make_unique<DbCheckRun>();
result->push_back(info);
@@ -200,6 +200,8 @@ std::unique_ptr<DbCheckRun> fullDatabaseRun(OperationContext* opCtx,
AutoGetDb agd(opCtx, dbName, MODE_IS);
uassert(ErrorCodes::NamespaceNotFound, "Database " + dbName.db() + " not found", agd.getDb());
+ uassert(6769501, "dbCheck no longer supports snapshotRead:false", invocation.getSnapshotRead());
+
const int64_t max = std::numeric_limits<int64_t>::max();
const auto rate = invocation.getMaxCountPerSecond();
const auto maxDocsPerBatch = invocation.getMaxDocsPerBatch();
@@ -219,7 +221,6 @@ std::unique_ptr<DbCheckRun> fullDatabaseRun(OperationContext* opCtx,
maxDocsPerBatch,
maxBytesPerBatch,
maxBatchTimeMillis,
- invocation.getSnapshotRead(),
invocation.getBatchWriteConcern()};
result->push_back(info);
return true;
@@ -492,122 +493,77 @@ private:
const BSONKey& first,
int64_t batchDocs,
int64_t batchBytes) {
- auto lockMode = MODE_S;
- if (info.snapshotRead) {
- // Each batch will read at the latest no-overlap point, which is the all_durable
- // timestamp on primaries. We assume that the history window on secondaries is always
- // longer than the time it takes between starting and replicating a batch on the
- // primary. Otherwise, the readTimestamp will not be available on a secondary by the
- // time it processes the oplog entry.
- lockMode = MODE_IS;
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap);
+ // Each batch will read at the latest no-overlap point, which is the all_durable timestamp
+ // on primaries. We assume that the history window on secondaries is always longer than the
+ // time it takes between starting and replicating a batch on the primary. Otherwise, the
+ // readTimestamp will not be available on a secondary by the time it processes the oplog
+ // entry.
+ opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap);
+
+ // dbCheck writes to the oplog, so we need to take an IX lock. We don't need to write to the
+ // collection, however, so we only take an intent lock on it.
+ Lock::GlobalLock glob(opCtx, MODE_IX);
+ AutoGetCollection collection(opCtx, info.nss, MODE_IS);
+
+ if (_stepdownHasOccurred(opCtx, info.nss)) {
+ _done = true;
+ return Status(ErrorCodes::PrimarySteppedDown, "dbCheck terminated due to stepdown");
}
- BatchStats result;
- auto timeoutMs = Milliseconds(gDbCheckCollectionTryLockTimeoutMillis.load());
- const auto initialBackoffMs =
- Milliseconds(gDbCheckCollectionTryLockMinBackoffMillis.load());
- auto backoffMs = initialBackoffMs;
- for (int attempt = 1;; attempt++) {
- try {
- // Try to acquire collection lock with increasing timeout and bounded exponential
- // backoff.
- auto const lockDeadline = Date_t::now() + timeoutMs;
- timeoutMs *= 2;
-
- AutoGetCollection agc(
- opCtx, info.nss, lockMode, AutoGetCollection::Options{}.deadline(lockDeadline));
-
- if (_stepdownHasOccurred(opCtx, info.nss)) {
- _done = true;
- return Status(ErrorCodes::PrimarySteppedDown,
- "dbCheck terminated due to stepdown");
- }
+ if (!collection) {
+ const auto msg = "Collection under dbCheck no longer exists";
+ return {ErrorCodes::NamespaceNotFound, msg};
+ }
- const auto& collection =
- CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, info.nss);
- if (!collection) {
- const auto msg = "Collection under dbCheck no longer exists";
- return {ErrorCodes::NamespaceNotFound, msg};
- }
+ auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx);
+ uassert(ErrorCodes::SnapshotUnavailable,
+ "No snapshot available yet for dbCheck",
+ readTimestamp);
+ auto minVisible = collection->getMinimumVisibleSnapshot();
+ if (minVisible && *readTimestamp < *collection->getMinimumVisibleSnapshot()) {
+ return {ErrorCodes::SnapshotUnavailable,
+ str::stream() << "Unable to read from collection " << info.nss
+ << " due to pending catalog changes"};
+ }
- auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx);
- auto minVisible = collection->getMinimumVisibleSnapshot();
- if (readTimestamp && minVisible &&
- *readTimestamp < *collection->getMinimumVisibleSnapshot()) {
- return {ErrorCodes::SnapshotUnavailable,
- str::stream() << "Unable to read from collection " << info.nss
- << " due to pending catalog changes"};
- }
+ boost::optional<DbCheckHasher> hasher;
+ try {
+ hasher.emplace(opCtx,
+ *collection,
+ first,
+ info.end,
+ std::min(batchDocs, info.maxCount),
+ std::min(batchBytes, info.maxSize));
+ } catch (const DBException& e) {
+ return e.toStatus();
+ }
- boost::optional<DbCheckHasher> hasher;
- try {
- hasher.emplace(opCtx,
- collection,
- first,
- info.end,
- std::min(batchDocs, info.maxCount),
- std::min(batchBytes, info.maxSize));
- } catch (const DBException& e) {
- return e.toStatus();
- }
+ const auto batchDeadline = Date_t::now() + Milliseconds(info.maxBatchTimeMillis);
+ Status status = hasher->hashAll(opCtx, batchDeadline);
- const auto batchDeadline = Date_t::now() + Milliseconds(info.maxBatchTimeMillis);
- Status status = hasher->hashAll(opCtx, batchDeadline);
+ if (!status.isOK()) {
+ return status;
+ }
- if (!status.isOK()) {
- return status;
- }
+ std::string md5 = hasher->total();
- std::string md5 = hasher->total();
-
- DbCheckOplogBatch batch;
- batch.setType(OplogEntriesEnum::Batch);
- batch.setNss(info.nss);
- batch.setMd5(md5);
- batch.setMinKey(first);
- batch.setMaxKey(BSONKey(hasher->lastKey()));
- batch.setReadTimestamp(readTimestamp);
-
- // Send information on this batch over the oplog.
- result.time = _logOp(opCtx, info.nss, collection->uuid(), batch.toBSON());
- result.readTimestamp = readTimestamp;
-
- result.nDocs = hasher->docsSeen();
- result.nBytes = hasher->bytesSeen();
- result.lastKey = hasher->lastKey();
- result.md5 = md5;
-
- break;
- } catch (const ExceptionFor<ErrorCodes::LockTimeout>& e) {
- if (attempt > gDbCheckCollectionTryLockMaxAttempts.load()) {
- return StatusWith<BatchStats>(e.code(),
- "Unable to acquire the collection lock");
- }
+ DbCheckOplogBatch batch;
+ batch.setType(OplogEntriesEnum::Batch);
+ batch.setNss(info.nss);
+ batch.setMd5(md5);
+ batch.setMinKey(first);
+ batch.setMaxKey(BSONKey(hasher->lastKey()));
+ batch.setReadTimestamp(readTimestamp);
- // Bounded exponential backoff between tryLocks.
- opCtx->sleepFor(backoffMs);
- const auto maxBackoffMillis =
- Milliseconds(gDbCheckCollectionTryLockMaxBackoffMillis.load());
- if (backoffMs < maxBackoffMillis) {
- auto backoff = durationCount<Milliseconds>(backoffMs);
- auto initialBackoff = durationCount<Milliseconds>(initialBackoffMs);
- backoff *= initialBackoff;
- backoffMs = Milliseconds(backoff);
- }
- if (backoffMs > maxBackoffMillis) {
- backoffMs = maxBackoffMillis;
- }
- LOGV2_DEBUG(6175700,
- 1,
- "Could not acquire collection lock, retrying",
- "ns"_attr = info.nss.ns(),
- "batchRangeMin"_attr = info.start.obj(),
- "batchRangeMax"_attr = info.end.obj(),
- "attempt"_attr = attempt,
- "backoff"_attr = backoffMs);
- }
- }
+ // Send information on this batch over the oplog.
+ BatchStats result;
+ result.time = _logOp(opCtx, info.nss, collection->uuid(), batch.toBSON());
+ result.readTimestamp = readTimestamp;
+
+ result.nDocs = hasher->docsSeen();
+ result.nBytes = hasher->bytesSeen();
+ result.lastKey = hasher->lastKey();
+ result.md5 = md5;
return result;
}
@@ -669,7 +625,6 @@ public:
" maxDocsPerBatch: <max number of docs/batch>\n"
" maxBytesPerBatch: <try to keep a batch within max bytes/batch>\n"
" maxBatchTimeMillis: <max time processing a batch in milliseconds>\n"
- " readTimestamp: <bool, read at a timestamp without strong locks> }\n"
"to check a collection.\n"
"Invoke with {dbCheck: 1} to check all collections in the database.";
}
diff --git a/src/mongo/db/repl/dbcheck.cpp b/src/mongo/db/repl/dbcheck.cpp
index 3062ec42733..0b0be849a5d 100644
--- a/src/mongo/db/repl/dbcheck.cpp
+++ b/src/mongo/db/repl/dbcheck.cpp
@@ -364,14 +364,26 @@ Status dbCheckBatchOnSecondary(OperationContext* opCtx,
// Set up the hasher,
boost::optional<DbCheckHasher> hasher;
try {
- auto lockMode = MODE_S;
- if (entry.getReadTimestamp()) {
- lockMode = MODE_IS;
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
- entry.getReadTimestamp());
+ // We may not have a read timestamp if the dbCheck command was run on an older version of
+ // the server with snapshotRead:false. Since we don't implement this feature, we'll log an
+ // error about skipping the batch to ensure an operator notices.
+ if (!entry.getReadTimestamp().has_value()) {
+ auto logEntry =
+ dbCheckErrorHealthLogEntry(entry.getNss(),
+ "dbCheck failed",
+ OplogEntriesEnum::Batch,
+ Status{ErrorCodes::Error(6769502),
+ "no readTimestamp in oplog entry. Ensure dbCheck "
+ "command is not using snapshotRead:false"},
+ entry.toBSON());
+ HealthLog::get(opCtx).log(*logEntry);
+ return Status::OK();
}
- AutoGetCollection coll(opCtx, entry.getNss(), lockMode);
+ opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
+ entry.getReadTimestamp());
+
+ AutoGetCollection coll(opCtx, entry.getNss(), MODE_IS);
const auto& collection = coll.getCollection();
if (!collection) {
diff --git a/src/mongo/db/repl/dbcheck.idl b/src/mongo/db/repl/dbcheck.idl
index 9d3b20b68f5..5ded9ce6daf 100644
--- a/src/mongo/db/repl/dbcheck.idl
+++ b/src/mongo/db/repl/dbcheck.idl
@@ -38,42 +38,6 @@ imports:
- "mongo/db/write_concern_options.idl"
server_parameters:
- dbCheckCollectionTryLockTimeoutMillis:
- description: 'Timeout to acquire the collection for processing a dbCheck batch. Each subsequent attempt doubles the timeout'
- set_at: [ startup, runtime ]
- cpp_vartype: 'AtomicWord<int>'
- cpp_varname: gDbCheckCollectionTryLockTimeoutMillis
- default: 10
- validator:
- gte: 1
- lte: 10000
- dbCheckCollectionTryLockMaxAttempts:
- description: 'Maximum number of attempts with backoff to acquire the collection lock for processing a dbCheck batch'
- set_at: [ startup, runtime ]
- cpp_vartype: 'AtomicWord<int>'
- cpp_varname: gDbCheckCollectionTryLockMaxAttempts
- default: 5
- validator:
- gte: 1
- lte: 20
- dbCheckCollectionTryLockMinBackoffMillis:
- description: 'Initial backoff on failure to acquire the collection lock for processing a dbCheck batch. Grows exponentially'
- set_at: [ startup, runtime ]
- cpp_vartype: 'AtomicWord<int>'
- cpp_varname: gDbCheckCollectionTryLockMinBackoffMillis
- default: 10
- validator:
- gte: 2
- lte: 60000
- dbCheckCollectionTryLockMaxBackoffMillis:
- description: 'Maximum exponential backoff on failure to acquire the collection lock for processing a dbCheck batch.'
- set_at: [ startup, runtime ]
- cpp_vartype: 'AtomicWord<int>'
- cpp_varname: gDbCheckCollectionTryLockMaxBackoffMillis
- default: 60000
- validator:
- gte: 20
- lte: 120000
dbCheckHealthLogEveryNBatches:
description: 'Emit an info-severity health log batch every N batches processed'
set_at: [ startup, runtime ]