summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordi Olivares Provencio <jordi.olivares-provencio@mongodb.com>2023-04-28 13:36:44 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-04-28 15:11:07 +0000
commit8c64458511a257877e99a2e78278db1ea3bd14b8 (patch)
tree3921deffc14da93fe4ed0d2730ced2ebe82e9aaa
parentab15cb29a43ea6a3b76c6fa9764dc4839558f85c (diff)
downloadmongo-8c64458511a257877e99a2e78278db1ea3bd14b8.tar.gz
SERVER-75440 Add new consistency check for preimages
-rw-r--r--jstests/noPassthrough/preimages_can_be_inconsistent.js98
-rw-r--r--jstests/replsets/libs/rename_across_dbs.js1
-rw-r--r--src/mongo/db/repl/dbcheck.cpp10
-rw-r--r--src/mongo/shell/data_consistency_checker.js26
-rw-r--r--src/mongo/shell/replsettest.js236
5 files changed, 292 insertions, 79 deletions
diff --git a/jstests/noPassthrough/preimages_can_be_inconsistent.js b/jstests/noPassthrough/preimages_can_be_inconsistent.js
new file mode 100644
index 00000000000..bf87f16e5cd
--- /dev/null
+++ b/jstests/noPassthrough/preimages_can_be_inconsistent.js
@@ -0,0 +1,98 @@
+/**
+ * Test that consistency checks for preimages work as expected. Consistency is defined by performing
+ * these steps:
+ * * Fix a nsUUID to scan the preimage collection
+ * * Obtain all preimage entries of the namespace by sorting in descending order of '_id.ts' and
+ * '_id.applyOpsIndex'.
+ * * For each entry position and node:
+ * * The entry exists in the node at that position and is equal across all nodes in that
+ * position.
+ * * The entry doesn't exist in the node at that position.
+ * @tags: [
+ * requires_replication,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+function getPreImage(collectionIndex, ts) {
+ return {
+ _id: {
+ nsUUID: UUID(`3b241101-e2bb-4255-8caf-4136c566a12${collectionIndex}`),
+ ts: new Timestamp(ts, 0),
+ applyOpsIndex: 0,
+ },
+ operationTime: ISODate("9000-01-01"), // Put a date really far off into the future.
+ };
+}
+
+assert.doesNotThrow(() => {
+ const replSetTest = new ReplSetTest({name: "replSet", nodes: 2});
+ replSetTest.startSet();
+ replSetTest.initiate();
+
+ const primary = replSetTest.getPrimary();
+ const secondary = replSetTest.getSecondary();
+
+ const coll = primary.getDB("config")["system.preimages"];
+ const secondaryColl = secondary.getDB("config")["system.preimages"];
+
+ // Insert documents to the preimages collection. Ensure they are not replicated to secondaries.
+ coll.insert(getPreImage(1, 0));
+ coll.insert(getPreImage(1, 1));
+ coll.insert(getPreImage(2, 1));
+ coll.insert(getPreImage(3, 1));
+
+ assert.eq(coll.find({}).itcount(), 4);
+ assert.eq(secondaryColl.find({}).itcount(), 0);
+
+ // Now insert preimages in the old secondary.
+ replSetTest.stepUp(secondary);
+
+ const newPrimary = replSetTest.getPrimary();
+ const newColl = newPrimary.getDB("config")["system.preimages"];
+ newColl.insert(getPreImage(1, 1));
+ newColl.insert(getPreImage(2, 1));
+
+ // Verify that even if the data isn't consistent the test passes as consistency is defined as
+ // two nodes having entries equal or non-existent starting from the end.
+ replSetTest.stopSet();
+});
+
+const replSetTest = new ReplSetTest({name: "replSet", nodes: 2});
+replSetTest.startSet();
+replSetTest.initiate();
+
+const primary = replSetTest.getPrimary();
+const secondary = replSetTest.getSecondary();
+
+const coll = primary.getDB("config")["system.preimages"];
+const secondaryColl = secondary.getDB("config")["system.preimages"];
+
+// Insert a document to the preimage collection. Ensure it is not replicated to secondaries.
+coll.insert(getPreImage(1, 0));
+assert.eq(coll.find({}).itcount(), 1);
+assert.eq(secondaryColl.find({}).itcount(), 0);
+
+// Now insert another document to the secondary, this will cause an inconsistency error when we stop
+// the replica set.
+replSetTest.stepUp(secondary);
+
+const newPrimary = replSetTest.getPrimary();
+
+const newColl = newPrimary.getDB("config")["system.preimages"];
+newColl.insert(getPreImage(1, 1));
+
+// Verify that the two nodes are inconsistent.
+assert.throws(() => replSetTest.stopSet());
+
+try {
+ replSetTest.stopSet();
+} catch (e) {
+ // Verify that the inconsistency is the one we're looking for in preimages.
+ assert.eq(e.message.includes("Detected preimage entries that have different content"), true);
+}
+// Tear down the nodes now without checking for consistency.
+replSetTest.stopSet(undefined, undefined, {skipCheckDBHashes: true});
+})();
diff --git a/jstests/replsets/libs/rename_across_dbs.js b/jstests/replsets/libs/rename_across_dbs.js
index 59b0fba4e14..ed7b9c77ba2 100644
--- a/jstests/replsets/libs/rename_across_dbs.js
+++ b/jstests/replsets/libs/rename_across_dbs.js
@@ -165,6 +165,7 @@ var RenameAcrossDatabasesTest = function(options) {
_testLog('Checking oplogs and dbhashes after renaming collection.');
replTest.awaitReplication();
replTest.checkOplogs(testName);
+ replTest.checkPreImageCollection(testName);
replTest.checkReplicatedDataHashes(testName);
_testLog('Test completed. Stopping replica set.');
diff --git a/src/mongo/db/repl/dbcheck.cpp b/src/mongo/db/repl/dbcheck.cpp
index 24c008a2df2..db0575cd072 100644
--- a/src/mongo/db/repl/dbcheck.cpp
+++ b/src/mongo/db/repl/dbcheck.cpp
@@ -203,10 +203,14 @@ std::unique_ptr<HealthLogEntry> dbCheckBatchEntry(
if (hashesMatch) {
return SeverityEnum::Info;
}
- // Implicitly replicated collections and capped collections not replicating truncation are
- // not designed to be consistent, so inconsistency is not necessarily pathological.
+ // We relax inconsistency checks for some collections to a simple warning in some cases.
+ // preimages and change collections may be using untimestamped truncates on each node
+ // independently and can easily be inconsistent. In addition, by design
+ // the image_collection can skip a write during steady-state replication, and the preimages
+ // collection can be inconsistent during logical initial sync, all of which is
+ // harmless.
if (nss.isChangeStreamPreImagesCollection() || nss.isConfigImagesCollection() ||
- (options && options->capped)) {
+ nss.isChangeCollection() || (options && options->capped)) {
return SeverityEnum::Warning;
}
diff --git a/src/mongo/shell/data_consistency_checker.js b/src/mongo/shell/data_consistency_checker.js
index 96e28902e9c..6f765dbce1f 100644
--- a/src/mongo/shell/data_consistency_checker.js
+++ b/src/mongo/shell/data_consistency_checker.js
@@ -253,6 +253,12 @@ var {DataConsistencyChecker} = (function() {
}
static canIgnoreCollectionDiff(sourceCollInfos, syncingCollInfos, collName) {
+ if (collName === "system.preimages") {
+ print(`Ignoring hash inconsistencies for 'system.preimages' as those can be ` +
+ `expected with independent truncates. Content is checked separately by ` +
+ `ReplSetTest.checkPreImageCollection`);
+ return true;
+ }
if (collName !== "image_collection") {
return false;
}
@@ -407,15 +413,18 @@ var {DataConsistencyChecker} = (function() {
// Although rare, the 'config.image_collection' table can be inconsistent after
// an initial sync or after a restart (see SERVER-60048). Dump the collection
// diff anyways for more visibility as a sanity check.
+ //
+ // 'config.system.preimages' can potentially be inconsistent via hashes,
+ // there's a special process that verifies them with
+ // ReplSetTest.checkPreImageCollection so it is safe to ignore failures here.
this.dumpCollectionDiff(
collectionPrinted, sourceCollInfos, syncingCollInfos, coll.name);
const shouldIgnoreFailure =
this.canIgnoreCollectionDiff(sourceCollInfos, syncingCollInfos, coll.name);
if (shouldIgnoreFailure) {
- prettyPrint(
- `Collection diff in ${dbName}.${coll.name} can be ignored: ${dbHashesMsg}
- . Inconsistencies in the image collection can be expected in certain
- restart scenarios.`);
+ prettyPrint(`Collection diff in ${dbName}.${coll.name} can be ignored: ` +
+ `${dbHashesMsg}. Inconsistencies in the collection can be ` +
+ `expected in certain scenarios.`);
}
success = shouldIgnoreFailure && success;
didIgnoreFailure = shouldIgnoreFailure || didIgnoreFailure;
@@ -593,10 +602,11 @@ var {DataConsistencyChecker} = (function() {
dbHashesMsg}`);
if (didIgnoreFailure) {
// We only expect database hash mismatches on the config db, where
- // config.image_collection is expected to have inconsistencies in certain
- // scenarios.
- prettyPrint(`Ignoring hash mismatch for the ${dbName} database since
- inconsistencies in 'config.image_collection' can be expected`);
+ // config.image_collection and config.system.preimages are expected to have
+ // inconsistencies in certain scenarios.
+ prettyPrint(`Ignoring hash mismatch for the ${dbName} database since ` +
+ `inconsistencies in 'config.image_collection' or ` +
+ `'config.system.preimages' can be expected`);
return success;
}
success = false;
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index 45ea0ccca29..74a4adfed01 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -2548,82 +2548,76 @@ var ReplSetTest = function(opts) {
this.checkReplicaSet(checkOplogs, liveSecondaries, this, liveSecondaries, msgPrefix);
};
- /**
- * Check oplogs on all nodes, by reading from the last time. Since the oplog is a capped
- * collection, each node may not contain the same number of entries and stop if the cursor
- * is exhausted on any node being checked.
- */
- function checkOplogs(rst, secondaries, msgPrefix = 'checkOplogs') {
- secondaries = secondaries || rst._secondaries;
- const kCappedPositionLostSentinel = Object.create(null);
- const OplogReader = function(mongo) {
- this._safelyPerformCursorOperation = function(name, operation, onCappedPositionLost) {
- if (!this.cursor) {
- throw new Error("OplogReader is not open!");
- }
+ const ReverseReader = function(mongo, coll, query) {
+ this.kCappedPositionLostSentinel = Object.create(null);
- if (this._cursorExhausted) {
- return onCappedPositionLost;
- }
+ this._safelyPerformCursorOperation = function(name, operation, onCappedPositionLost) {
+ if (!this.cursor) {
+ throw new Error("ReverseReader is not open!");
+ }
- try {
- return operation(this.cursor);
- } catch (err) {
- print("Error: " + name + " threw '" + err.message + "' on " + this.mongo.host);
- // Occasionally, the capped collection will get truncated while we are iterating
- // over it. Since we are iterating over the collection in reverse, getting a
- // truncated item means we've reached the end of the list, so return false.
- if (err.code === ErrorCodes.CappedPositionLost) {
- this.cursor.close();
- this._cursorExhausted = true;
- return onCappedPositionLost;
- }
+ if (this._cursorExhausted) {
+ return onCappedPositionLost;
+ }
- throw err;
+ try {
+ return operation(this.cursor);
+ } catch (err) {
+ print("Error: " + name + " threw '" + err.message + "' on " + this.mongo.host);
+ // Occasionally, the capped collection will get truncated while we are iterating
+ // over it. Since we are iterating over the collection in reverse, getting a
+ // truncated item means we've reached the end of the list, so return false.
+ if (err.code === ErrorCodes.CappedPositionLost) {
+ this.cursor.close();
+ this._cursorExhausted = true;
+ return onCappedPositionLost;
}
- };
-
- this.next = function() {
- return this._safelyPerformCursorOperation('next', function(cursor) {
- return cursor.next();
- }, kCappedPositionLostSentinel);
- };
- this.hasNext = function() {
- return this._safelyPerformCursorOperation('hasNext', function(cursor) {
- return cursor.hasNext();
- }, false);
- };
+ throw err;
+ }
+ };
- this.query = function(ts) {
- const coll = this.getOplogColl();
- const query = {ts: {$gte: ts ? ts : new Timestamp()}};
- // Set the cursor to read backwards, from last to first. We also set the cursor not
- // to time out since it may take a while to process each batch and a test may have
- // changed "cursorTimeoutMillis" to a short time period.
- this._cursorExhausted = false;
- this.cursor =
- coll.find(query).sort({$natural: -1}).noCursorTimeout().readConcern("local");
- };
+ this.next = function() {
+ return this._safelyPerformCursorOperation('next', function(cursor) {
+ return cursor.next();
+ }, this.kCappedPositionLostSentinel);
+ };
- this.getFirstDoc = function() {
- return this.getOplogColl()
- .find()
- .sort({$natural: 1})
- .readConcern("local")
- .limit(-1)
- .next();
- };
+ this.hasNext = function() {
+ return this._safelyPerformCursorOperation('hasNext', function(cursor) {
+ return cursor.hasNext();
+ }, false);
+ };
- this.getOplogColl = function() {
- return this.mongo.getDB("local")[oplogName];
- };
+ this.query = function() {
+ // Set the cursor to read backwards, from last to first. We also set the cursor not
+ // to time out since it may take a while to process each batch and a test may have
+ // changed "cursorTimeoutMillis" to a short time period.
+ this._cursorExhausted = false;
+ this.cursor = coll.find(query)
+ .sort({$natural: -1})
+ .noCursorTimeout()
+ .readConcern("local")
+ .limit(-1);
+ };
- this.cursor = null;
- this._cursorExhausted = true;
- this.mongo = mongo;
+ this.getFirstDoc = function() {
+ return coll.find(query).sort({$natural: 1}).readConcern("local").limit(-1).next();
};
+ this.cursor = null;
+ this._cursorExhausted = true;
+ this.mongo = mongo;
+ };
+
+ /**
+ * Check oplogs on all nodes, by reading from the last time. Since the oplog is a capped
+ * collection, each node may not contain the same number of entries and stop if the cursor
+ * is exhausted on any node being checked.
+ */
+ function checkOplogs(rst, secondaries, msgPrefix = 'checkOplogs') {
+ secondaries = secondaries || rst._secondaries;
+
function assertOplogEntriesEq(oplogEntry0, oplogEntry1, reader0, reader1, prevOplogEntry) {
if (!bsonBinaryEqual(oplogEntry0, oplogEntry1)) {
const query = prevOplogEntry ? {ts: {$lte: prevOplogEntry.ts}} : {};
@@ -2658,7 +2652,8 @@ var ReplSetTest = function(opts) {
}
print("checkOplogs going to check oplog of node: " + node.host);
- readers[i] = new OplogReader(node);
+ readers[i] = new ReverseReader(
+ node, node.getDB("local")[oplogName], {ts: {$gte: new Timestamp()}});
const currTS = readers[i].getFirstDoc().ts;
// Find the reader which has the smallestTS. This reader should have the most
// number of documents in the oplog.
@@ -2682,7 +2677,7 @@ var ReplSetTest = function(opts) {
while (firstReader.hasNext()) {
const oplogEntry = firstReader.next();
bytesSinceGC += Object.bsonsize(oplogEntry);
- if (oplogEntry === kCappedPositionLostSentinel) {
+ if (oplogEntry === firstReader.kCappedPositionLostSentinel) {
// When using legacy OP_QUERY/OP_GET_MORE reads against mongos, it is
// possible for hasNext() to return true but for next() to throw an exception.
break;
@@ -2697,7 +2692,8 @@ var ReplSetTest = function(opts) {
const otherOplogEntry = readers[i].next();
bytesSinceGC += Object.bsonsize(otherOplogEntry);
- if (otherOplogEntry && otherOplogEntry !== kCappedPositionLostSentinel) {
+ if (otherOplogEntry &&
+ otherOplogEntry !== readers[i].kCappedPositionLostSentinel) {
assertOplogEntriesEq.call(this,
oplogEntry,
otherOplogEntry,
@@ -2717,6 +2713,108 @@ var ReplSetTest = function(opts) {
print("checkOplogs oplog checks complete.");
}
+ function getPreImageReaders(msgPrefix, rst, secondaries, nsUUID) {
+ const readers = [];
+ const nodes = rst.nodes;
+ for (let i = 0; i < nodes.length; i++) {
+ const node = nodes[i];
+
+ if (rst._primary !== node && !secondaries.includes(node)) {
+ print(`${msgPrefix} -- skipping preimages of node as it's not in our list of ` +
+ `secondaries: ${node.host}`);
+ continue;
+ }
+
+ // Arbiters have no documents in the oplog and thus don't have preimages
+ // content.
+ if (isNodeArbiter(node)) {
+ jsTestLog(`${msgPrefix} -- skipping preimages of arbiter node: ${node.host}`);
+ continue;
+ }
+
+ print(`${msgPrefix} -- going to check preimages of ${nsUUID} of node: ${node.host}`);
+ readers[i] = new ReverseReader(
+ node, node.getDB("config")["system.preimages"], {"_id.nsUUID": nsUUID});
+ // Start all reverseReaders at their last document for the collection.
+ readers[i].query();
+ }
+
+ return readers;
+ }
+
+ /**
+ * Check preimages on all nodes, by reading from the last time. Since the preimage may
+ * or may not be maintained independently, each node may not contain the same number of entries
+ * and stop if the cursor is exhausted on any node being checked.
+ */
+ function checkPreImageCollection(rst, secondaries, msgPrefix = 'checkPreImageCollection') {
+ secondaries = secondaries || rst._secondaries;
+
+ print(`${msgPrefix} -- starting preimage checks.`);
+ print(`${msgPrefix} -- waiting for secondaries to be ready.`);
+ this.awaitSecondaryNodes(self.kDefaultTimeoutMS, secondaries);
+ if (secondaries.length >= 1) {
+ let collectionsWithPreimages = {};
+ const nodes = rst.nodes;
+ for (let i = 0; i < nodes.length; i++) {
+ const node = nodes[i];
+
+ if (rst._primary !== node && !secondaries.includes(node)) {
+ print(`${msgPrefix} -- skipping preimages of node as it's not in our list of ` +
+ `secondaries: ${node.host}`);
+ continue;
+ }
+
+ // Arbiters have no documents in the oplog and thus don't have preimages content.
+ if (isNodeArbiter(node)) {
+ jsTestLog(`${msgPrefix} -- skipping preimages of arbiter node: ${node.host}`);
+ continue;
+ }
+
+ const preImageColl = node.getDB("config")["system.preimages"];
+ // Reset connection preferences in case the test has modified them.
+ preImageColl.getMongo().setSecondaryOk(true);
+ preImageColl.getMongo().setReadPref(rst._primary === node ? "primary"
+ : "secondary");
+
+ // Find all collections participating in pre-images.
+ const collectionsInPreimages =
+ preImageColl.aggregate([{$group: {_id: "$_id.nsUUID"}}]).toArray();
+ for (const collTs of collectionsInPreimages) {
+ collectionsWithPreimages[collTs._id] = collTs._id;
+ }
+ }
+ for (const nsUUID of Object.values(collectionsWithPreimages)) {
+ const readers = getPreImageReaders(msgPrefix, rst, secondaries, nsUUID);
+
+ while (true) {
+ let preImageEntryToCompare = undefined;
+ for (const reader of readers) {
+ if (reader.hasNext()) {
+ const preImageEntry = reader.next();
+ if (preImageEntryToCompare === undefined) {
+ preImageEntryToCompare = preImageEntry;
+ } else {
+ assert(bsonBinaryEqual(preImageEntryToCompare, preImageEntry),
+ `Detected preimage entries that have different content`);
+ }
+ }
+ }
+ if (preImageEntryToCompare === undefined) {
+ break;
+ }
+ }
+ }
+ }
+ print(`${msgPrefix} -- preimages check complete.`);
+ }
+
+ this.checkPreImageCollection = function(msgPrefix) {
+ var liveSecondaries = _determineLiveSecondaries();
+ this.checkReplicaSet(
+ checkPreImageCollection, liveSecondaries, this, liveSecondaries, msgPrefix);
+ };
+
/**
* Waits for an initial connection to a given node. Should only be called after the node's
* process has already been started. Updates the corresponding entry in 'this.nodes' with the
@@ -3158,6 +3256,8 @@ var ReplSetTest = function(opts) {
// refuses to log in live connections if some secondaries are down.
print("ReplSetTest stopSet checking oplogs.");
asCluster(this._liveNodes, () => this.checkOplogs());
+ print("ReplSetTest stopSet checking preimages.");
+ asCluster(this._liveNodes, () => this.checkPreImageCollection());
print("ReplSetTest stopSet checking replicated data hashes.");
asCluster(this._liveNodes, () => this.checkReplicatedDataHashes());
} else {