diff options
author | Jordi Olivares Provencio <jordi.olivares-provencio@mongodb.com> | 2023-04-28 13:36:44 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-28 15:11:07 +0000 |
commit | 8c64458511a257877e99a2e78278db1ea3bd14b8 (patch) | |
tree | 3921deffc14da93fe4ed0d2730ced2ebe82e9aaa | |
parent | ab15cb29a43ea6a3b76c6fa9764dc4839558f85c (diff) | |
download | mongo-8c64458511a257877e99a2e78278db1ea3bd14b8.tar.gz |
SERVER-75440 Add new consistency check for preimages
-rw-r--r-- | jstests/noPassthrough/preimages_can_be_inconsistent.js | 98 | ||||
-rw-r--r-- | jstests/replsets/libs/rename_across_dbs.js | 1 | ||||
-rw-r--r-- | src/mongo/db/repl/dbcheck.cpp | 10 | ||||
-rw-r--r-- | src/mongo/shell/data_consistency_checker.js | 26 | ||||
-rw-r--r-- | src/mongo/shell/replsettest.js | 236 |
5 files changed, 292 insertions, 79 deletions
diff --git a/jstests/noPassthrough/preimages_can_be_inconsistent.js b/jstests/noPassthrough/preimages_can_be_inconsistent.js new file mode 100644 index 00000000000..bf87f16e5cd --- /dev/null +++ b/jstests/noPassthrough/preimages_can_be_inconsistent.js @@ -0,0 +1,98 @@ +/** + * Test that consistency checks for preimage work as expected. Consistency is defined by performing + * these steps: + * * Fix a nsUUID to scan the preimage collection + * * Obtain all preimage entries of the namespace by sorting in descending order of '_id.ts' and + * '_id.applyIndexOps'. + * * For each entry position and node: + * * The entry exists in the node at that position and is equal across all nodes in that + * position. + * * The entry doesn't exist in the node at that position. + * @tags: [ + * requires_replication, + * ] + */ + +(function() { +"use strict"; + +function getPreImage(collectionIndex, ts) { + return { + _id: { + nsUUID: UUID(`3b241101-e2bb-4255-8caf-4136c566a12${collectionIndex}`), + ts: new Timestamp(ts, 0), + applyOpsIndex: 0, + }, + operationTime: ISODate("9000-01-01"), // Put a date really far off into the future. + }; +} + +assert.doesNotThrow(() => { + const replSetTest = new ReplSetTest({name: "replSet", nodes: 2}); + replSetTest.startSet(); + replSetTest.initiate(); + + const primary = replSetTest.getPrimary(); + const secondary = replSetTest.getSecondary(); + + const coll = primary.getDB("config")["system.preimages"]; + const secondaryColl = secondary.getDB("config")["system.preimages"]; + + // Insert documents to the preimages collection. Ensure they are not replicated to secondaries. + coll.insert(getPreImage(1, 0)); + coll.insert(getPreImage(1, 1)); + coll.insert(getPreImage(2, 1)); + coll.insert(getPreImage(3, 1)); + + assert.eq(coll.find({}).itcount(), 4); + assert.eq(secondaryColl.find({}).itcount(), 0); + + // Now insert preimages in the old secondary. + replSetTest.stepUp(secondary); + + const newPrimary = replSetTest.getPrimary(); + const newColl = newPrimary.getDB("config")["system.preimages"]; + newColl.insert(getPreImage(1, 1)); + newColl.insert(getPreImage(2, 1)); + + // Verify that even if the data isn't consistent the test passes as consistency is defined as + // two nodes having entries equal or non-existent starting from the end. + replSetTest.stopSet(); +}); + +const replSetTest = new ReplSetTest({name: "replSet", nodes: 2}); +replSetTest.startSet(); +replSetTest.initiate(); + +const primary = replSetTest.getPrimary(); +const secondary = replSetTest.getSecondary(); + +const coll = primary.getDB("config")["system.preimages"]; +const secondaryColl = secondary.getDB("config")["system.preimages"]; + +// Insert a document to the preimage collection. Ensure it is not replicated to secondaries. +coll.insert(getPreImage(1, 0)); +assert.eq(coll.find({}).itcount(), 1); +assert.eq(secondaryColl.find({}).itcount(), 0); + +// Now insert another document to the secondary, this will cause an inconsistency error when we stop +// the replica set. +replSetTest.stepUp(secondary); + +const newPrimary = replSetTest.getPrimary(); + +const newColl = newPrimary.getDB("config")["system.preimages"]; +newColl.insert(getPreImage(1, 1)); + +// Verify that the two nodes are inconsistent. +assert.throws(() => replSetTest.stopSet()); + +try { + replSetTest.stopSet(); +} catch (e) { + // Verify that the inconsistency is the one we're looking for in preimages. + assert.eq(e.message.includes("Detected preimage entries that have different content"), true); +} +// Tear down the nodes now without checking for consistency. +replSetTest.stopSet(undefined, undefined, {skipCheckDBHashes: true}); +})(); diff --git a/jstests/replsets/libs/rename_across_dbs.js b/jstests/replsets/libs/rename_across_dbs.js index 59b0fba4e14..ed7b9c77ba2 100644 --- a/jstests/replsets/libs/rename_across_dbs.js +++ b/jstests/replsets/libs/rename_across_dbs.js @@ -165,6 +165,7 @@ var RenameAcrossDatabasesTest = function(options) { _testLog('Checking oplogs and dbhashes after renaming collection.'); replTest.awaitReplication(); replTest.checkOplogs(testName); + replTest.checkPreImageCollection(testName); replTest.checkReplicatedDataHashes(testName); _testLog('Test completed. Stopping replica set.'); diff --git a/src/mongo/db/repl/dbcheck.cpp b/src/mongo/db/repl/dbcheck.cpp index 24c008a2df2..db0575cd072 100644 --- a/src/mongo/db/repl/dbcheck.cpp +++ b/src/mongo/db/repl/dbcheck.cpp @@ -203,10 +203,14 @@ std::unique_ptr<HealthLogEntry> dbCheckBatchEntry( if (hashesMatch) { return SeverityEnum::Info; } - // Implicitly replicated collections and capped collections not replicating truncation are - // not designed to be consistent, so inconsistency is not necessarily pathological. + // We relax inconsistency checks for some collections to a simple warning in some cases. + // preimages and change collections may be using untimestamped truncates on each node + // independently and can easily be inconsistent. In addition, by design + // the image_collection can skip a write during steady-state replication, and the preimages + // collection can be inconsistent during logical initial sync, all of which is + // harmless. if (nss.isChangeStreamPreImagesCollection() || nss.isConfigImagesCollection() || - (options && options->capped)) { + nss.isChangeCollection() || (options && options->capped)) { return SeverityEnum::Warning; } diff --git a/src/mongo/shell/data_consistency_checker.js b/src/mongo/shell/data_consistency_checker.js index 96e28902e9c..6f765dbce1f 100644 --- a/src/mongo/shell/data_consistency_checker.js +++ b/src/mongo/shell/data_consistency_checker.js @@ -253,6 +253,12 @@ var {DataConsistencyChecker} = (function() { } static canIgnoreCollectionDiff(sourceCollInfos, syncingCollInfos, collName) { + if (collName === "system.preimages") { + print(`Ignoring hash inconsistencies for 'system.preimages' as those can be ` + + `expected with independent truncates. Content is checked separately by ` + + `ReplSetTest.checkPreImageCollection`); + return true; + } if (collName !== "image_collection") { return false; } @@ -407,15 +413,18 @@ var {DataConsistencyChecker} = (function() { // Although rare, the 'config.image_collection' table can be inconsistent after // an initial sync or after a restart (see SERVER-60048). Dump the collection // diff anyways for more visibility as a sanity check. + // + // 'config.system.preimages' can potentially be inconsistent via hashes, + // there's a special process that verifies them with + // ReplSetTest.checkPreImageCollection so it is safe to ignore failures here. this.dumpCollectionDiff( collectionPrinted, sourceCollInfos, syncingCollInfos, coll.name); const shouldIgnoreFailure = this.canIgnoreCollectionDiff(sourceCollInfos, syncingCollInfos, coll.name); if (shouldIgnoreFailure) { - prettyPrint( - `Collection diff in ${dbName}.${coll.name} can be ignored: ${dbHashesMsg} - . Inconsistencies in the image collection can be expected in certain - restart scenarios.`); + prettyPrint(`Collection diff in ${dbName}.${coll.name} can be ignored: ` + + `${dbHashesMsg}. Inconsistencies in the collection can be ` + + `expected in certain scenarios.`); } success = shouldIgnoreFailure && success; didIgnoreFailure = shouldIgnoreFailure || didIgnoreFailure; @@ -593,10 +602,11 @@ var {DataConsistencyChecker} = (function() { dbHashesMsg}`); if (didIgnoreFailure) { // We only expect database hash mismatches on the config db, where - // config.image_collection is expected to have inconsistencies in certain - // scenarios. - prettyPrint(`Ignoring hash mismatch for the ${dbName} database since - inconsistencies in 'config.image_collection' can be expected`); + // config.image_collection and config.system.preimages are expected to have + // inconsistencies in certain scenarios. + prettyPrint(`Ignoring hash mismatch for the ${dbName} database since ` + + `inconsistencies in 'config.image_collection' or ` + + `'config.system.preimages' can be expected`); return success; } success = false; diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js index 45ea0ccca29..74a4adfed01 100644 --- a/src/mongo/shell/replsettest.js +++ b/src/mongo/shell/replsettest.js @@ -2548,82 +2548,76 @@ var ReplSetTest = function(opts) { this.checkReplicaSet(checkOplogs, liveSecondaries, this, liveSecondaries, msgPrefix); }; - /** - * Check oplogs on all nodes, by reading from the last time. Since the oplog is a capped - * collection, each node may not contain the same number of entries and stop if the cursor - * is exhausted on any node being checked. - */ - function checkOplogs(rst, secondaries, msgPrefix = 'checkOplogs') { - secondaries = secondaries || rst._secondaries; - const kCappedPositionLostSentinel = Object.create(null); - const OplogReader = function(mongo) { - this._safelyPerformCursorOperation = function(name, operation, onCappedPositionLost) { - if (!this.cursor) { - throw new Error("OplogReader is not open!"); - } + const ReverseReader = function(mongo, coll, query) { + this.kCappedPositionLostSentinel = Object.create(null); - if (this._cursorExhausted) { - return onCappedPositionLost; - } + this._safelyPerformCursorOperation = function(name, operation, onCappedPositionLost) { + if (!this.cursor) { + throw new Error("ReverseReader is not open!"); + } - try { - return operation(this.cursor); - } catch (err) { - print("Error: " + name + " threw '" + err.message + "' on " + this.mongo.host); - // Occasionally, the capped collection will get truncated while we are iterating - // over it. Since we are iterating over the collection in reverse, getting a - // truncated item means we've reached the end of the list, so return false. - if (err.code === ErrorCodes.CappedPositionLost) { - this.cursor.close(); - this._cursorExhausted = true; - return onCappedPositionLost; - } + if (this._cursorExhausted) { + return onCappedPositionLost; + } - throw err; + try { + return operation(this.cursor); + } catch (err) { + print("Error: " + name + " threw '" + err.message + "' on " + this.mongo.host); + // Occasionally, the capped collection will get truncated while we are iterating + // over it. Since we are iterating over the collection in reverse, getting a + // truncated item means we've reached the end of the list, so return false. + if (err.code === ErrorCodes.CappedPositionLost) { + this.cursor.close(); + this._cursorExhausted = true; + return onCappedPositionLost; } - }; - - this.next = function() { - return this._safelyPerformCursorOperation('next', function(cursor) { - return cursor.next(); - }, kCappedPositionLostSentinel); - }; - this.hasNext = function() { - return this._safelyPerformCursorOperation('hasNext', function(cursor) { - return cursor.hasNext(); - }, false); - }; + throw err; + } + }; - this.query = function(ts) { - const coll = this.getOplogColl(); - const query = {ts: {$gte: ts ? ts : new Timestamp()}}; - // Set the cursor to read backwards, from last to first. We also set the cursor not - // to time out since it may take a while to process each batch and a test may have - // changed "cursorTimeoutMillis" to a short time period. - this._cursorExhausted = false; - this.cursor = - coll.find(query).sort({$natural: -1}).noCursorTimeout().readConcern("local"); - }; + this.next = function() { + return this._safelyPerformCursorOperation('next', function(cursor) { + return cursor.next(); + }, this.kCappedPositionLostSentinel); + }; - this.getFirstDoc = function() { - return this.getOplogColl() - .find() - .sort({$natural: 1}) - .readConcern("local") - .limit(-1) - .next(); - }; + this.hasNext = function() { + return this._safelyPerformCursorOperation('hasNext', function(cursor) { + return cursor.hasNext(); + }, false); + }; - this.getOplogColl = function() { - return this.mongo.getDB("local")[oplogName]; - }; + this.query = function() { + // Set the cursor to read backwards, from last to first. We also set the cursor not + // to time out since it may take a while to process each batch and a test may have + // changed "cursorTimeoutMillis" to a short time period. + this._cursorExhausted = false; + this.cursor = coll.find(query) + .sort({$natural: -1}) + .noCursorTimeout() + .readConcern("local") + .limit(-1); + }; - this.cursor = null; - this._cursorExhausted = true; - this.mongo = mongo; + this.getFirstDoc = function() { + return coll.find(query).sort({$natural: 1}).readConcern("local").limit(-1).next(); }; + this.cursor = null; + this._cursorExhausted = true; + this.mongo = mongo; + }; + + /** + * Check oplogs on all nodes, by reading from the last time. Since the oplog is a capped + * collection, each node may not contain the same number of entries and stop if the cursor + * is exhausted on any node being checked. + */ + function checkOplogs(rst, secondaries, msgPrefix = 'checkOplogs') { + secondaries = secondaries || rst._secondaries; + function assertOplogEntriesEq(oplogEntry0, oplogEntry1, reader0, reader1, prevOplogEntry) { if (!bsonBinaryEqual(oplogEntry0, oplogEntry1)) { const query = prevOplogEntry ? {ts: {$lte: prevOplogEntry.ts}} : {}; @@ -2658,7 +2652,8 @@ var ReplSetTest = function(opts) { } print("checkOplogs going to check oplog of node: " + node.host); - readers[i] = new OplogReader(node); + readers[i] = new ReverseReader( + node, node.getDB("local")[oplogName], {ts: {$gte: new Timestamp()}}); const currTS = readers[i].getFirstDoc().ts; // Find the reader which has the smallestTS. This reader should have the most // number of documents in the oplog. @@ -2682,7 +2677,7 @@ var ReplSetTest = function(opts) { while (firstReader.hasNext()) { const oplogEntry = firstReader.next(); bytesSinceGC += Object.bsonsize(oplogEntry); - if (oplogEntry === kCappedPositionLostSentinel) { + if (oplogEntry === firstReader.kCappedPositionLostSentinel) { // When using legacy OP_QUERY/OP_GET_MORE reads against mongos, it is // possible for hasNext() to return true but for next() to throw an exception. break; @@ -2697,7 +2692,8 @@ var ReplSetTest = function(opts) { const otherOplogEntry = readers[i].next(); bytesSinceGC += Object.bsonsize(otherOplogEntry); - if (otherOplogEntry && otherOplogEntry !== kCappedPositionLostSentinel) { + if (otherOplogEntry && + otherOplogEntry !== readers[i].kCappedPositionLostSentinel) { assertOplogEntriesEq.call(this, oplogEntry, otherOplogEntry, @@ -2717,6 +2713,108 @@ var ReplSetTest = function(opts) { print("checkOplogs oplog checks complete."); } + function getPreImageReaders(msgPrefix, rst, secondaries, nsUUID) { + const readers = []; + const nodes = rst.nodes; + for (let i = 0; i < nodes.length; i++) { + const node = nodes[i]; + + if (rst._primary !== node && !secondaries.includes(node)) { + print(`${msgPrefix} -- skipping preimages of node as it's not in our list of ` + + `secondaries: ${node.host}`); + continue; + } + + // Arbiters have no documents in the oplog and thus don't have preimages + // content. + if (isNodeArbiter(node)) { + jsTestLog(`${msgPrefix} -- skipping preimages of arbiter node: ${node.host}`); + continue; + } + + print(`${msgPrefix} -- going to check preimages of ${nsUUID} of node: ${node.host}`); + readers[i] = new ReverseReader( + node, node.getDB("config")["system.preimages"], {"_id.nsUUID": nsUUID}); + // Start all reverseReaders at their last document for the collection. + readers[i].query(); + } + + return readers; + } + + /** + * Check preimages on all nodes, by reading reading from the last time. Since the preimage may + * or may not be maintained independently, each node may not contain the same number of entries + * and stop if the cursor is exhausted on any node being checked. + */ + function checkPreImageCollection(rst, secondaries, msgPrefix = 'checkPreImageCollection') { + secondaries = secondaries || rst._secondaries; + + print(`${msgPrefix} -- starting preimage checks.`); + print(`${msgPrefix} -- waiting for secondaries to be ready.`); + this.awaitSecondaryNodes(self.kDefaultTimeoutMS, secondaries); + if (secondaries.length >= 1) { + let collectionsWithPreimages = {}; + const nodes = rst.nodes; + for (let i = 0; i < nodes.length; i++) { + const node = nodes[i]; + + if (rst._primary !== node && !secondaries.includes(node)) { + print(`${msgPrefix} -- skipping preimages of node as it's not in our list of ` + + `secondaries: ${node.host}`); + continue; + } + + // Arbiters have no documents in the oplog and thus don't have preimages content. + if (isNodeArbiter(node)) { + jsTestLog(`${msgPrefix} -- skipping preimages of arbiter node: ${node.host}`); + continue; + } + + const preImageColl = node.getDB("config")["system.preimages"]; + // Reset connection preferences in case the test has modified them. + preImageColl.getMongo().setSecondaryOk(true); + preImageColl.getMongo().setReadPref(rst._primary === node ? "primary" + : "secondary"); + + // Find all collections participating in pre-images. + const collectionsInPreimages = + preImageColl.aggregate([{$group: {_id: "$_id.nsUUID"}}]).toArray(); + for (const collTs of collectionsInPreimages) { + collectionsWithPreimages[collTs._id] = collTs._id; + } + } + for (const nsUUID of Object.values(collectionsWithPreimages)) { + const readers = getPreImageReaders(msgPrefix, rst, secondaries, nsUUID); + + while (true) { + let preImageEntryToCompare = undefined; + for (const reader of readers) { + if (reader.hasNext()) { + const preImageEntry = reader.next(); + if (preImageEntryToCompare === undefined) { + preImageEntryToCompare = preImageEntry; + } else { + assert(bsonBinaryEqual(preImageEntryToCompare, preImageEntry), + `Detected preimage entries that have different content`); + } + } + } + if (preImageEntryToCompare === undefined) { + break; + } + } + } + } + print(`${msgPrefix} -- preimages check complete.`); + } + + this.checkPreImageCollection = function(msgPrefix) { + var liveSecondaries = _determineLiveSecondaries(); + this.checkReplicaSet( + checkPreImageCollection, liveSecondaries, this, liveSecondaries, msgPrefix); + }; + /** * Waits for an initial connection to a given node. Should only be called after the node's * process has already been started. Updates the corresponding entry in 'this.nodes' with the @@ -3158,6 +3256,8 @@ var ReplSetTest = function(opts) { // refuses to log in live connections if some secondaries are down. print("ReplSetTest stopSet checking oplogs."); asCluster(this._liveNodes, () => this.checkOplogs()); + print("ReplSetTest stopSet checking preimages."); + asCluster(this._liveNodes, () => this.checkPreImageCollection()); print("ReplSetTest stopSet checking replicated data hashes."); asCluster(this._liveNodes, () => this.checkReplicatedDataHashes()); } else { |