diff options
author | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2020-09-29 15:15:01 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-09-29 16:25:32 +0000 |
commit | 1042e3d11c01daffc385a3c9b9af66eb98bf6162 (patch) | |
tree | 4e4676fabc11baf69a14eb659b703aadb1fcbf77 | |
parent | cd0315d1b733a4d94fe1e26d6ccddb8b625b711f (diff) | |
download | mongo-1042e3d11c01daffc385a3c9b9af66eb98bf6162.tar.gz |
SERVER-51086 Extract collection diffing logic into separate utility.
Adds separate DataConsistencyChecker.getDiff() function to allow
comparing the contents of different collections.
-rw-r--r-- | jstests/noPassthrough/diff_different_collections_test.js | 75 | ||||
-rw-r--r-- | jstests/noPassthrough/diff_using_sessions_test.js | 63 | ||||
-rw-r--r-- | src/mongo/shell/data_consistency_checker.js | 157 | ||||
-rw-r--r-- | src/mongo/shell/replsettest.js | 88 |
4 files changed, 271 insertions, 112 deletions
diff --git a/jstests/noPassthrough/diff_different_collections_test.js b/jstests/noPassthrough/diff_different_collections_test.js new file mode 100644 index 00000000000..49a8f335011 --- /dev/null +++ b/jstests/noPassthrough/diff_different_collections_test.js @@ -0,0 +1,75 @@ +/** + * Tests the DataConsistencyChecker.getDiff() function can be used to compare the contents between + * different collections. + */ +(function() { +"use strict"; + +const rst = new ReplSetTest({nodes: 2}); +rst.startSet(); +rst.initiate(); + +const dbName = "diff_different_collections_test"; +const collName1 = "coll_one"; +const collName2 = "coll_two"; + +const primaryDB = rst.getPrimary().getDB(dbName); + +const matchingDocs = Array.from({length: 100}, (_, i) => ({_id: i, num: i * 2})); +assert.commandWorked(primaryDB[collName1].insert(matchingDocs)); +assert.commandWorked(primaryDB[collName2].insert(matchingDocs)); + +let diff = DataConsistencyChecker.getDiff(primaryDB[collName1].find().sort({_id: 1}), + primaryDB[collName2].find().sort({_id: 1})); + +assert.eq(diff, {docsWithDifferentContents: [], docsMissingOnFirst: [], docsMissingOnSecond: []}); + +const expectedMissingOnSecond = [{_id: 30.2, num: -1}, {_id: 70.4, num: -2}]; +const expectedMissingOnFirst = [{_id: 10, num: 20}, {_id: 50, num: 100}]; + +assert.commandWorked(primaryDB[collName1].insert(expectedMissingOnSecond)); +assert.commandWorked(primaryDB[collName1].remove( + {_id: {$in: expectedMissingOnFirst.map(doc => doc._id)}}, {justOne: false})); +assert.commandWorked( + primaryDB[collName1].update({_id: {$in: [40, 90]}}, {$set: {extra: "yes"}}, {multi: true})); + +// Type fidelity is expected to be preserved by replication so intentionally test comparisons of +// distinct but equivalent BSON types. +assert.commandWorked(primaryDB[collName1].update({_id: 2}, {$set: {num: NumberLong(4)}})); + +diff = DataConsistencyChecker.getDiff(primaryDB[collName1].find().sort({_id: 1}), + primaryDB[collName2].find().sort({_id: 1})); + +assert.eq(diff, + { + docsWithDifferentContents: [ + {first: {_id: 2, num: NumberLong(4)}, second: {_id: 2, num: 4}}, + {first: {_id: 40, num: 80, extra: "yes"}, second: {_id: 40, num: 80}}, + {first: {_id: 90, num: 180, extra: "yes"}, second: {_id: 90, num: 180}}, + ], + docsMissingOnFirst: expectedMissingOnFirst, + docsMissingOnSecond: expectedMissingOnSecond + }, + "actual mismatch between collections differed"); + +// It is also possible to compare the contents of different collections across different servers. +rst.awaitReplication(); +const secondaryDB = rst.getSecondary().getDB(dbName); + +diff = DataConsistencyChecker.getDiff(primaryDB[collName1].find().sort({_id: 1}), + secondaryDB[collName2].find().sort({_id: 1})); + +assert.eq(diff, + { + docsWithDifferentContents: [ + {first: {_id: 2, num: NumberLong(4)}, second: {_id: 2, num: 4}}, + {first: {_id: 40, num: 80, extra: "yes"}, second: {_id: 40, num: 80}}, + {first: {_id: 90, num: 180, extra: "yes"}, second: {_id: 90, num: 180}}, + ], + docsMissingOnFirst: expectedMissingOnFirst, + docsMissingOnSecond: expectedMissingOnSecond + }, + "actual mismatch between servers differed"); + +rst.stopSet(); +})(); diff --git a/jstests/noPassthrough/diff_using_sessions_test.js b/jstests/noPassthrough/diff_using_sessions_test.js new file mode 100644 index 00000000000..c52bb2928b7 --- /dev/null +++ b/jstests/noPassthrough/diff_using_sessions_test.js @@ -0,0 +1,63 @@ +/** + * Tests the ReplSetTest#getCollectionDiffUsingSessions() method for comparing the contents between + * a primary and secondary server. + */ +(function() { +"use strict"; + +const rst = new ReplSetTest({nodes: 2}); +rst.startSet(); +rst.initiate(); + +const dbName = "diff_using_session_test"; +const collName = "mycoll"; + +const primaryDB = rst.getPrimary().startSession().getDatabase(dbName); +const secondaryDB = rst.getSecondary().startSession().getDatabase(dbName); + +assert.commandWorked(primaryDB[collName].insert( + Array.from({length: 100}, (_, i) => ({_id: i, num: i * 2})), {writeConcern: {w: 2}})); + +// There should be no missing or mismatched documents after having waited for replication. +let diff = rst.getCollectionDiffUsingSessions( + primaryDB.getSession(), secondaryDB.getSession(), dbName, collName); + +assert.eq(diff, + {docsWithDifferentContents: [], docsMissingOnPrimary: [], docsMissingOnSecondary: []}); + +// We pause replication on the secondary to intentionally cause the contents between the primary and +// the secondary to differ. +assert.commandWorked( + secondaryDB.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "alwaysOn"})); + +const expectedMissingOnSecondary = [{_id: 30.2, num: -1}, {_id: 70.4, num: -2}]; +const expectedMissingOnPrimary = [{_id: 10, num: 20}, {_id: 50, num: 100}]; + +assert.commandWorked(primaryDB[collName].insert(expectedMissingOnSecondary)); +assert.commandWorked(primaryDB[collName].remove( + {_id: {$in: expectedMissingOnPrimary.map(doc => doc._id)}}, {justOne: false})); +assert.commandWorked( + primaryDB[collName].update({_id: {$in: [40, 90]}}, {$set: {extra: "yes"}}, {multi: true})); + +// Type fidelity is expected to be preserved by replication so intentionally test comparisons of +// distinct but equivalent BSON types. +assert.commandWorked(primaryDB[collName].update({_id: 2}, {$set: {num: NumberLong(4)}})); + +diff = rst.getCollectionDiffUsingSessions( + primaryDB.getSession(), secondaryDB.getSession(), dbName, collName); + +assert.eq(diff, { + docsWithDifferentContents: [ + {primary: {_id: 2, num: NumberLong(4)}, secondary: {_id: 2, num: 4}}, + {primary: {_id: 40, num: 80, extra: "yes"}, secondary: {_id: 40, num: 80}}, + {primary: {_id: 90, num: 180, extra: "yes"}, secondary: {_id: 90, num: 180}}, + ], + docsMissingOnPrimary: expectedMissingOnPrimary, + docsMissingOnSecondary: expectedMissingOnSecondary +}); + +assert.commandWorked( + secondaryDB.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "off"})); + +rst.stopSet(); +})(); diff --git a/src/mongo/shell/data_consistency_checker.js b/src/mongo/shell/data_consistency_checker.js index 805fed61655..b3a4f9153a9 100644 --- a/src/mongo/shell/data_consistency_checker.js +++ b/src/mongo/shell/data_consistency_checker.js @@ -93,46 +93,133 @@ var CollInfos = class { } }; -var DataConsistencyChecker = class { - static dumpCollectionDiff( - rst, collectionPrinted, primaryCollInfos, secondaryCollInfos, collName) { - print('Dumping collection: ' + primaryCollInfos.ns(collName)); - - const primaryExists = primaryCollInfos.print(collectionPrinted, collName); - const secondaryExists = secondaryCollInfos.print(collectionPrinted, collName); - - if (!primaryExists || !secondaryExists) { - print(`Skipping checking collection differences for ${ - primaryCollInfos.ns(collName)} since it does not exist on primary and secondary`); - return; +var {DataConsistencyChecker} = (function() { + "use strict"; + + class PeekableCursor { + constructor(cursor) { + this.cursor = cursor; + this.stashedDoc = undefined; } - const primary = primaryCollInfos.conn; - const secondary = secondaryCollInfos.conn; - - const primarySession = primary.getDB('test').getSession(); - const secondarySession = secondary.getDB('test').getSession(); - const diff = rst.getCollectionDiffUsingSessions( - primarySession, secondarySession, primaryCollInfos.dbName, collName); - - for (let { - primary: primaryDoc, - secondary: secondaryDoc, - } of diff.docsWithDifferentContents) { - print(`Mismatching documents between the primary ${primary.host}` + - ` and the secondary ${secondary.host}:`); - print(' primary: ' + tojsononeline(primaryDoc)); - print(' secondary: ' + tojsononeline(secondaryDoc)); + hasNext() { + return this.cursor.hasNext(); } - if (diff.docsMissingOnPrimary.length > 0) { - print(`The following documents are missing on the primary ${primary.host}:`); - print(diff.docsMissingOnPrimary.map(doc => tojsononeline(doc)).join('\n')); + peekNext() { + if (this.stashedDoc === undefined) { + this.stashedDoc = this.cursor.next(); + } + return this.stashedDoc; } - if (diff.docsMissingOnSecondary.length > 0) { - print(`The following documents are missing on the secondary ${secondary.host}:`); - print(diff.docsMissingOnSecondary.map(doc => tojsononeline(doc)).join('\n')); + next() { + const result = (this.stashedDoc === undefined) ? this.cursor.next() : this.stashedDoc; + this.stashedDoc = undefined; + return result; } } -}; + + class DataConsistencyChecker { + static getDiff(cursor1, cursor2) { + const docsWithDifferentContents = []; + const docsMissingOnFirst = []; + const docsMissingOnSecond = []; + + cursor1 = new PeekableCursor(cursor1); + cursor2 = new PeekableCursor(cursor2); + + while (cursor1.hasNext() && cursor2.hasNext()) { + const doc1 = cursor1.peekNext(); + const doc2 = cursor2.peekNext(); + + if (bsonBinaryEqual(doc1, doc2)) { + // The same document was found from both cursor1 and cursor2 so we just move + // on to the next document for both cursors. + cursor1.next(); + cursor2.next(); + continue; + } + + const ordering = bsonWoCompare({_: doc1._id}, {_: doc2._id}); + if (ordering === 0) { + // The documents have the same _id but have different contents. + docsWithDifferentContents.push({first: doc1, second: doc2}); + cursor1.next(); + cursor2.next(); + } else if (ordering < 0) { + // The cursor1's next document has a smaller _id than the cursor2's next + // document. Since we are iterating the documents in ascending order by their + // _id, we'll never see a document with 'doc1._id' from cursor2. + docsMissingOnSecond.push(doc1); + cursor1.next(); + } else if (ordering > 0) { + // The cursor1's next document has a larger _id than the cursor2's next + // document. Since we are iterating the documents in ascending order by their + // _id, we'll never see a document with 'doc2._id' from cursor1. + docsMissingOnFirst.push(doc2); + cursor2.next(); + } + } + + while (cursor1.hasNext()) { + // We've exhausted cursor2 already, so everything remaining from cursor1 must be + // missing from cursor2. + docsMissingOnSecond.push(cursor1.next()); + } + + while (cursor2.hasNext()) { + // We've exhausted cursor1 already, so everything remaining from cursor2 must be + // missing from cursor1. + docsMissingOnFirst.push(cursor2.next()); + } + + return {docsWithDifferentContents, docsMissingOnFirst, docsMissingOnSecond}; + } + + static dumpCollectionDiff( + rst, collectionPrinted, primaryCollInfos, secondaryCollInfos, collName) { + print('Dumping collection: ' + primaryCollInfos.ns(collName)); + + const primaryExists = primaryCollInfos.print(collectionPrinted, collName); + const secondaryExists = secondaryCollInfos.print(collectionPrinted, collName); + + if (!primaryExists || !secondaryExists) { + print(`Skipping checking collection differences for ${ + primaryCollInfos.ns( + collName)} since it does not exist on primary and secondary`); + return; + } + + const primary = primaryCollInfos.conn; + const secondary = secondaryCollInfos.conn; + + const primarySession = primary.getDB('test').getSession(); + const secondarySession = secondary.getDB('test').getSession(); + const diff = rst.getCollectionDiffUsingSessions( + primarySession, secondarySession, primaryCollInfos.dbName, collName); + + for (let { + primary: primaryDoc, + secondary: secondaryDoc, + } of diff.docsWithDifferentContents) { + print(`Mismatching documents between the primary ${primary.host}` + + ` and the secondary ${secondary.host}:`); + print(' primary: ' + tojsononeline(primaryDoc)); + print(' secondary: ' + tojsononeline(secondaryDoc)); + } + + if (diff.docsMissingOnPrimary.length > 0) { + print(`The following documents are missing on the primary ${primary.host}:`); + print(diff.docsMissingOnPrimary.map(doc => tojsononeline(doc)).join('\n')); + } + + if (diff.docsMissingOnSecondary.length > 0) { + print(`The following documents are missing on the secondary ${secondary.host}:`); + print(diff.docsMissingOnSecondary.map(doc => tojsononeline(doc)).join('\n')); + } + } + } + + return {DataConsistencyChecker}; +})(); diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js index e953827c3af..4d30ea9a319 100644 --- a/src/mongo/shell/replsettest.js +++ b/src/mongo/shell/replsettest.js @@ -2161,87 +2161,21 @@ var ReplSetTest = function(opts) { this.getCollectionDiffUsingSessions = function( primarySession, secondarySession, dbName, collNameOrUUID) { - function PeekableCursor(cursor) { - let _stashedDoc; - - this.hasNext = function hasNext() { - return cursor.hasNext(); - }; - - this.peekNext = function peekNext() { - if (_stashedDoc === undefined) { - _stashedDoc = cursor.next(); - } - return _stashedDoc; - }; - - this.next = function next() { - const result = (_stashedDoc === undefined) ? cursor.next() : _stashedDoc; - _stashedDoc = undefined; - return result; - }; - } - - const docsWithDifferentContents = []; - const docsMissingOnPrimary = []; - const docsMissingOnSecondary = []; - const primaryDB = primarySession.getDatabase(dbName); const secondaryDB = secondarySession.getDatabase(dbName); const commandObj = {find: collNameOrUUID, sort: {_id: 1}}; - const primaryCursor = - new PeekableCursor(new DBCommandCursor(primaryDB, primaryDB.runCommand(commandObj))); - - const secondaryCursor = new PeekableCursor( - new DBCommandCursor(secondaryDB, secondaryDB.runCommand(commandObj))); - - while (primaryCursor.hasNext() && secondaryCursor.hasNext()) { - const primaryDoc = primaryCursor.peekNext(); - const secondaryDoc = secondaryCursor.peekNext(); - - if (bsonBinaryEqual(primaryDoc, secondaryDoc)) { - // The same document was found on the primary and secondary so we just move on to - // the next document for both cursors. - primaryCursor.next(); - secondaryCursor.next(); - continue; - } - - const ordering = bsonWoCompare({_: primaryDoc._id}, {_: secondaryDoc._id}); - if (ordering === 0) { - // The documents have the same _id but have different contents. - docsWithDifferentContents.push({primary: primaryDoc, secondary: secondaryDoc}); - primaryCursor.next(); - secondaryCursor.next(); - } else if (ordering < 0) { - // The primary's next document has a smaller _id than the secondary's next document. - // Since we are iterating the documents in ascending order by their _id, we'll never - // see a document with 'primaryDoc._id' on the secondary. - docsMissingOnSecondary.push(primaryDoc); - primaryCursor.next(); - } else if (ordering > 0) { - // The primary's next document has a larger _id than the secondary's next document. - // Since we are iterating the documents in ascending order by their _id, we'll never - // see a document with 'secondaryDoc._id' on the primary. - docsMissingOnPrimary.push(secondaryDoc); - secondaryCursor.next(); - } - } - - while (primaryCursor.hasNext()) { - // We've exhausted the secondary's cursor already, so everything remaining from the - // primary's cursor must be missing from secondary. - docsMissingOnSecondary.push(primaryCursor.next()); - } - - while (secondaryCursor.hasNext()) { - // We've exhausted the primary's cursor already, so everything remaining from the - // secondary's cursor must be missing from primary. - docsMissingOnPrimary.push(secondaryCursor.next()); - } - - return {docsWithDifferentContents, docsMissingOnPrimary, docsMissingOnSecondary}; + const primaryCursor = new DBCommandCursor(primaryDB, primaryDB.runCommand(commandObj)); + const secondaryCursor = + new DBCommandCursor(secondaryDB, secondaryDB.runCommand(commandObj)); + const diff = DataConsistencyChecker.getDiff(primaryCursor, secondaryCursor); + + return { + docsWithDifferentContents: diff.docsWithDifferentContents.map( + ({first, second}) => ({primary: first, secondary: second})), + docsMissingOnPrimary: diff.docsMissingOnFirst, + docsMissingOnSecondary: diff.docsMissingOnSecond + }; }; // Gets the dbhash for the current primary and for all secondaries (or the members of |