summary refs log tree commit diff
diff options
context:
space:
mode:
author Max Hirschhorn <max.hirschhorn@mongodb.com> 2020-09-29 15:15:01 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-09-29 16:25:32 +0000
commit 1042e3d11c01daffc385a3c9b9af66eb98bf6162 (patch)
tree 4e4676fabc11baf69a14eb659b703aadb1fcbf77
parent cd0315d1b733a4d94fe1e26d6ccddb8b625b711f (diff)
download mongo-1042e3d11c01daffc385a3c9b9af66eb98bf6162.tar.gz
SERVER-51086 Extract collection diffing logic into separate utility.
Adds separate DataConsistencyChecker.getDiff() function to allow comparing the contents of different collections.
-rw-r--r-- jstests/noPassthrough/diff_different_collections_test.js 75
-rw-r--r-- jstests/noPassthrough/diff_using_sessions_test.js 63
-rw-r--r-- src/mongo/shell/data_consistency_checker.js 157
-rw-r--r-- src/mongo/shell/replsettest.js 88
4 files changed, 271 insertions, 112 deletions
diff --git a/jstests/noPassthrough/diff_different_collections_test.js b/jstests/noPassthrough/diff_different_collections_test.js
new file mode 100644
index 00000000000..49a8f335011
--- /dev/null
+++ b/jstests/noPassthrough/diff_different_collections_test.js
@@ -0,0 +1,75 @@
+/**
+ * Tests the DataConsistencyChecker.getDiff() function can be used to compare the contents between
+ * different collections.
+ */
+(function() {
+"use strict";
+
+const rst = new ReplSetTest({nodes: 2});
+rst.startSet();
+rst.initiate();
+
+const dbName = "diff_different_collections_test";
+const collName1 = "coll_one";
+const collName2 = "coll_two";
+
+const primaryDB = rst.getPrimary().getDB(dbName);
+
+const matchingDocs = Array.from({length: 100}, (_, i) => ({_id: i, num: i * 2}));
+assert.commandWorked(primaryDB[collName1].insert(matchingDocs));
+assert.commandWorked(primaryDB[collName2].insert(matchingDocs));
+
+let diff = DataConsistencyChecker.getDiff(primaryDB[collName1].find().sort({_id: 1}),
+ primaryDB[collName2].find().sort({_id: 1}));
+
+assert.eq(diff, {docsWithDifferentContents: [], docsMissingOnFirst: [], docsMissingOnSecond: []});
+
+const expectedMissingOnSecond = [{_id: 30.2, num: -1}, {_id: 70.4, num: -2}];
+const expectedMissingOnFirst = [{_id: 10, num: 20}, {_id: 50, num: 100}];
+
+assert.commandWorked(primaryDB[collName1].insert(expectedMissingOnSecond));
+assert.commandWorked(primaryDB[collName1].remove(
+ {_id: {$in: expectedMissingOnFirst.map(doc => doc._id)}}, {justOne: false}));
+assert.commandWorked(
+ primaryDB[collName1].update({_id: {$in: [40, 90]}}, {$set: {extra: "yes"}}, {multi: true}));
+
+// Type fidelity is expected to be preserved by replication so intentionally test comparisons of
+// distinct but equivalent BSON types.
+assert.commandWorked(primaryDB[collName1].update({_id: 2}, {$set: {num: NumberLong(4)}}));
+
+diff = DataConsistencyChecker.getDiff(primaryDB[collName1].find().sort({_id: 1}),
+ primaryDB[collName2].find().sort({_id: 1}));
+
+assert.eq(diff,
+ {
+ docsWithDifferentContents: [
+ {first: {_id: 2, num: NumberLong(4)}, second: {_id: 2, num: 4}},
+ {first: {_id: 40, num: 80, extra: "yes"}, second: {_id: 40, num: 80}},
+ {first: {_id: 90, num: 180, extra: "yes"}, second: {_id: 90, num: 180}},
+ ],
+ docsMissingOnFirst: expectedMissingOnFirst,
+ docsMissingOnSecond: expectedMissingOnSecond
+ },
+ "actual mismatch between collections differed");
+
+// It is also possible to compare the contents of different collections across different servers.
+rst.awaitReplication();
+const secondaryDB = rst.getSecondary().getDB(dbName);
+
+diff = DataConsistencyChecker.getDiff(primaryDB[collName1].find().sort({_id: 1}),
+ secondaryDB[collName2].find().sort({_id: 1}));
+
+assert.eq(diff,
+ {
+ docsWithDifferentContents: [
+ {first: {_id: 2, num: NumberLong(4)}, second: {_id: 2, num: 4}},
+ {first: {_id: 40, num: 80, extra: "yes"}, second: {_id: 40, num: 80}},
+ {first: {_id: 90, num: 180, extra: "yes"}, second: {_id: 90, num: 180}},
+ ],
+ docsMissingOnFirst: expectedMissingOnFirst,
+ docsMissingOnSecond: expectedMissingOnSecond
+ },
+ "actual mismatch between servers differed");
+
+rst.stopSet();
+})();
diff --git a/jstests/noPassthrough/diff_using_sessions_test.js b/jstests/noPassthrough/diff_using_sessions_test.js
new file mode 100644
index 00000000000..c52bb2928b7
--- /dev/null
+++ b/jstests/noPassthrough/diff_using_sessions_test.js
@@ -0,0 +1,63 @@
+/**
+ * Tests the ReplSetTest#getCollectionDiffUsingSessions() method for comparing the contents between
+ * a primary and secondary server.
+ */
+(function() {
+"use strict";
+
+const rst = new ReplSetTest({nodes: 2});
+rst.startSet();
+rst.initiate();
+
+const dbName = "diff_using_session_test";
+const collName = "mycoll";
+
+const primaryDB = rst.getPrimary().startSession().getDatabase(dbName);
+const secondaryDB = rst.getSecondary().startSession().getDatabase(dbName);
+
+assert.commandWorked(primaryDB[collName].insert(
+ Array.from({length: 100}, (_, i) => ({_id: i, num: i * 2})), {writeConcern: {w: 2}}));
+
+// There should be no missing or mismatched documents after having waited for replication.
+let diff = rst.getCollectionDiffUsingSessions(
+ primaryDB.getSession(), secondaryDB.getSession(), dbName, collName);
+
+assert.eq(diff,
+ {docsWithDifferentContents: [], docsMissingOnPrimary: [], docsMissingOnSecondary: []});
+
+// We pause replication on the secondary to intentionally cause the contents between the primary and
+// the secondary to differ.
+assert.commandWorked(
+ secondaryDB.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "alwaysOn"}));
+
+const expectedMissingOnSecondary = [{_id: 30.2, num: -1}, {_id: 70.4, num: -2}];
+const expectedMissingOnPrimary = [{_id: 10, num: 20}, {_id: 50, num: 100}];
+
+assert.commandWorked(primaryDB[collName].insert(expectedMissingOnSecondary));
+assert.commandWorked(primaryDB[collName].remove(
+ {_id: {$in: expectedMissingOnPrimary.map(doc => doc._id)}}, {justOne: false}));
+assert.commandWorked(
+ primaryDB[collName].update({_id: {$in: [40, 90]}}, {$set: {extra: "yes"}}, {multi: true}));
+
+// Type fidelity is expected to be preserved by replication so intentionally test comparisons of
+// distinct but equivalent BSON types.
+assert.commandWorked(primaryDB[collName].update({_id: 2}, {$set: {num: NumberLong(4)}}));
+
+diff = rst.getCollectionDiffUsingSessions(
+ primaryDB.getSession(), secondaryDB.getSession(), dbName, collName);
+
+assert.eq(diff, {
+ docsWithDifferentContents: [
+ {primary: {_id: 2, num: NumberLong(4)}, secondary: {_id: 2, num: 4}},
+ {primary: {_id: 40, num: 80, extra: "yes"}, secondary: {_id: 40, num: 80}},
+ {primary: {_id: 90, num: 180, extra: "yes"}, secondary: {_id: 90, num: 180}},
+ ],
+ docsMissingOnPrimary: expectedMissingOnPrimary,
+ docsMissingOnSecondary: expectedMissingOnSecondary
+});
+
+assert.commandWorked(
+ secondaryDB.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "off"}));
+
+rst.stopSet();
+})();
diff --git a/src/mongo/shell/data_consistency_checker.js b/src/mongo/shell/data_consistency_checker.js
index 805fed61655..b3a4f9153a9 100644
--- a/src/mongo/shell/data_consistency_checker.js
+++ b/src/mongo/shell/data_consistency_checker.js
@@ -93,46 +93,133 @@ var CollInfos = class {
}
};
-var DataConsistencyChecker = class {
- static dumpCollectionDiff(
- rst, collectionPrinted, primaryCollInfos, secondaryCollInfos, collName) {
- print('Dumping collection: ' + primaryCollInfos.ns(collName));
-
- const primaryExists = primaryCollInfos.print(collectionPrinted, collName);
- const secondaryExists = secondaryCollInfos.print(collectionPrinted, collName);
-
- if (!primaryExists || !secondaryExists) {
- print(`Skipping checking collection differences for ${
- primaryCollInfos.ns(collName)} since it does not exist on primary and secondary`);
- return;
+var {DataConsistencyChecker} = (function() {
+ "use strict";
+
+ class PeekableCursor {
+ constructor(cursor) {
+ this.cursor = cursor;
+ this.stashedDoc = undefined;
}
- const primary = primaryCollInfos.conn;
- const secondary = secondaryCollInfos.conn;
-
- const primarySession = primary.getDB('test').getSession();
- const secondarySession = secondary.getDB('test').getSession();
- const diff = rst.getCollectionDiffUsingSessions(
- primarySession, secondarySession, primaryCollInfos.dbName, collName);
-
- for (let {
- primary: primaryDoc,
- secondary: secondaryDoc,
- } of diff.docsWithDifferentContents) {
- print(`Mismatching documents between the primary ${primary.host}` +
- ` and the secondary ${secondary.host}:`);
- print(' primary: ' + tojsononeline(primaryDoc));
- print(' secondary: ' + tojsononeline(secondaryDoc));
+ hasNext() {
+ return this.cursor.hasNext();
}
- if (diff.docsMissingOnPrimary.length > 0) {
- print(`The following documents are missing on the primary ${primary.host}:`);
- print(diff.docsMissingOnPrimary.map(doc => tojsononeline(doc)).join('\n'));
+ peekNext() {
+ if (this.stashedDoc === undefined) {
+ this.stashedDoc = this.cursor.next();
+ }
+ return this.stashedDoc;
}
- if (diff.docsMissingOnSecondary.length > 0) {
- print(`The following documents are missing on the secondary ${secondary.host}:`);
- print(diff.docsMissingOnSecondary.map(doc => tojsononeline(doc)).join('\n'));
+ next() {
+ const result = (this.stashedDoc === undefined) ? this.cursor.next() : this.stashedDoc;
+ this.stashedDoc = undefined;
+ return result;
}
}
-};
+
+ class DataConsistencyChecker {
+ static getDiff(cursor1, cursor2) {
+ const docsWithDifferentContents = [];
+ const docsMissingOnFirst = [];
+ const docsMissingOnSecond = [];
+
+ cursor1 = new PeekableCursor(cursor1);
+ cursor2 = new PeekableCursor(cursor2);
+
+ while (cursor1.hasNext() && cursor2.hasNext()) {
+ const doc1 = cursor1.peekNext();
+ const doc2 = cursor2.peekNext();
+
+ if (bsonBinaryEqual(doc1, doc2)) {
+ // The same document was found from both cursor1 and cursor2 so we just move
+ // on to the next document for both cursors.
+ cursor1.next();
+ cursor2.next();
+ continue;
+ }
+
+ const ordering = bsonWoCompare({_: doc1._id}, {_: doc2._id});
+ if (ordering === 0) {
+ // The documents have the same _id but have different contents.
+ docsWithDifferentContents.push({first: doc1, second: doc2});
+ cursor1.next();
+ cursor2.next();
+ } else if (ordering < 0) {
+ // The cursor1's next document has a smaller _id than the cursor2's next
+ // document. Since we are iterating the documents in ascending order by their
+ // _id, we'll never see a document with 'doc1._id' from cursor2.
+ docsMissingOnSecond.push(doc1);
+ cursor1.next();
+ } else if (ordering > 0) {
+ // The cursor1's next document has a larger _id than the cursor2's next
+ // document. Since we are iterating the documents in ascending order by their
+ // _id, we'll never see a document with 'doc2._id' from cursor1.
+ docsMissingOnFirst.push(doc2);
+ cursor2.next();
+ }
+ }
+
+ while (cursor1.hasNext()) {
+ // We've exhausted cursor2 already, so everything remaining from cursor1 must be
+ // missing from cursor2.
+ docsMissingOnSecond.push(cursor1.next());
+ }
+
+ while (cursor2.hasNext()) {
+ // We've exhausted cursor1 already, so everything remaining from cursor2 must be
+ // missing from cursor1.
+ docsMissingOnFirst.push(cursor2.next());
+ }
+
+ return {docsWithDifferentContents, docsMissingOnFirst, docsMissingOnSecond};
+ }
+
+ static dumpCollectionDiff(
+ rst, collectionPrinted, primaryCollInfos, secondaryCollInfos, collName) {
+ print('Dumping collection: ' + primaryCollInfos.ns(collName));
+
+ const primaryExists = primaryCollInfos.print(collectionPrinted, collName);
+ const secondaryExists = secondaryCollInfos.print(collectionPrinted, collName);
+
+ if (!primaryExists || !secondaryExists) {
+ print(`Skipping checking collection differences for ${
+ primaryCollInfos.ns(
+ collName)} since it does not exist on primary and secondary`);
+ return;
+ }
+
+ const primary = primaryCollInfos.conn;
+ const secondary = secondaryCollInfos.conn;
+
+ const primarySession = primary.getDB('test').getSession();
+ const secondarySession = secondary.getDB('test').getSession();
+ const diff = rst.getCollectionDiffUsingSessions(
+ primarySession, secondarySession, primaryCollInfos.dbName, collName);
+
+ for (let {
+ primary: primaryDoc,
+ secondary: secondaryDoc,
+ } of diff.docsWithDifferentContents) {
+ print(`Mismatching documents between the primary ${primary.host}` +
+ ` and the secondary ${secondary.host}:`);
+ print(' primary: ' + tojsononeline(primaryDoc));
+ print(' secondary: ' + tojsononeline(secondaryDoc));
+ }
+
+ if (diff.docsMissingOnPrimary.length > 0) {
+ print(`The following documents are missing on the primary ${primary.host}:`);
+ print(diff.docsMissingOnPrimary.map(doc => tojsononeline(doc)).join('\n'));
+ }
+
+ if (diff.docsMissingOnSecondary.length > 0) {
+ print(`The following documents are missing on the secondary ${secondary.host}:`);
+ print(diff.docsMissingOnSecondary.map(doc => tojsononeline(doc)).join('\n'));
+ }
+ }
+ }
+
+ return {DataConsistencyChecker};
+})();
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index e953827c3af..4d30ea9a319 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -2161,87 +2161,21 @@ var ReplSetTest = function(opts) {
this.getCollectionDiffUsingSessions = function(
primarySession, secondarySession, dbName, collNameOrUUID) {
- function PeekableCursor(cursor) {
- let _stashedDoc;
-
- this.hasNext = function hasNext() {
- return cursor.hasNext();
- };
-
- this.peekNext = function peekNext() {
- if (_stashedDoc === undefined) {
- _stashedDoc = cursor.next();
- }
- return _stashedDoc;
- };
-
- this.next = function next() {
- const result = (_stashedDoc === undefined) ? cursor.next() : _stashedDoc;
- _stashedDoc = undefined;
- return result;
- };
- }
-
- const docsWithDifferentContents = [];
- const docsMissingOnPrimary = [];
- const docsMissingOnSecondary = [];
-
const primaryDB = primarySession.getDatabase(dbName);
const secondaryDB = secondarySession.getDatabase(dbName);
const commandObj = {find: collNameOrUUID, sort: {_id: 1}};
- const primaryCursor =
- new PeekableCursor(new DBCommandCursor(primaryDB, primaryDB.runCommand(commandObj)));
-
- const secondaryCursor = new PeekableCursor(
- new DBCommandCursor(secondaryDB, secondaryDB.runCommand(commandObj)));
-
- while (primaryCursor.hasNext() && secondaryCursor.hasNext()) {
- const primaryDoc = primaryCursor.peekNext();
- const secondaryDoc = secondaryCursor.peekNext();
-
- if (bsonBinaryEqual(primaryDoc, secondaryDoc)) {
- // The same document was found on the primary and secondary so we just move on to
- // the next document for both cursors.
- primaryCursor.next();
- secondaryCursor.next();
- continue;
- }
-
- const ordering = bsonWoCompare({_: primaryDoc._id}, {_: secondaryDoc._id});
- if (ordering === 0) {
- // The documents have the same _id but have different contents.
- docsWithDifferentContents.push({primary: primaryDoc, secondary: secondaryDoc});
- primaryCursor.next();
- secondaryCursor.next();
- } else if (ordering < 0) {
- // The primary's next document has a smaller _id than the secondary's next document.
- // Since we are iterating the documents in ascending order by their _id, we'll never
- // see a document with 'primaryDoc._id' on the secondary.
- docsMissingOnSecondary.push(primaryDoc);
- primaryCursor.next();
- } else if (ordering > 0) {
- // The primary's next document has a larger _id than the secondary's next document.
- // Since we are iterating the documents in ascending order by their _id, we'll never
- // see a document with 'secondaryDoc._id' on the primary.
- docsMissingOnPrimary.push(secondaryDoc);
- secondaryCursor.next();
- }
- }
-
- while (primaryCursor.hasNext()) {
- // We've exhausted the secondary's cursor already, so everything remaining from the
- // primary's cursor must be missing from secondary.
- docsMissingOnSecondary.push(primaryCursor.next());
- }
-
- while (secondaryCursor.hasNext()) {
- // We've exhausted the primary's cursor already, so everything remaining from the
- // secondary's cursor must be missing from primary.
- docsMissingOnPrimary.push(secondaryCursor.next());
- }
-
- return {docsWithDifferentContents, docsMissingOnPrimary, docsMissingOnSecondary};
+ const primaryCursor = new DBCommandCursor(primaryDB, primaryDB.runCommand(commandObj));
+ const secondaryCursor =
+ new DBCommandCursor(secondaryDB, secondaryDB.runCommand(commandObj));
+ const diff = DataConsistencyChecker.getDiff(primaryCursor, secondaryCursor);
+
+ return {
+ docsWithDifferentContents: diff.docsWithDifferentContents.map(
+ ({first, second}) => ({primary: first, secondary: second})),
+ docsMissingOnPrimary: diff.docsMissingOnFirst,
+ docsMissingOnSecondary: diff.docsMissingOnSecond
+ };
};
// Gets the dbhash for the current primary and for all secondaries (or the members of