author     Max Hirschhorn <max.hirschhorn@mongodb.com>   2016-02-09 10:02:32 -0500
committer  Max Hirschhorn <max.hirschhorn@mongodb.com>   2016-02-09 10:02:32 -0500
commit     41f296cdc37f92f8de0ea3d17109693c4f0befb6 (patch)
tree       5bb9eddb3abd37fa9230940d4721755ffbf8569c /jstests/concurrency/fsm_libs
parent     2d42c9ad4e65ff20d1ea1a3799afedca8d78364a (diff)
download   mongo-41f296cdc37f92f8de0ea3d17109693c4f0befb6.tar.gz
SERVER-21881 Use fsync+lock when checking dbhash in concurrency suite.
The fsync+lock operation prevents the TTL monitor from running on the
primary after awaiting replication and before getting the dbhashes from
all the nodes in the replica set. Otherwise, a primary's dbhash can
reflect a delete operation that hasn't yet replicated to all of the
secondaries, leading to a spurious dbhash mismatch.
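The shape of the fix is easier to see outside of the diff. Below is a minimal standalone sketch of the lock-then-check pattern (not the patch itself), assuming a `ReplSetTest` handle `rst` and the `test` database, both illustrative; the real implementation is `checkReplicationConsistency()` and `checkDBHashes()` in cluster.js further down.

```js
// Sketch only: assumes `rst` is an already-initiated ReplSetTest.
var primary = rst.getPrimary();  // cluster.js uses rst.liveNodes.master instead
var numNodes = rst.nodes.length;

// 1. Freeze writes on the primary so the TTL monitor cannot delete documents
//    between awaiting replication and taking the dbhashes.
assert.commandWorked(primary.adminCommand({fsync: 1, lock: 1}));

try {
    // 2. Wait for everything the primary has already applied to reach all nodes.
    //    A replicated write can't be used here because the primary is fsync-locked,
    //    so wait on the primary's latest optime via getLastError.
    var status = primary.adminCommand({replSetGetStatus: 1});
    assert.commandWorked(status);
    var self = status.members.find(function(member) { return member.self; });

    assert.commandWorked(primary.getDB('test').runCommand({
        getLastError: 1,
        w: numNodes,
        wtimeout: 5 * 60 * 1000,
        wOpTime: self.optime
    }));

    // 3. With writes frozen and the secondaries caught up, the dbhashes must agree.
    var hashes = rst.getHashes('test');
    hashes.slaves.forEach(function(secondaryHash) {
        assert.eq(hashes.master.md5, secondaryHash.md5, 'spurious dbhash mismatch?');
    });
} finally {
    // 4. Always unlock the primary, otherwise later operations against it hang.
    assert.commandWorked(primary.adminCommand({fsyncUnlock: 1}));
}
```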
Diffstat (limited to 'jstests/concurrency/fsm_libs')
-rw-r--r--  jstests/concurrency/fsm_libs/cluster.js | 197
-rw-r--r--  jstests/concurrency/fsm_libs/runner.js  |  46
2 files changed, 156 insertions, 87 deletions
```diff
diff --git a/jstests/concurrency/fsm_libs/cluster.js b/jstests/concurrency/fsm_libs/cluster.js
index f6d5fc9a793..889cf316d85 100644
--- a/jstests/concurrency/fsm_libs/cluster.js
+++ b/jstests/concurrency/fsm_libs/cluster.js
@@ -109,7 +109,6 @@ var Cluster = function(options) {
         mongod: []
     };
     var nextConn = 0;
-    var primaries = [];
     var replSets = [];
 
     // TODO: Define size of replica set from options
@@ -179,7 +178,6 @@ var Cluster = function(options) {
             i = 0;
             while (rsTest) {
                 this._addReplicaSetConns(rsTest);
-                primaries.push(rsTest.getPrimary());
                 replSets.push(rsTest);
                 ++i;
                 rsTest = st['rs' + i];
@@ -210,7 +208,6 @@ var Cluster = function(options) {
             rst.awaitSecondaryNodes();
 
             conn = rst.getPrimary();
-            primaries = [conn];
             replSets = [rst];
 
             this.teardown = function teardown() {
@@ -417,88 +414,142 @@ var Cluster = function(options) {
         return this.isSharded() && options.enableBalancer;
     };
 
-    this.awaitReplication = function awaitReplication(message) {
+    this.checkDBHashes = function checkDBHashes(rst, dbBlacklist, phase) {
         assert(initialized, 'cluster must be initialized first');
-        if (this.isReplication()) {
-            var wc = {
-                writeConcern: {
-                    w: replSetNodes, // all nodes in replica set
-                    wtimeout: 300000 // wait up to 5 minutes
+        assert(this.isReplication(), 'cluster is not a replica set');
+
+        // Use liveNodes.master instead of getPrimary() to avoid the detection of a new primary.
+        var primary = rst.liveNodes.master;
+
+        var res = primary.adminCommand({ listDatabases: 1 });
+        assert.commandWorked(res);
+
+        res.databases.forEach(dbInfo => {
+            var dbName = dbInfo.name;
+            if (Array.contains(dbBlacklist, dbName)) {
+                return;
+            }
+
+            var dbHashes = rst.getHashes(dbName);
+            var primaryDBHash = dbHashes.master;
+            assert.commandWorked(primaryDBHash);
+
+            dbHashes.slaves.forEach(secondaryDBHash => {
+                assert.commandWorked(secondaryDBHash);
+
+                var primaryNumCollections = Object.keys(primaryDBHash.collections).length;
+                var secondaryNumCollections = Object.keys(secondaryDBHash.collections).length;
+
+                assert.eq(primaryNumCollections, secondaryNumCollections,
+                          phase + ', the primary and secondary have a different number of' +
+                          ' collections: ' + tojson(dbHashes));
+
+                // Only compare the dbhashes of non-capped collections because capped collections
+                // are not necessarily truncated at the same points across replica set members.
+                var collNames = Object.keys(primaryDBHash.collections).filter(collName =>
+                    !primary.getDB(dbName)[collName].isCapped());
+
+                collNames.forEach(collName => {
+                    assert.eq(primaryDBHash.collections[collName],
+                              secondaryDBHash.collections[collName],
+                              phase + ', the primary and secondary have a different hash for the' +
+                              ' collection ' + dbName + '.' + collName + ': ' + tojson(dbHashes));
+                });
+
+                if (collNames.length === primaryNumCollections) {
+                    // If the primary and secondary have the same hashes for all the collections on
+                    // the database and there aren't any capped collections, then the hashes for the
+                    // whole database should match.
+                    assert.eq(primaryDBHash.md5,
+                              secondaryDBHash.md5,
+                              phase + ', the primary and secondary have a different hash for the ' +
+                              dbName + ' database: ' + tojson(dbHashes));
                 }
-            };
-            primaries.forEach(function(primary) {
-                var startTime = Date.now();
-                jsTest.log(primary.host + ': awaitReplication started ' + message);
-
-                // Insert a document with a writeConcern for all nodes in the replica set to
-                // ensure that all previous workload operations have completed on secondaries
-                var result = primary.getDB('test').fsm_teardown.insert({ a: 1 }, wc);
-                assert.writeOK(result, 'teardown insert failed: ' + tojson(result));
-
-                var totalTime = Date.now() - startTime;
-                jsTest.log(primary.host + ': awaitReplication ' + message + ' completed in ' +
-                           totalTime + ' ms');
             });
-        }
+        });
     };
 
-    // Returns true if the specified DB contains a capped collection.
-    var containsCappedCollection = function containsCappedCollection(db) {
-        return db.getCollectionNames().some(coll => db[coll].isCapped());
-    };
+    this.checkReplicationConsistency = function checkReplicationConsistency(dbBlacklist,
+                                                                            phase,
+                                                                            ttlIndexExists) {
+        assert(initialized, 'cluster must be initialized first');
 
-    // Checks dbHashes for databases that are not on the blacklist.
-    // All replica set nodes are checked.
-    this.checkDbHashes = function checkDbHashes(dbBlacklist, message) {
-        if (!this.isReplication() || this.isBalancerEnabled()) {
+        if (!this.isReplication()) {
             return;
         }
 
-        var res = this.getDB('admin').runCommand('listDatabases');
-        assert.commandWorked(res);
+        var shouldCheckDBHashes = !this.isBalancerEnabled();
 
-        res.databases.forEach(function(dbInfo) {
-            if (Array.contains(dbBlacklist, dbInfo.name)) {
-                return;
+        replSets.forEach(rst => {
+            var startTime = Date.now();
+            var res;
+
+            // Use liveNodes.master instead of getPrimary() to avoid the detection of a new primary.
+            var primary = rst.liveNodes.master;
+            jsTest.log('Starting consistency checks for replica set with ' + primary.host +
+                       ' assumed to still be primary, ' + phase);
+
+            if (shouldCheckDBHashes && ttlIndexExists) {
+                // Lock the primary to prevent the TTL monitor from deleting expired documents in
+                // the background while we are getting the dbhashes of the replica set members.
+                assert.commandWorked(primary.adminCommand({ fsync: 1, lock: 1 }),
+                                     phase + ', failed to lock the primary');
            }
-            var hasCappedColl = containsCappedCollection(this.getDB(dbInfo.name));
-
-            replSets.forEach(function(replSet) {
-                var hashes = replSet.getHashes(dbInfo.name);
-                var masterHashes = hashes.master;
-                assert.commandWorked(masterHashes);
-                var dbHash = masterHashes.md5;
-
-                hashes.slaves.forEach(function(slaveHashes) {
-                    assert.commandWorked(slaveHashes);
-                    assert.eq(Object.keys(masterHashes.collections).length,
-                              Object.keys(slaveHashes.collections).length,
-                              message + ' dbHash number of collections in db ' +
-                              dbInfo.name + ' ' + tojson(hashes));
-
-                    if (!hasCappedColl) {
-                        // dbHash on a DB not containing a capped collection should match.
-                        assert.eq(dbHash,
-                                  slaveHashes.md5,
-                                  message + ' dbHash inconsistency for db ' +
-                                  dbInfo.name + ' ' + tojson(hashes));
-                    } else {
-                        // dbHash on a DB containing a capped collection will not return
-                        // consistent results between the replica set, so we only
-                        // check non-capped collections in the DB.
-                        var collNames = Object.keys(masterHashes.collections).filter(
-                            coll =>
-                                !this.getDB(dbInfo.name)[coll].isCapped());
-                        collNames.forEach(function(coll) {
-                            assert.eq(masterHashes.collections[coll],
-                                      slaveHashes.collections[coll],
-                                      message + ' dbHash inconsistency for collection ' + coll +
-                                      ' in db ' + dbInfo.name + ' ' + tojson(hashes));
-                        }, this);
+
+            var activeException = false;
+            var msg;
+
+            try {
+                // Get the latest optime from the primary.
+                var replSetStatus = primary.adminCommand({ replSetGetStatus: 1 });
+                assert.commandWorked(replSetStatus,
+                                     phase + ', error getting replication status');
+
+                var primaryInfo = replSetStatus.members.find(memberInfo => memberInfo.self);
+                assert(primaryInfo !== undefined,
+                       phase + ', failed to find self in replication status: ' +
+                       tojson(replSetStatus));
+
+                // Wait for all previous workload operations to complete. We use the "getLastError"
+                // command rather than a replicated write because the primary is currently
+                // fsyncLock()ed to prevent the TTL monitor from running.
+                res = primary.getDB('test').runCommand({
+                    getLastError: 1,
+                    w: replSetNodes,
+                    wtimeout: 5 * 60 * 1000,
+                    wOpTime: primaryInfo.optime
+                });
+                assert.commandWorked(res, phase + ', error awaiting replication');
+
+                if (shouldCheckDBHashes) {
+                    // Compare the dbhashes of the primary and secondaries.
+                    this.checkDBHashes(rst, dbBlacklist);
+                }
+            } catch (e) {
+                activeException = true;
+                throw e;
+            } finally {
+                if (shouldCheckDBHashes && ttlIndexExists) {
+                    // Allow writes on the primary.
+                    res = primary.adminCommand({ fsyncUnlock: 1 });
+
+                    // Returning early would suppress the exception rethrown in the catch block.
+                    if (!res.ok) {
+                        msg = phase + ', failed to unlock the primary, which may cause this' +
+                              ' test to hang: ' + tojson(res);
+                        if (activeException) {
+                            jsTest.log(msg);
+                        } else {
+                            throw new Error(msg);
+                        }
                     }
-                }, this);
-            }, this);
-        }, this);
+                }
+            }
+
+            var totalTime = Date.now() - startTime;
+            jsTest.log('Finished consistency checks of replica set with ' + primary.host +
+                       ' as primary in ' + totalTime + ' ms, ' + phase);
+        });
     };
 
     this.recordConfigServerData = function recordConfigServerData(configServer) {
diff --git a/jstests/concurrency/fsm_libs/runner.js b/jstests/concurrency/fsm_libs/runner.js
index 09f788a8a59..ff27744e596 100644
--- a/jstests/concurrency/fsm_libs/runner.js
+++ b/jstests/concurrency/fsm_libs/runner.js
@@ -427,17 +427,16 @@ var runner = (function() {
         jsTest.log('End of schedule');
     }
 
-    function cleanupWorkload(workload, context, cluster, errors, header, dbHashBlacklist) {
+    function cleanupWorkload(workload, context, cluster, errors, header, dbHashBlacklist,
+                             ttlIndexExists) {
         // Returns true if the workload's teardown succeeds and false if the workload's
         // teardown fails.
         try {
-            // Ensure that secondaries have caught up before workload teardown.
-            cluster.awaitReplication('before workload teardown');
-
-            // Check dbHash, for all DBs not in dbHashBlacklist, on all nodes
-            // before the workload's teardown method is called.
-            cluster.checkDbHashes(dbHashBlacklist, 'before workload teardown');
+            // Ensure that all data has replicated correctly to the secondaries before calling the
+            // workload's teardown method.
+            var phase = 'before workload ' + workload + ' teardown';
+            cluster.checkReplicationConsistency(dbHashBlacklist, phase, ttlIndexExists);
         } catch (e) {
             errors.push(new WorkloadFailure(e.toString(), e.stack, 'main',
                                             header + ' checking consistency on secondaries'));
@@ -485,6 +484,7 @@ var runner = (function() {
         var teardownFailed = false;
         var startTime = Date.now(); // Initialize in case setupWorkload fails below.
         var totalTime;
+        var ttlIndexExists;
 
         jsTest.log('Workload(s) started: ' + workloads.join(' '));
 
@@ -522,11 +522,19 @@ var runner = (function() {
                                                     'Foreground ' + e.workloads.join(' '))));
             }
         } finally {
+            // Checking that the data is consistent across the primary and secondaries requires
+            // additional complexity to prevent writes from occurring in the background on the
+            // primary due to the TTL monitor. If none of the workloads actually created any TTL
+            // indexes (and we dropped the data of any previous workloads), then don't expend any
+            // additional effort in trying to handle that case.
+            ttlIndexExists = workloads.some(workload =>
+                context[workload].config.data.ttlIndexExists);
+
             // Call each foreground workload's teardown function. After all teardowns have completed
             // check if any of them failed.
             var cleanupResults = cleanup.map(workload =>
                 cleanupWorkload(workload, context, cluster, errors,
-                                'Foreground', dbHashBlacklist));
+                                'Foreground', dbHashBlacklist, ttlIndexExists));
             teardownFailed = cleanupResults.some(success => (success === false));
 
             totalTime = Date.now() - startTime;
@@ -544,12 +552,14 @@ var runner = (function() {
         // Throw any existing errors so that the schedule aborts.
         throwError(errors);
 
-        // Ensure that secondaries have caught up after workload teardown.
-        cluster.awaitReplication('after workload-group teardown and data clean-up');
+        // All workload data should have been dropped at this point, so there shouldn't be any TTL
+        // indexes.
+        ttlIndexExists = false;
 
-        // Check dbHash, for all DBs not in dbHashBlacklist, on all nodes
-        // after the workload's teardown method is called.
-        cluster.checkDbHashes(dbHashBlacklist, 'after workload-group teardown and data clean-up');
+        // Ensure that all operations replicated correctly to the secondaries.
+        cluster.checkReplicationConsistency(dbHashBlacklist,
+                                            'after workload-group teardown and data clean-up',
+                                            ttlIndexExists);
     }
 
     function runWorkloads(workloads,
@@ -685,10 +695,18 @@ var runner = (function() {
             }
         } finally {
             try {
+                // Checking that the data is consistent across the primary and secondaries requires
+                // additional complexity to prevent writes from occurring in the background on the
+                // primary due to the TTL monitor. If none of the workloads actually created any TTL
+                // indexes (and we dropped the data of any previous workloads), then don't expend
+                // any additional effort in trying to handle that case.
+                var ttlIndexExists = bgWorkloads.some(bgWorkload =>
+                    bgContext[bgWorkload].config.data.ttlIndexExists);
+
                 // Call each background workload's teardown function.
                 bgCleanup.forEach(bgWorkload =>
                     cleanupWorkload(bgWorkload, bgContext, cluster, errors, 'Background',
-                                    dbHashBlacklist));
+                                    dbHashBlacklist, ttlIndexExists));
 
                 // TODO: Call cleanupWorkloadData() on background workloads here if no background
                 // workload teardown functions fail.
```
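The fsync+lock step only runs when at least one workload reports that it builds a TTL index, which the runner reads from `config.data.ttlIndexExists`. A hypothetical workload that would opt in could look like the sketch below; the collection contents, field name, and expiry value are invented for illustration, and only the overall `$config` shape and the `data.ttlIndexExists` flag come from the FSM framework and this patch.

```js
'use strict';

// Hypothetical FSM workload (not part of this patch). Creating a TTL index and
// advertising it via data.ttlIndexExists tells runner.js to pass ttlIndexExists=true
// into cluster.checkReplicationConsistency(), which then fsync+locks the primary
// while the dbhashes are compared.
var $config = (function() {

    var data = {
        // Read by runner.js as context[workload].config.data.ttlIndexExists.
        ttlIndexExists: true
    };

    var states = {
        insert: function insert(db, collName) {
            // Documents become eligible for TTL deletion 60 seconds after createdAt.
            assertAlways.writeOK(db[collName].insert({createdAt: new Date(),
                                                      x: Random.randInt(100)}));
        }
    };

    var transitions = {
        insert: {insert: 1}
    };

    function setup(db, collName) {
        // The TTL monitor deleting these documents in the background is exactly the
        // write traffic that fsync+lock guards against during the dbhash check.
        assertAlways.commandWorked(db[collName].createIndex({createdAt: 1},
                                                            {expireAfterSeconds: 60}));
    }

    return {
        threadCount: 5,
        iterations: 20,
        startState: 'insert',
        states: states,
        transitions: transitions,
        data: data,
        setup: setup
    };

})();
```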