author    Jonathan Abrahams <jonathan@mongodb.com>    2016-07-26 17:33:45 -0400
committer Jonathan Abrahams <jonathan@mongodb.com>    2016-07-26 17:33:45 -0400
commit    ac162ebc53af7296f7837b2f57fc0cf92c001393 (patch)
tree      fa762c3c853cb261dbb380dbace04378208d3dc7
parent    945cb73a7a443f3b282a072449adc81072c3eced (diff)
download  mongo-ac162ebc53af7296f7837b2f57fc0cf92c001393.tar.gz
SERVER-25273 Concurrency suite dbHash check should call checkDBHashesFsyncLocked
-rw-r--r--  jstests/concurrency/fsm_libs/cluster.js                  | 61
-rw-r--r--  jstests/concurrency/fsm_libs/runner.js                   | 50
-rw-r--r--  jstests/concurrency/fsm_workloads/collmod.js             |  3
-rw-r--r--  jstests/concurrency/fsm_workloads/indexed_insert_ttl.js  |  2
-rw-r--r--  jstests/hooks/check_repl_dbhash.js                       |  7
5 files changed, 30 insertions(+), 93 deletions(-)
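
This change moves the lock-and-compare logic out of the concurrency suite's Cluster helper and into the shared hook in jstests/hooks/check_repl_dbhash.js, so workloads no longer need to advertise a ttlIndexExists flag. The pattern being centralized is roughly the following (a minimal sketch, assuming a ReplSetTest `rst`, its current `primary`, a `dbBlacklist` array and a `phase` label; the real hook is more careful about not masking an earlier exception when the unlock fails):

    // Lock the primary so the TTL monitor cannot delete expired documents while the
    // dbhashes of the replica set members are being collected and compared.
    assert.commandWorked(primary.adminCommand({fsync: 1, lock: 1}), 'failed to lock the primary');
    try {
        // Let the secondaries catch up, then compare their dbhashes against the primary's.
        rst.awaitReplication(60 * 1000 * 5);
        checkDBHashes(rst, dbBlacklist, phase);
    } finally {
        // Always unlock, otherwise later writes against the primary would hang.
        assert.commandWorked(primary.adminCommand({fsyncUnlock: 1}),
                             'failed to unlock the primary');
    }
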
diff --git a/jstests/concurrency/fsm_libs/cluster.js b/jstests/concurrency/fsm_libs/cluster.js
index 954a3872fda..692f32d0399 100644
--- a/jstests/concurrency/fsm_libs/cluster.js
+++ b/jstests/concurrency/fsm_libs/cluster.js
@@ -3,6 +3,7 @@
/**
* Represents a MongoDB cluster.
*/
+load('jstests/hooks/check_repl_dbhash.js'); // Loads the checkDBHashesFsyncLocked function.
var Cluster = function(options) {
if (!(this instanceof Cluster)) {
@@ -472,8 +473,7 @@ var Cluster = function(options) {
});
};
- this.checkReplicationConsistency = function checkReplicationConsistency(
- dbBlacklist, phase, ttlIndexExists) {
+ this.checkReplicationConsistency = function checkReplicationConsistency(dbBlacklist, phase) {
assert(initialized, 'cluster must be initialized first');
if (!this.isReplication()) {
@@ -488,20 +488,21 @@ var Cluster = function(options) {
// Use liveNodes.master instead of getPrimary() to avoid the detection of a new primary.
var primary = rst.liveNodes.master;
- jsTest.log('Starting consistency checks for replica set with ' + primary.host +
- ' assumed to still be primary, ' + phase);
-
- if (shouldCheckDBHashes && ttlIndexExists) {
- // Lock the primary to prevent the TTL monitor from deleting expired documents in
- // the background while we are getting the dbhashes of the replica set members.
- assert.commandWorked(primary.adminCommand({fsync: 1, lock: 1}),
- phase + ', failed to lock the primary');
- }
- var activeException = false;
- var msg;
+ if (shouldCheckDBHashes) {
+ jsTest.log('Starting consistency checks for replica set with ' + primary.host +
+ ' assumed to still be primary, ' + phase);
+
+ // Compare the dbhashes of the primary and secondaries.
+ checkDBHashesFsyncLocked(rst, dbBlacklist, phase);
+ var totalTime = Date.now() - startTime;
+ jsTest.log('Finished consistency checks of replica set with ' + primary.host +
+ ' as primary in ' + totalTime + ' ms, ' + phase);
+ } else {
+ jsTest.log('Skipping consistency checks when the balancer is enabled, ' +
+ 'for replica set with ' + primary.host +
+ ' assumed to still be primary, ' + phase);
- try {
// Get the latest optime from the primary.
var replSetStatus = primary.adminCommand({replSetGetStatus: 1});
assert.commandWorked(replSetStatus, phase + ', error getting replication status');
@@ -511,9 +512,7 @@ var Cluster = function(options) {
phase + ', failed to find self in replication status: ' +
tojson(replSetStatus));
- // Wait for all previous workload operations to complete. We use the "getLastError"
- // command rather than a replicated write because the primary is currently
- // fsyncLock()ed to prevent the TTL monitor from running.
+ // Wait for all previous workload operations to complete, with "getLastError".
res = primary.getDB('test').runCommand({
getLastError: 1,
w: replSetNodes,
@@ -521,35 +520,7 @@ var Cluster = function(options) {
wOpTime: primaryInfo.optime
});
assert.commandWorked(res, phase + ', error awaiting replication');
-
- if (shouldCheckDBHashes) {
- // Compare the dbhashes of the primary and secondaries.
- this.checkDBHashes(rst, dbBlacklist);
- }
- } catch (e) {
- activeException = true;
- throw e;
- } finally {
- if (shouldCheckDBHashes && ttlIndexExists) {
- // Allow writes on the primary.
- res = primary.adminCommand({fsyncUnlock: 1});
-
- // Returning early would suppress the exception rethrown in the catch block.
- if (!res.ok) {
- msg = phase + ', failed to unlock the primary, which may cause this' +
- ' test to hang: ' + tojson(res);
- if (activeException) {
- jsTest.log(msg);
- } else {
- throw new Error(msg);
- }
- }
- }
}
-
- var totalTime = Date.now() - startTime;
- jsTest.log('Finished consistency checks of replica set with ' + primary.host +
- ' as primary in ' + totalTime + ' ms, ' + phase);
});
};
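
For reference, the resulting body of checkReplicationConsistency after this patch, condensed into one place (a sketch reconstructed from the hunks above; `replSets`, `shouldCheckDBHashes` and `replSetNodes` come from the enclosing Cluster closure, so this is illustrative rather than standalone):

    this.checkReplicationConsistency = function checkReplicationConsistency(dbBlacklist, phase) {
        assert(initialized, 'cluster must be initialized first');

        if (!this.isReplication()) {
            return;
        }

        replSets.forEach(rst => {
            var startTime = Date.now();
            var res;

            // Use liveNodes.master instead of getPrimary() to avoid detecting a new primary.
            var primary = rst.liveNodes.master;

            if (shouldCheckDBHashes) {
                jsTest.log('Starting consistency checks for replica set with ' + primary.host +
                           ' assumed to still be primary, ' + phase);

                // The hook now owns the whole fsyncLock / compare dbhashes / fsyncUnlock sequence.
                checkDBHashesFsyncLocked(rst, dbBlacklist, phase);

                var totalTime = Date.now() - startTime;
                jsTest.log('Finished consistency checks of replica set with ' + primary.host +
                           ' as primary in ' + totalTime + ' ms, ' + phase);
            } else {
                jsTest.log('Skipping consistency checks when the balancer is enabled, ' +
                           'for replica set with ' + primary.host +
                           ' assumed to still be primary, ' + phase);

                // Still wait for all previous workload operations to reach the secondaries.
                var replSetStatus = primary.adminCommand({replSetGetStatus: 1});
                assert.commandWorked(replSetStatus, phase + ', error getting replication status');

                var primaryInfo = replSetStatus.members.find(memberInfo => memberInfo.self);
                assert(primaryInfo !== undefined,
                       phase + ', failed to find self in replication status: ' +
                           tojson(replSetStatus));

                res = primary.getDB('test').runCommand({
                    getLastError: 1,
                    w: replSetNodes,
                    // (a wtimeout is also set here in the real file)
                    wOpTime: primaryInfo.optime
                });
                assert.commandWorked(res, phase + ', error awaiting replication');
            }
        });
    };
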
diff --git a/jstests/concurrency/fsm_libs/runner.js b/jstests/concurrency/fsm_libs/runner.js
index 1ca999f74e2..23e6437dd8d 100644
--- a/jstests/concurrency/fsm_libs/runner.js
+++ b/jstests/concurrency/fsm_libs/runner.js
@@ -432,8 +432,7 @@ var runner = (function() {
jsTest.log('End of schedule');
}
- function cleanupWorkload(
- workload, context, cluster, errors, header, dbHashBlacklist, ttlIndexExists) {
+ function cleanupWorkload(workload, context, cluster, errors, header, dbHashBlacklist) {
// Returns true if the workload's teardown succeeds and false if the workload's
// teardown fails.
@@ -441,7 +440,7 @@ var runner = (function() {
// Ensure that all data has replicated correctly to the secondaries before calling the
// workload's teardown method.
var phase = 'before workload ' + workload + ' teardown';
- cluster.checkReplicationConsistency(dbHashBlacklist, phase, ttlIndexExists);
+ cluster.checkReplicationConsistency(dbHashBlacklist, phase);
} catch (e) {
errors.push(new WorkloadFailure(
e.toString(), e.stack, 'main', header + ' checking consistency on secondaries'));
@@ -497,7 +496,6 @@ var runner = (function() {
var teardownFailed = false;
var startTime = Date.now(); // Initialize in case setupWorkload fails below.
var totalTime;
- var ttlIndexExists;
jsTest.log('Workload(s) started: ' + workloads.join(' '));
@@ -535,23 +533,11 @@ var runner = (function() {
e.err, e.stack, e.tid, 'Foreground ' + e.workloads.join(' '))));
}
} finally {
- // Checking that the data is consistent across the primary and secondaries requires
- // additional complexity to prevent writes from occurring in the background on the
- // primary due to the TTL monitor. If none of the workloads actually created any TTL
- // indexes (and we dropped the data of any previous workloads), then don't expend any
- // additional effort in trying to handle that case.
- ttlIndexExists =
- workloads.some(workload => context[workload].config.data.ttlIndexExists);
-
// Call each foreground workload's teardown function. After all teardowns have completed
// check if any of them failed.
- var cleanupResults = cleanup.map(workload => cleanupWorkload(workload,
- context,
- cluster,
- errors,
- 'Foreground',
- dbHashBlacklist,
- ttlIndexExists));
+ var cleanupResults =
+ cleanup.map(workload => cleanupWorkload(
+ workload, context, cluster, errors, 'Foreground', dbHashBlacklist));
teardownFailed = cleanupResults.some(success => (success === false));
totalTime = Date.now() - startTime;
@@ -568,13 +554,9 @@ var runner = (function() {
// Throw any existing errors so that the schedule aborts.
throwError(errors);
- // All workload data should have been dropped at this point, so there shouldn't be any TTL
- // indexes.
- ttlIndexExists = false;
-
// Ensure that all operations replicated correctly to the secondaries.
- cluster.checkReplicationConsistency(
- dbHashBlacklist, 'after workload-group teardown and data clean-up', ttlIndexExists);
+ cluster.checkReplicationConsistency(dbHashBlacklist,
+ 'after workload-group teardown and data clean-up');
}
function runWorkloads(
@@ -714,22 +696,10 @@ var runner = (function() {
}
} finally {
try {
- // Checking that the data is consistent across the primary and secondaries requires
- // additional complexity to prevent writes from occurring in the background on the
- // primary due to the TTL monitor. If none of the workloads actually created any TTL
- // indexes (and we dropped the data of any previous workloads), then don't expend
- // any additional effort in trying to handle that case.
- var ttlIndexExists = bgWorkloads.some(
- bgWorkload => bgContext[bgWorkload].config.data.ttlIndexExists);
-
// Call each background workload's teardown function.
- bgCleanup.forEach(bgWorkload => cleanupWorkload(bgWorkload,
- bgContext,
- cluster,
- errors,
- 'Background',
- dbHashBlacklist,
- ttlIndexExists));
+ bgCleanup.forEach(
+ bgWorkload => cleanupWorkload(
+ bgWorkload, bgContext, cluster, errors, 'Background', dbHashBlacklist));
// TODO: Call cleanupWorkloadData() on background workloads here if no background
// workload teardown functions fail.
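
The runner-side change is a signature cleanup: cleanupWorkload no longer computes or forwards a ttlIndexExists flag, and the foreground and background teardown paths now call it identically. A minimal sketch of the slimmed-down helper and its callers, assuming the WorkloadFailure type and the surrounding runner state shown in the hunks above:

    function cleanupWorkload(workload, context, cluster, errors, header, dbHashBlacklist) {
        // Returns true if the workload's teardown succeeds and false if it fails.
        try {
            // Ensure that all data has replicated correctly to the secondaries before calling
            // the workload's teardown method.
            var phase = 'before workload ' + workload + ' teardown';
            cluster.checkReplicationConsistency(dbHashBlacklist, phase);
        } catch (e) {
            errors.push(new WorkloadFailure(
                e.toString(), e.stack, 'main', header + ' checking consistency on secondaries'));
            return false;
        }
        // The workload's own teardown call follows here, unchanged by this patch.
        return true;
    }

    // Foreground and background callers now pass the same six arguments.
    var cleanupResults = cleanup.map(
        workload => cleanupWorkload(workload, context, cluster, errors, 'Foreground', dbHashBlacklist));
    bgCleanup.forEach(
        bgWorkload => cleanupWorkload(bgWorkload, bgContext, cluster, errors, 'Background', dbHashBlacklist));
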
diff --git a/jstests/concurrency/fsm_workloads/collmod.js b/jstests/concurrency/fsm_workloads/collmod.js
index 7b803cd3284..8e2216c6280 100644
--- a/jstests/concurrency/fsm_workloads/collmod.js
+++ b/jstests/concurrency/fsm_workloads/collmod.js
@@ -14,8 +14,7 @@ var $config = (function() {
var data = {
numDocs: 1000,
- maxTTL: 5000, // max time to live
- ttlIndexExists: true
+ maxTTL: 5000 // max time to live
};
var states = (function() {
diff --git a/jstests/concurrency/fsm_workloads/indexed_insert_ttl.js b/jstests/concurrency/fsm_workloads/indexed_insert_ttl.js
index 90aa6d3baf7..d872cbe7e6a 100644
--- a/jstests/concurrency/fsm_workloads/indexed_insert_ttl.js
+++ b/jstests/concurrency/fsm_workloads/indexed_insert_ttl.js
@@ -58,7 +58,7 @@ var $config = (function() {
states: states,
transitions: transitions,
setup: setup,
- data: {ttlSeconds: 5, ttlIndexExists: true},
+ data: {ttlSeconds: 5},
teardown: teardown
};
})();
diff --git a/jstests/hooks/check_repl_dbhash.js b/jstests/hooks/check_repl_dbhash.js
index 31382c72770..332e44eb13f 100644
--- a/jstests/hooks/check_repl_dbhash.js
+++ b/jstests/hooks/check_repl_dbhash.js
@@ -75,7 +75,7 @@ function dumpCollectionDiff(primary, secondary, dbName, collName) {
}
}
-function checkDBHashesFsyncLocked(rst) {
+function checkDBHashesFsyncLocked(rst, dbBlacklist = [], phase = 'after test hook') {
// Call getPrimary to populate rst with information about the nodes.
var primary = rst.getPrimary();
assert(primary, 'calling getPrimary() failed');
@@ -113,10 +113,7 @@ function checkDBHashesFsyncLocked(rst) {
assert.commandWorked(primary.adminCommand({fsync: 1, lock: 1}),
'failed to lock the primary');
rst.awaitReplication(60 * 1000 * 5);
-
- var phaseName = 'after test hook';
- var blacklist = [];
- checkDBHashes(rst, blacklist, phaseName);
+ checkDBHashes(rst, dbBlacklist, phase);
} catch (e) {
activeException = true;
throw e;
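
With the optional parameters added above, one hook serves both callers, and the defaults preserve the existing standalone behaviour. Illustrative calls (the phase string below is only an example label, not a value taken from the tests):

    // From the standalone test hook: defaults to an empty blacklist and phase 'after test hook'.
    checkDBHashesFsyncLocked(rst);

    // From the concurrency suite (fsm_libs/cluster.js): explicit blacklist and phase label.
    checkDBHashesFsyncLocked(rst, dbBlacklist, 'before workload teardown');
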