summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGregory Wlodarek <gregory.wlodarek@mongodb.com>2019-04-16 14:02:29 -0400
committerGregory Wlodarek <gregory.wlodarek@mongodb.com>2019-04-25 11:06:12 -0400
commit5ae506cb8fc73074bb46d6be9667ed1825e98d69 (patch)
treea6e27ff6db42388a945ee844c6e51be09bf44dc6
parent935157a3cf54ca802419cd8d92ff4b137fbe2949 (diff)
downloadmongo-5ae506cb8fc73074bb46d6be9667ed1825e98d69.tar.gz
SERVER-39321 Re-enable the CheckReplDBHashInBackground hook
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_replication.yml3
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_replication_multi_stmt_txn.yml3
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_replication_multi_stmt_txn_ubsan.yml3
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_replication_ubsan.yml3
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication.yml3
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_replication_100ms_refresh_jscore_passthrough.yml9
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_replication_10sec_refresh_jscore_passthrough.yml9
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_replication_1sec_refresh_jscore_passthrough.yml9
-rw-r--r--buildscripts/resmokeconfig/suites/logical_session_cache_replication_default_refresh_jscore_passthrough.yml9
-rw-r--r--buildscripts/resmokeconfig/suites/replica_sets_jscore_passthrough.yml10
-rw-r--r--buildscripts/resmokeconfig/suites/replica_sets_multi_oplog_txns_jscore_passthrough.yml10
-rw-r--r--jstests/hooks/run_check_repl_dbhash_background.js35
-rw-r--r--src/mongo/db/commands/dbhash.cpp7
13 files changed, 60 insertions, 53 deletions
diff --git a/buildscripts/resmokeconfig/suites/concurrency_replication.yml b/buildscripts/resmokeconfig/suites/concurrency_replication.yml
index 4a05e4a6a7f..a2270cc4c32 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_replication.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_replication.yml
@@ -18,6 +18,7 @@ selector:
executor:
archive:
hooks:
+ - CheckReplDBHashInBackground
- CheckReplDBHash
- ValidateCollections
tests: true
@@ -30,7 +31,7 @@ executor:
# validating the entire contents of the collection.
#
# TODO SERVER-26466: Add CheckReplOplogs hook to the concurrency suite.
- # TODO SERVER-39321: Re-enable and archive the CheckReplDBHashInBackground hook.
+ - class: CheckReplDBHashInBackground
- class: CheckReplDBHash
- class: ValidateCollections
- class: CleanupConcurrencyWorkloads
diff --git a/buildscripts/resmokeconfig/suites/concurrency_replication_multi_stmt_txn.yml b/buildscripts/resmokeconfig/suites/concurrency_replication_multi_stmt_txn.yml
index cc33048a6e1..9291a7eb561 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_replication_multi_stmt_txn.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_replication_multi_stmt_txn.yml
@@ -38,6 +38,7 @@ selector:
executor:
archive:
hooks:
+ - CheckReplDBHashInBackground
- CheckReplDBHash
- ValidateCollections
tests: true
@@ -55,7 +56,7 @@ executor:
# validating the entire contents of the collection.
#
# TODO SERVER-26466: Add CheckReplOplogs hook to the concurrency suite.
- # TODO SERVER-39321: Re-enable and archive the CheckReplDBHashInBackground hook.
+ - class: CheckReplDBHashInBackground
- class: CheckReplDBHash
- class: ValidateCollections
- class: CleanupConcurrencyWorkloads
diff --git a/buildscripts/resmokeconfig/suites/concurrency_replication_multi_stmt_txn_ubsan.yml b/buildscripts/resmokeconfig/suites/concurrency_replication_multi_stmt_txn_ubsan.yml
index 4a9883de4f3..0e5a2754774 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_replication_multi_stmt_txn_ubsan.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_replication_multi_stmt_txn_ubsan.yml
@@ -38,6 +38,7 @@ selector:
executor:
archive:
hooks:
+ - CheckReplDBHashInBackground
- CheckReplDBHash
- ValidateCollections
tests: true
@@ -55,7 +56,7 @@ executor:
# validating the entire contents of the collection.
#
# TODO SERVER-26466: Add CheckReplOplogs hook to the concurrency suite.
- # TODO SERVER-39321: Re-enable and archive the CheckReplDBHashInBackground hook.
+ - class: CheckReplDBHashInBackground
- class: CheckReplDBHash
- class: ValidateCollections
- class: CleanEveryN
diff --git a/buildscripts/resmokeconfig/suites/concurrency_replication_ubsan.yml b/buildscripts/resmokeconfig/suites/concurrency_replication_ubsan.yml
index 07e6113f330..d535361c29f 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_replication_ubsan.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_replication_ubsan.yml
@@ -20,6 +20,7 @@ executor:
hooks:
- CheckReplDBHash
- ValidateCollections
+ - CheckReplDBHashInBackground
tests: true
config:
shell_options:
@@ -30,7 +31,7 @@ executor:
# validating the entire contents of the collection.
#
# TODO SERVER-26466: Add CheckReplOplogs hook to the concurrency suite.
- # TODO SERVER-39321: Re-enable and archive the CheckReplDBHashInBackground hook.
+ - class: CheckReplDBHashInBackground
- class: CheckReplDBHash
- class: ValidateCollections
- class: CleanEveryN
diff --git a/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication.yml b/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication.yml
index 9983712e0b2..e073ab2a05d 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_simultaneous_replication.yml
@@ -36,6 +36,7 @@ selector:
executor:
archive:
hooks:
+ - CheckReplDBHashInBackground
- CheckReplDBHash
- ValidateCollections
tests: true
@@ -51,7 +52,7 @@ executor:
# validating the entire contents of the collection.
#
# TODO SERVER-26466: Add CheckReplOplogs hook to the concurrency suite.
- # TODO SERVER-39321: Re-enable and archive the CheckReplDBHashInBackground hook.
+ - class: CheckReplDBHashInBackground
- class: CheckReplDBHash
- class: ValidateCollections
- class: CleanupConcurrencyWorkloads
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_100ms_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_100ms_refresh_jscore_passthrough.yml
index 81e6b26b006..0b3d1e5d440 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_100ms_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_100ms_refresh_jscore_passthrough.yml
@@ -12,12 +12,6 @@ selector:
# These tests change the featureCompatibilityVersion which makes it so transactions aren't
# supported.
- jstests/core/txns/abort_transactions_on_FCV_downgrade.js
- # TODO SERVER-39321: Remove the following block of blacklists.
- # These tests change the transactionLifetimeLimitSeconds server parameter which conflicts with how
- # the CheckReplDBHashInBackground hook doesn't want transactions to be reaped while it is running.
- - jstests/core/txns/abort_expired_transaction.js
- - jstests/core/txns/abort_transaction_thread_does_not_block_on_locks.js
- - jstests/core/txns/kill_op_on_txn_expiry.js
# The set_param1.js test attempts to compare the response from running the {getParameter: "*"}
# command multiple times, which may observe the change to the "transactionLifetimeLimitSeconds"
# server parameter.
@@ -37,6 +31,7 @@ selector:
executor:
archive:
hooks:
+ - CheckReplDBHashInBackground
- CheckReplDBHash
- CheckReplOplogs
- ValidateCollections
@@ -48,7 +43,7 @@ executor:
# The CheckReplDBHash hook waits until all operations have replicated to and have been applied
# on the secondaries, so we run the ValidateCollections hook after it to ensure we're
# validating the entire contents of the collection.
- # TODO SERVER-39321: Re-enable and archive the CheckReplDBHashInBackground hook.
+ - class: CheckReplDBHashInBackground
- class: CheckReplOplogs
- class: CheckReplDBHash
- class: ValidateCollections
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_10sec_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_10sec_refresh_jscore_passthrough.yml
index 93e35ee865d..b6b69c8e990 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_10sec_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_10sec_refresh_jscore_passthrough.yml
@@ -12,12 +12,6 @@ selector:
# These tests change the featureCompatibilityVersion which makes it so transactions aren't
# supported.
- jstests/core/txns/abort_transactions_on_FCV_downgrade.js
- # TODO SERVER-39321: Remove the following block of blacklists.
- # These tests change the transactionLifetimeLimitSeconds server parameter which conflicts with how
- # the CheckReplDBHashInBackground hook doesn't want transactions to be reaped while it is running.
- - jstests/core/txns/abort_expired_transaction.js
- - jstests/core/txns/abort_transaction_thread_does_not_block_on_locks.js
- - jstests/core/txns/kill_op_on_txn_expiry.js
# The set_param1.js test attempts to compare the response from running the {getParameter: "*"}
# command multiple times, which may observe the change to the "transactionLifetimeLimitSeconds"
# server parameter.
@@ -37,6 +31,7 @@ selector:
executor:
archive:
hooks:
+ - CheckReplDBHashInBackground
- CheckReplDBHash
- CheckReplOplogs
- ValidateCollections
@@ -48,7 +43,7 @@ executor:
# The CheckReplDBHash hook waits until all operations have replicated to and have been applied
# on the secondaries, so we run the ValidateCollections hook after it to ensure we're
# validating the entire contents of the collection.
- # TODO SERVER-39321: Re-enable and archive the CheckReplDBHashInBackground hook.
+ - class: CheckReplDBHashInBackground
- class: CheckReplOplogs
- class: CheckReplDBHash
- class: ValidateCollections
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_1sec_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_1sec_refresh_jscore_passthrough.yml
index c3859a94dd8..3ae31db3873 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_1sec_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_1sec_refresh_jscore_passthrough.yml
@@ -12,12 +12,6 @@ selector:
# These tests change the featureCompatibilityVersion which makes it so transactions aren't
# supported.
- jstests/core/txns/abort_transactions_on_FCV_downgrade.js
- # TODO SERVER-39321: Remove the following block of blacklists.
- # These tests change the transactionLifetimeLimitSeconds server parameter which conflicts with how
- # the CheckReplDBHashInBackground hook doesn't want transactions to be reaped while it is running.
- - jstests/core/txns/abort_expired_transaction.js
- - jstests/core/txns/abort_transaction_thread_does_not_block_on_locks.js
- - jstests/core/txns/kill_op_on_txn_expiry.js
# The set_param1.js test attempts to compare the response from running the {getParameter: "*"}
# command multiple times, which may observe the change to the "transactionLifetimeLimitSeconds"
# server parameter.
@@ -37,6 +31,7 @@ selector:
executor:
archive:
hooks:
+ - CheckReplDBHashInBackground
- CheckReplDBHash
- CheckReplOplogs
- ValidateCollections
@@ -48,7 +43,7 @@ executor:
# The CheckReplDBHash hook waits until all operations have replicated to and have been applied
# on the secondaries, so we run the ValidateCollections hook after it to ensure we're
# validating the entire contents of the collection.
- # TODO SERVER-39321: Re-enable and archive the CheckReplDBHashInBackground hook.
+ - class: CheckReplDBHashInBackground
- class: CheckReplOplogs
- class: CheckReplDBHash
- class: ValidateCollections
diff --git a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_default_refresh_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_default_refresh_jscore_passthrough.yml
index 83c6b88f289..c4417ac733c 100644
--- a/buildscripts/resmokeconfig/suites/logical_session_cache_replication_default_refresh_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/logical_session_cache_replication_default_refresh_jscore_passthrough.yml
@@ -12,12 +12,6 @@ selector:
# These tests change the featureCompatibilityVersion which makes it so transactions aren't
# supported.
- jstests/core/txns/abort_transactions_on_FCV_downgrade.js
- # TODO SERVER-39321: Remove the following block of blacklists.
- # These tests change the transactionLifetimeLimitSeconds server parameter which conflicts with how
- # the CheckReplDBHashInBackground hook doesn't want transactions to be reaped while it is running.
- - jstests/core/txns/abort_expired_transaction.js
- - jstests/core/txns/abort_transaction_thread_does_not_block_on_locks.js
- - jstests/core/txns/kill_op_on_txn_expiry.js
# The set_param1.js test attempts to compare the response from running the {getParameter: "*"}
# command multiple times, which may observe the change to the "transactionLifetimeLimitSeconds"
# server parameter.
@@ -37,6 +31,7 @@ selector:
executor:
archive:
hooks:
+ - CheckReplDBHashInBackground
- CheckReplDBHash
- CheckReplOplogs
- ValidateCollections
@@ -48,7 +43,7 @@ executor:
# The CheckReplDBHash hook waits until all operations have replicated to and have been applied
# on the secondaries, so we run the ValidateCollections hook after it to ensure we're
# validating the entire contents of the collection.
- # TODO SERVER-39321: Re-enable and archive the CheckReplDBHashInBackground hook.
+ - class: CheckReplDBHashInBackground
- class: CheckReplOplogs
- class: CheckReplDBHash
- class: ValidateCollections
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_jscore_passthrough.yml
index 502ad225144..ef37c323fc5 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_jscore_passthrough.yml
@@ -25,6 +25,7 @@ selector:
executor:
archive:
hooks:
+ - CheckReplDBHashInBackground
- CheckReplDBHash
- CheckReplOplogs
- ValidateCollections
@@ -36,7 +37,7 @@ executor:
# The CheckReplDBHash hook waits until all operations have replicated to and have been applied
# on the secondaries, so we run the ValidateCollections hook after it to ensure we're
# validating the entire contents of the collection.
- # TODO SERVER-39321: Re-enable and archive the CheckReplDBHashInBackground hook.
+ - class: CheckReplDBHashInBackground
- class: CheckReplOplogs
- class: CheckReplDBHash
- class: ValidateCollections
@@ -47,11 +48,4 @@ executor:
mongod_options:
set_parameters:
enableTestCommands: 1
- # TODO SERVER-39321: Remove the following block now that SERVER-35377 has been resolved.
- #
- # When running tests that intentionally trigger a DuplicateKeyError, we somehow end up
- # performing an atClusterTime read in the CheckReplDBHashInBackground hook based on an
- # operationTime that is greater than anything in the oplog. The periodic no-op writer must
- # be enabled to ensure that we eventually reach the clusterTime we are waiting for.
- writePeriodicNoops: 1
num_nodes: 2
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_oplog_txns_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_oplog_txns_jscore_passthrough.yml
index cf0e6dd95cb..7b39c059c64 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_multi_oplog_txns_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_oplog_txns_jscore_passthrough.yml
@@ -19,6 +19,7 @@ selector:
executor:
archive:
hooks:
+ - CheckReplDBHashInBackground
- CheckReplDBHash
- CheckReplOplogs
- ValidateCollections
@@ -30,7 +31,7 @@ executor:
# The CheckReplDBHash hook waits until all operations have replicated to and have been applied
# on the secondaries, so we run the ValidateCollections hook after it to ensure we're
# validating the entire contents of the collection.
- # TODO SERVER-39321: Re-enable and archive the CheckReplDBHashInBackground hook.
+ - class: CheckReplDBHashInBackground
- class: CheckReplOplogs
- class: CheckReplDBHash
- class: ValidateCollections
@@ -42,11 +43,4 @@ executor:
set_parameters:
enableTestCommands: 1
useMultipleOplogEntryFormatForTransactions: true
- # TODO SERVER-39321: Remove the following block now that SERVER-35377 has been resolved.
- #
- # When running tests that intentionally trigger a DuplicateKeyError, we somehow end up
- # performing an atClusterTime read in the CheckReplDBHashInBackground hook based on an
- # operationTime that is greater than anything in the oplog. The periodic no-op writer must
- # be enabled to ensure that we eventually reach the clusterTime we are waiting for.
- writePeriodicNoops: 1
num_nodes: 2
diff --git a/jstests/hooks/run_check_repl_dbhash_background.js b/jstests/hooks/run_check_repl_dbhash_background.js
index 8cd338b2d32..712ea6013a7 100644
--- a/jstests/hooks/run_check_repl_dbhash_background.js
+++ b/jstests/hooks/run_check_repl_dbhash_background.js
@@ -251,7 +251,9 @@
for (let dbName of dbNames) {
let result;
let clusterTime;
+ let previousClusterTime;
let hasTransientError;
+ let performNoopWrite;
// The isTransientError() function is responsible for setting hasTransientError to true.
const isTransientError = (e) => {
@@ -265,18 +267,36 @@
// ReplSetTest#getCollectionDiffUsingSessions() upon detecting a dbHash mismatch. It is
// presumed to still be useful to know that a bug exists even if we cannot get more
// diagnostics for it.
- if (e.code === ErrorCodes.Interrupted || e.code === ErrorCodes.SnapshotUnavailable) {
+ if (e.code === ErrorCodes.Interrupted) {
hasTransientError = true;
- return true;
}
- return false;
+ // Perform a no-op write to the primary if the clusterTime between each call remains
+ // the same and if we encounter the SnapshotUnavailable error, as the secondaries' minimum
+ // timestamp can be greater than the primary's minimum timestamp.
+ if (e.code === ErrorCodes.SnapshotUnavailable) {
+ if (bsonBinaryEqual(clusterTime, previousClusterTime)) {
+ performNoopWrite = true;
+ }
+ hasTransientError = true;
+ }
+
+ // InvalidOptions can be returned when $_internalReadAtClusterTime is greater than the
+ // all-committed timestamp. As the dbHash command is running in the background at
+ // varying times, it's possible that we may run dbHash while a prepared transaction
+ // has yet to commit or abort.
+ if (e.code === ErrorCodes.InvalidOptions) {
+ hasTransientError = true;
+ }
+
+ return hasTransientError;
};
do {
// SERVER-38928: Due to races around advancing last applied, there's technically no
// guarantee that a primary will report a later operation time than its
// secondaries. Perform the snapshot read at the latest reported operation time.
+ previousClusterTime = clusterTime;
clusterTime = sessions[0].getOperationTime();
let signedClusterTime = sessions[0].getClusterTime();
for (let sess of sessions.slice(1)) {
@@ -297,12 +317,19 @@
}
hasTransientError = false;
+ performNoopWrite = false;
try {
result = checkCollectionHashesForDB(dbName, clusterTime);
} catch (e) {
if (isTransientError(e)) {
- debugInfo.push({"transientError": e});
+ if (performNoopWrite) {
+ const primarySession = sessions[0];
+ assert.commandWorked(primarySession.getDatabase(dbName).adminCommand(
+ {appendOplogNote: 1, data: {}}));
+ }
+
+ debugInfo.push({"transientError": e, "performNoopWrite": performNoopWrite});
continue;
}
diff --git a/src/mongo/db/commands/dbhash.cpp b/src/mongo/db/commands/dbhash.cpp
index 3706db1e133..6a6ab9cd1a3 100644
--- a/src/mongo/db/commands/dbhash.cpp
+++ b/src/mongo/db/commands/dbhash.cpp
@@ -181,11 +181,18 @@ public:
// We lock the entire database in S-mode in order to ensure that the contents will not
// change for the snapshot.
auto lockMode = LockMode::MODE_S;
+ boost::optional<ShouldNotConflictWithSecondaryBatchApplicationBlock> shouldNotConflictBlock;
if (opCtx->recoveryUnit()->getTimestampReadSource() ==
RecoveryUnit::ReadSource::kProvided) {
// However, if we are performing a read at a timestamp, then we only need to lock the
// database in intent mode to ensure that none of the collections get dropped.
lockMode = LockMode::MODE_IS;
+
+ // Additionally, if we are performing a read at a timestamp, then we allow oplog
+ // application to proceed concurrently with the dbHash command. This is done
+ // to ensure a prepare conflict is able to eventually be resolved by processing a
+ // later commitTransaction or abortTransaction oplog entry.
+ shouldNotConflictBlock.emplace(opCtx->lockState());
}
AutoGetDb autoDb(opCtx, ns, lockMode);
Database* db = autoDb.getDb();