diff options
author | Judah Schvimer <judah@mongodb.com> | 2020-06-01 19:02:23 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-06-17 15:57:44 +0000 |
commit | e5a25068d1e122488ea5e8d400e86e96022e4c72 (patch) | |
tree | 98785d27c3669e2dd3ac2859ad3ff08c87606efb | |
parent | f5b36d71a86cf3c46a32b9d2eb109b6ec760b512 (diff) | |
download | mongo-e5a25068d1e122488ea5e8d400e86e96022e4c72.tar.gz |
SERVER-48527 Aborting in-progress transactions on step-up should clear session state before returning
(cherry picked from commit 07169364c2aece0fb99f4a97b796196edb033efa)
4 files changed, 114 insertions, 0 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml index 4be1df1d525..49d61358cc9 100644 --- a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml +++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml @@ -134,6 +134,7 @@ selector: - jstests/sharding/mongos_no_replica_set_refresh.js - jstests/sharding/movechunk_interrupt_at_primary_stepdown.js - jstests/sharding/primary_config_server_blackholed_from_mongos.js + - jstests/sharding/refine_collection_shard_key_abort_on_stepup.js # Nothing is affected by config server step down - jstests/sharding/basic_sharding_params.js # ShardingTest is never used, so continuous step down thread never starts diff --git a/jstests/sharding/refine_collection_shard_key_abort_on_stepup.js b/jstests/sharding/refine_collection_shard_key_abort_on_stepup.js new file mode 100644 index 00000000000..1953ea273aa --- /dev/null +++ b/jstests/sharding/refine_collection_shard_key_abort_on_stepup.js @@ -0,0 +1,99 @@ +// +// Tests that refineCollectionShardKey can be safely aborted on step-up after a failover leads to +// only half of the internal transaction getting replicated. This is a regression test for +// SERVER-48527. +// +// Tag this test as 'requires_find_command' to prevent it from running in the legacy passthrough. +// @tags: [ +// requires_find_command, +// requires_fcv_44, +// ] +// + +(function() { +'use strict'; + +load("jstests/libs/fail_point_util.js"); +load('jstests/libs/parallel_shell_helpers.js'); +load("jstests/replsets/rslib.js"); + +const st = new ShardingTest({ + shards: 1, + mongos: 1, + useBridge: true, + other: { + configOptions: { + setParameter: { + // Ensure transactions have multiple oplog entries. + maxNumberOfTransactionOperationsInSingleOplogEntry: 1, + bgSyncOplogFetcherBatchSize: 1 + } + } + } +}); +jsTestLog("Reconfig CSRS to have stable primary"); +const csrs = st.configRS; +let cfg = csrs.getReplSetConfigFromNode(0); +cfg.settings.electionTimeoutMillis = csrs.kDefaultTimeoutMS; +cfg.settings.catchUpTimeoutMillis = 0; +cfg.settings.chainingAllowed = false; +reconfig(csrs, cfg, true); +waitForConfigReplication(csrs.getPrimary()); +csrs.awaitReplication(); + +const kDbName = jsTestName(); +const kCollName = 'foo'; +const kNsName = kDbName + '.' + kCollName; + +assert.commandWorked(st.s.adminCommand({enableSharding: kDbName})); +assert.commandWorked(st.s.adminCommand({shardCollection: kNsName, key: {_id: 1}})); +assert.commandWorked(st.s.getCollection(kNsName).createIndex({_id: 1, aKey: 1})); + +let primary = csrs.getPrimary(); +let secondaries = csrs.getSecondaries(); +let newPrimary = secondaries[0]; +st.s.disconnect(secondaries); + +jsTest.log("Stop secondary oplog replication on the extra secondary so it will vote for anyone"); +const stopReplProducerFailPoint = configureFailPoint(secondaries[1], 'stopReplProducer'); + +jsTest.log("Stop secondary oplog replication before the last operation in the transaction"); +// The stopReplProducerOnDocument failpoint ensures that secondary stops replicating before +// applying the last operation in the transaction. This depends on the oplog fetcher batch size +// being 1. This also relies on the last operation in the transaction modifying 'config.chunks'. +const stopReplProducerOnDocumentFailPoint = configureFailPoint( + newPrimary, "stopReplProducerOnDocument", {document: {"applyOps.ns": "config.chunks"}}); + +jsTestLog("Refining collection shard key in a parallel shell"); +let parallelRefineFn = function(ns) { + assert.commandWorked(db.adminCommand({refineCollectionShardKey: ns, key: {_id: 1, aKey: 1}})); +}; +const awaitShell = startParallelShell(funWithArgs(parallelRefineFn, kNsName), st.s.port); + +jsTestLog("Wait for the new primary to block on fail point"); +stopReplProducerOnDocumentFailPoint.wait(); + +jsTestLog(`Triggering CSRS failover from ${primary.host} to ${newPrimary.host}`); +assert.commandWorked(newPrimary.adminCommand({replSetStepUp: 1})); +st.s.reconnect(newPrimary); + +jsTestLog("Waiting for set to agree on the new primary, " + newPrimary.host); +csrs.awaitNodesAgreeOnPrimary(); + +jsTestLog("Wait for parallel shell to complete"); +awaitShell(); + +// Make sure we won't apply the whole transaction by any chance. +jsTestLog("Wait for the new primary to stop replication after primary catch-up"); +checkLog.contains(newPrimary, "Stopping replication producer"); + +jsTestLog("Enable replication on the new primary so that it can finish state transition"); +stopReplProducerOnDocumentFailPoint.off(); +assert.eq(csrs.getPrimary(), newPrimary); + +jsTestLog("Re-enable replication on the extra secondary so it can catch up"); +stopReplProducerFailPoint.off(); +csrs.awaitReplication(); + +st.stop(); +})(); diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h index 46626d843b7..d8f92af60cf 100644 --- a/src/mongo/db/operation_context.h +++ b/src/mongo/db/operation_context.h @@ -407,6 +407,19 @@ public: } /** + * Clears metadata associated with a multi-document transaction. + */ + void resetMultiDocumentTransactionState() { + invariant(_inMultiDocumentTransaction); + invariant(!_writeUnitOfWork); + invariant(_ruState == WriteUnitOfWork::RecoveryUnitState::kNotInUnitOfWork); + _inMultiDocumentTransaction = false; + _isStartingMultiDocumentTransaction = false; + _lsid = boost::none; + _txnNumber = boost::none; + } + + /** * Returns whether this operation is starting a multi-document transaction. */ bool isStartingMultiDocumentTransaction() const { diff --git a/src/mongo/db/session_catalog_mongod.cpp b/src/mongo/db/session_catalog_mongod.cpp index 15fc2edca02..7ae2190fe77 100644 --- a/src/mongo/db/session_catalog_mongod.cpp +++ b/src/mongo/db/session_catalog_mongod.cpp @@ -212,6 +212,7 @@ void abortInProgressTransactions(OperationContext* opCtx) { "sessionId"_attr = txnRecord.getSessionId().toBSON(), "txnNumber"_attr = txnRecord.getTxnNum()); txnParticipant.abortTransaction(opCtx); + opCtx->resetMultiDocumentTransactionState(); } } } // namespace |