summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml 1
-rw-r--r-- jstests/sharding/refine_collection_shard_key_abort_on_stepup.js 98
-rw-r--r-- src/mongo/db/operation_context.h 13
-rw-r--r-- src/mongo/db/session_catalog_mongod.cpp 1
4 files changed, 113 insertions, 0 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
index fb4c7daa9ae..92bc74dd109 100644
--- a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
@@ -137,6 +137,7 @@ selector:
- jstests/sharding/mongos_no_replica_set_refresh.js
- jstests/sharding/movechunk_interrupt_at_primary_stepdown.js
- jstests/sharding/primary_config_server_blackholed_from_mongos.js
+ - jstests/sharding/refine_collection_shard_key_abort_on_stepup.js
# Nothing is affected by config server step down
- jstests/sharding/basic_sharding_params.js
# ShardingTest is never used, so continuous step down thread never starts
diff --git a/jstests/sharding/refine_collection_shard_key_abort_on_stepup.js b/jstests/sharding/refine_collection_shard_key_abort_on_stepup.js
new file mode 100644
index 00000000000..ab486fb4a29
--- /dev/null
+++ b/jstests/sharding/refine_collection_shard_key_abort_on_stepup.js
@@ -0,0 +1,98 @@
+//
+// Tests that refineCollectionShardKey can be safely aborted on step-up after a failover leads to
+// only half of the internal transaction getting replicated. This is a regression test for
+// SERVER-48527.
+//
+// Tag this test as 'requires_find_command' to prevent it from running in the legacy passthrough.
+// @tags: [
+// requires_find_command,
+// ]
+//
+
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load('jstests/libs/parallel_shell_helpers.js');
+load("jstests/replsets/rslib.js");
+
+const st = new ShardingTest({
+ shards: 1,
+ mongos: 1,
+ useBridge: true,
+ other: {
+ configOptions: {
+ setParameter: {
+ // Limit each oplog entry to one operation so transactions span multiple oplog entries.
+ maxNumberOfTransactionOperationsInSingleOplogEntry: 1,
+ bgSyncOplogFetcherBatchSize: 1
+ }
+ }
+ }
+});
+jsTestLog("Reconfig CSRS to have stable primary");
+const csrs = st.configRS;
+let cfg = csrs.getReplSetConfigFromNode(0);
+cfg.settings.electionTimeoutMillis = csrs.kDefaultTimeoutMS;
+cfg.settings.catchUpTimeoutMillis = 0;
+cfg.settings.chainingAllowed = false;
+reconfig(csrs, cfg, true);
+waitForConfigReplication(csrs.getPrimary());
+csrs.awaitReplication();
+
+const kDbName = jsTestName();
+const kCollName = 'foo';
+const kNsName = kDbName + '.' + kCollName;
+
+assert.commandWorked(st.s.adminCommand({enableSharding: kDbName}));
+assert.commandWorked(st.s.adminCommand({shardCollection: kNsName, key: {_id: 1}}));
+assert.commandWorked(st.s.getCollection(kNsName).createIndex({_id: 1, aKey: 1}));
+
+let primary = csrs.getPrimary();
+let secondaries = csrs.getSecondaries();
+let newPrimary = secondaries[0];
+st.s.disconnect(secondaries);
+
+jsTest.log("Stop secondary oplog replication on the extra secondary so it will vote for anyone");
+const stopReplProducerFailPoint = configureFailPoint(secondaries[1], 'stopReplProducer');
+
+jsTest.log("Stop secondary oplog replication before the last operation in the transaction");
+// The stopReplProducerOnDocument failpoint ensures that the secondary stops replicating before
+// applying the last operation in the transaction. This depends on the oplog fetcher batch size
+// being 1. This also relies on the last operation in the transaction modifying 'config.chunks'.
+const stopReplProducerOnDocumentFailPoint = configureFailPoint(
+ newPrimary, "stopReplProducerOnDocument", {document: {"applyOps.ns": "config.chunks"}});
+
+jsTestLog("Refining collection shard key in a parallel shell");
+let parallelRefineFn = function(ns) {
+ assert.commandWorked(db.adminCommand({refineCollectionShardKey: ns, key: {_id: 1, aKey: 1}}));
+};
+const awaitShell = startParallelShell(funWithArgs(parallelRefineFn, kNsName), st.s.port);
+
+jsTestLog("Wait for the new primary to block on fail point");
+stopReplProducerOnDocumentFailPoint.wait();
+
+jsTestLog(`Triggering CSRS failover from ${primary.host} to ${newPrimary.host}`);
+assert.commandWorked(newPrimary.adminCommand({replSetStepUp: 1}));
+st.s.reconnect(newPrimary);
+
+jsTestLog("Waiting for set to agree on the new primary, " + newPrimary.host);
+csrs.awaitNodesAgreeOnPrimary();
+
+jsTestLog("Wait for parallel shell to complete");
+awaitShell();
+
+// Make sure the new primary cannot end up applying the whole transaction.
+jsTestLog("Wait for the new primary to stop replication after primary catch-up");
+checkLog.contains(newPrimary, "Stopping replication producer");
+
+jsTestLog("Enable replication on the new primary so that it can finish state transition");
+stopReplProducerOnDocumentFailPoint.off();
+assert.eq(csrs.getPrimary(), newPrimary);
+
+jsTestLog("Re-enable replication on the extra secondary so it can catch up");
+stopReplProducerFailPoint.off();
+csrs.awaitReplication();
+
+st.stop();
+})();
diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h
index 46626d843b7..d8f92af60cf 100644
--- a/src/mongo/db/operation_context.h
+++ b/src/mongo/db/operation_context.h
@@ -407,6 +407,19 @@ public:
}
/**
+ * Clears this operation's multi-document transaction metadata (flags, lsid and txnNumber).
+ */
+ void resetMultiDocumentTransactionState() {
+ invariant(_inMultiDocumentTransaction);  // Only valid while in a multi-document transaction.
+ invariant(!_writeUnitOfWork);  // No WriteUnitOfWork may still be attached.
+ invariant(_ruState == WriteUnitOfWork::RecoveryUnitState::kNotInUnitOfWork);  // Must be outside a unit of work.
+ _inMultiDocumentTransaction = false;
+ _isStartingMultiDocumentTransaction = false;
+ _lsid = boost::none;
+ _txnNumber = boost::none;
+ }
+
+ /**
* Returns whether this operation is starting a multi-document transaction.
*/
bool isStartingMultiDocumentTransaction() const {
diff --git a/src/mongo/db/session_catalog_mongod.cpp b/src/mongo/db/session_catalog_mongod.cpp
index 15fc2edca02..7ae2190fe77 100644
--- a/src/mongo/db/session_catalog_mongod.cpp
+++ b/src/mongo/db/session_catalog_mongod.cpp
@@ -212,6 +212,7 @@ void abortInProgressTransactions(OperationContext* opCtx) {
"sessionId"_attr = txnRecord.getSessionId().toBSON(),
"txnNumber"_attr = txnRecord.getTxnNum());
txnParticipant.abortTransaction(opCtx);
+ opCtx->resetMultiDocumentTransactionState();
}
}
} // namespace