diff options
-rw-r--r-- | jstests/libs/write_concern_util.js | 13 | ||||
-rw-r--r-- | jstests/replsets/retryable_write_concern.js | 147 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog_fetcher.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/repl_client_info.h | 5 | ||||
-rw-r--r-- | src/mongo/db/s/txn_two_phase_commit_cmds.cpp | 20 | ||||
-rw-r--r-- | src/mongo/db/service_entry_point_mongod.cpp | 6 |
6 files changed, 181 insertions, 15 deletions
diff --git a/jstests/libs/write_concern_util.js b/jstests/libs/write_concern_util.js index f2a9152c054..ed2678ff3f0 100644 --- a/jstests/libs/write_concern_util.js +++ b/jstests/libs/write_concern_util.js @@ -120,7 +120,7 @@ function runWriteConcernRetryabilityTest(priConn, secConn, cmd, kNodes, dbName, // Send a dummy write to this connection so it will have the Client object initialized. const secondPriConn = new Mongo(priConn.host); const testDB2 = secondPriConn.getDB(dbName); - assert.writeOK(testDB2.dummy.insert({x: 1}, {writeConcern: {w: kNodes}})); + assert.commandWorked(testDB2.dummy.insert({x: 1}, {writeConcern: {w: kNodes}})); if (setupFunc) { setupFunc(priConn); @@ -130,6 +130,17 @@ function runWriteConcernRetryabilityTest(priConn, secConn, cmd, kNodes, dbName, const testDB = priConn.getDB(dbName); checkWriteConcernTimedOut(testDB.runCommand(cmd)); + + // Retry the command on the new connection whose lastOp will be less than the main connection. + checkWriteConcernTimedOut(testDB2.runCommand(cmd)); + + // Retry the command on the main connection whose lastOp will not have changed. + checkWriteConcernTimedOut(testDB.runCommand(cmd)); + + // Bump forward the client lastOp on both connections and try again on both. + assert.commandWorked(testDB.dummy.insert({x: 2})); + assert.commandWorked(testDB2.dummy.insert({x: 3})); + checkWriteConcernTimedOut(testDB.runCommand(cmd)); checkWriteConcernTimedOut(testDB2.runCommand(cmd)); restartServerReplication(secConn); diff --git a/jstests/replsets/retryable_write_concern.js b/jstests/replsets/retryable_write_concern.js index 2678d1c5445..65f5d4ccad7 100644 --- a/jstests/replsets/retryable_write_concern.js +++ b/jstests/replsets/retryable_write_concern.js @@ -1,7 +1,7 @@ /** * Tests for making sure that retried writes will wait properly for writeConcern. * - * @tags: [uses_transactions] + * @tags: [uses_transactions, uses_prepare_transaction] */ (function() { @@ -9,6 +9,7 @@ load("jstests/libs/retryable_writes_util.js"); load("jstests/libs/write_concern_util.js"); + load("jstests/libs/feature_compatibility_version.js"); if (!RetryableWritesUtil.storageEngineSupportsRetryableWrites(jsTest.options().storageEngine)) { jsTestLog("Retryable writes are not supported, skipping test"); @@ -24,8 +25,17 @@ let priConn = replTest.getPrimary(); let secConn = replTest.getSecondary(); + // Stopping replication on secondaries can take up to 5 seconds normally. Set a small oplog + // getMore timeout so the test runs faster. + assert.commandWorked(secConn.adminCommand( + {configureFailPoint: 'setSmallOplogGetMoreMaxTimeMS', mode: 'alwaysOn'})); + let lsid = UUID(); + // Start at an arbitrary txnNumber. + let txnNumber = 31; + + txnNumber++; runWriteConcernRetryabilityTest(priConn, secConn, { @@ -33,11 +43,12 @@ documents: [{_id: 10}, {_id: 30}], ordered: false, lsid: {id: lsid}, - txnNumber: NumberLong(34), + txnNumber: NumberLong(txnNumber), writeConcern: {w: 'majority', wtimeout: 200}, }, kNodes); + txnNumber++; runWriteConcernRetryabilityTest(priConn, secConn, { @@ -47,11 +58,12 @@ ], ordered: false, lsid: {id: lsid}, - txnNumber: NumberLong(35), + txnNumber: NumberLong(txnNumber), writeConcern: {w: 'majority', wtimeout: 200}, }, kNodes); + txnNumber++; runWriteConcernRetryabilityTest(priConn, secConn, { @@ -59,11 +71,12 @@ deletes: [{q: {x: 1}, limit: 1}, {q: {y: 1}, limit: 1}], ordered: false, lsid: {id: lsid}, - txnNumber: NumberLong(36), + txnNumber: NumberLong(txnNumber), writeConcern: {w: 'majority', wtimeout: 200}, }, kNodes); + txnNumber++; runWriteConcernRetryabilityTest(priConn, secConn, { @@ -73,7 +86,7 @@ new: true, upsert: true, lsid: {id: lsid}, - txnNumber: NumberLong(37), + txnNumber: NumberLong(txnNumber), writeConcern: {w: 'majority', wtimeout: 200}, }, kNodes); @@ -81,9 +94,32 @@ runWriteConcernRetryabilityTest(priConn, secConn, { + setFeatureCompatibilityVersion: lastStableFCV, + writeConcern: {w: 'majority', wtimeout: 200}, + }, + kNodes, + 'admin'); + assert.commandWorked(priConn.adminCommand({setFeatureCompatibilityVersion: lastStableFCV})); + checkFCV(priConn.getDB('admin'), lastStableFCV); + + runWriteConcernRetryabilityTest(priConn, + secConn, + { + setFeatureCompatibilityVersion: latestFCV, + writeConcern: {w: 'majority', wtimeout: 200}, + }, + kNodes, + 'admin'); + assert.commandWorked(priConn.adminCommand({setFeatureCompatibilityVersion: latestFCV})); + checkFCV(priConn.getDB('admin'), latestFCV); + + txnNumber++; + runWriteConcernRetryabilityTest(priConn, + secConn, + { commitTransaction: 1, lsid: {id: lsid}, - txnNumber: NumberLong(38), + txnNumber: NumberLong(txnNumber), autocommit: false, writeConcern: {w: 'majority', wtimeout: 200}, }, @@ -95,7 +131,7 @@ documents: [{_id: 80}, {_id: 90}], ordered: false, lsid: {id: lsid}, - txnNumber: NumberLong(38), + txnNumber: NumberLong(txnNumber), readConcern: {level: 'snapshot'}, autocommit: false, startTransaction: true @@ -103,5 +139,102 @@ }); + txnNumber++; + runWriteConcernRetryabilityTest(priConn, + secConn, + { + prepareTransaction: 1, + lsid: {id: lsid}, + txnNumber: NumberLong(txnNumber), + autocommit: false, + writeConcern: {w: 'majority', wtimeout: 200}, + }, + kNodes, + 'admin', + function(conn) { + assert.commandWorked(conn.getDB('test').runCommand({ + insert: 'user', + documents: [{_id: 100}, {_id: 110}], + ordered: false, + lsid: {id: lsid}, + txnNumber: NumberLong(txnNumber), + readConcern: {level: 'snapshot'}, + autocommit: false, + startTransaction: true + })); + }); + assert.commandWorked(priConn.adminCommand({ + abortTransaction: 1, + lsid: {id: lsid}, + txnNumber: NumberLong(txnNumber), + autocommit: false, + writeConcern: {w: 'majority'}, + })); + + txnNumber++; + runWriteConcernRetryabilityTest(priConn, + secConn, + { + abortTransaction: 1, + lsid: {id: lsid}, + txnNumber: NumberLong(txnNumber), + autocommit: false, + writeConcern: {w: 'majority', wtimeout: 200}, + }, + kNodes, + 'admin', + function(conn) { + assert.commandWorked(conn.getDB('test').runCommand({ + insert: 'user', + documents: [{_id: 120}, {_id: 130}], + ordered: false, + lsid: {id: lsid}, + txnNumber: NumberLong(txnNumber), + readConcern: {level: 'snapshot'}, + autocommit: false, + startTransaction: true + })); + assert.commandWorked(conn.adminCommand({ + prepareTransaction: 1, + lsid: {id: lsid}, + txnNumber: NumberLong(txnNumber), + autocommit: false, + writeConcern: {w: 'majority'}, + })); + }); + + txnNumber++; + assert.commandWorked(priConn.getDB('test').runCommand({ + insert: 'user', + documents: [{_id: 140}, {_id: 150}], + ordered: false, + lsid: {id: lsid}, + txnNumber: NumberLong(txnNumber), + readConcern: {level: 'snapshot'}, + autocommit: false, + startTransaction: true + })); + const prepareTS = assert + .commandWorked(priConn.adminCommand({ + prepareTransaction: 1, + lsid: {id: lsid}, + txnNumber: NumberLong(txnNumber), + autocommit: false, + writeConcern: {w: 'majority'}, + })) + .prepareTimestamp; + runWriteConcernRetryabilityTest(priConn, + secConn, + { + commitTransaction: 1, + commitTimestamp: prepareTS, + lsid: {id: lsid}, + txnNumber: NumberLong(txnNumber), + autocommit: false, + writeConcern: {w: 'majority', wtimeout: 200}, + }, + kNodes, + 'admin'); + replTest.stopSet(); })(); diff --git a/src/mongo/db/repl/oplog_fetcher.cpp b/src/mongo/db/repl/oplog_fetcher.cpp index 25a320812eb..9c4df1a1bec 100644 --- a/src/mongo/db/repl/oplog_fetcher.cpp +++ b/src/mongo/db/repl/oplog_fetcher.cpp @@ -52,6 +52,7 @@ Seconds OplogFetcher::kDefaultProtocolZeroAwaitDataTimeout(2); MONGO_FAIL_POINT_DEFINE(stopReplProducer); MONGO_FAIL_POINT_DEFINE(stopReplProducerOnDocument); +MONGO_FAIL_POINT_DEFINE(setSmallOplogGetMoreMaxTimeMS); namespace { @@ -391,6 +392,10 @@ Milliseconds OplogFetcher::getAwaitDataTimeout_forTest() const { } Milliseconds OplogFetcher::_getGetMoreMaxTime() const { + if (MONGO_FAIL_POINT(setSmallOplogGetMoreMaxTimeMS)) { + return Milliseconds(50); + } + return _awaitDataTimeout; } diff --git a/src/mongo/db/repl/repl_client_info.h b/src/mongo/db/repl/repl_client_info.h index 5f8e19cc6be..9f45ec49a24 100644 --- a/src/mongo/db/repl/repl_client_info.h +++ b/src/mongo/db/repl/repl_client_info.h @@ -46,6 +46,11 @@ class ReplClientInfo { public: static const Client::Decoration<ReplClientInfo> forClient; + /** + * Sets the LastOp to the provided op, which MUST be greater than or equal to the current value + * of the LastOp. This also marks that the LastOp was set explicitly on the client so we wait + * for write concern. + */ void setLastOp(OperationContext* opCtx, const OpTime& op); OpTime getLastOp() const { diff --git a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp index eef32a087a5..e7369ceb69c 100644 --- a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp +++ b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp @@ -108,14 +108,20 @@ public: auto& replClient = repl::ReplClientInfo::forClient(opCtx->getClient()); auto prepareOpTime = txnParticipant.getPrepareOpTime(); - // Ensure waiting for writeConcern of the prepare OpTime. + // Ensure waiting for writeConcern of the prepare OpTime. If the node has failed + // over, then we want to wait on an OpTime in the new term, so we wait on the + // lastApplied OpTime. If we've gotten to this point, then we are guaranteed that + // the transaction was prepared at this prepareOpTime on this branch of history and + // that waiting on this lastApplied OpTime waits on the prepareOpTime as well. + replClient.setLastOpToSystemLastOpTime(opCtx); + + // TODO SERVER-39364: Due to a bug in setlastOpToSystemLastOpTime, the prepareOpTime + // may still be greater than the lastApplied. In that case we make sure that we wait + // on the prepareOpTime which is guaranteed to be in the current term. SERVER-39364 + // can remove this extra setLastOp() call and just rely on the call to + // setLastOpToSystemLastOpTime() above. if (prepareOpTime > replClient.getLastOp()) { - // In case this node has failed over, in which case the term will have - // increased, set the Client's last OpTime to the larger of the system last - // OpTime and the prepare OpTime. - const auto systemLastOpTime = - repl::ReplicationCoordinator::get(opCtx)->getMyLastAppliedOpTime(); - replClient.setLastOp(opCtx, std::max(prepareOpTime, systemLastOpTime)); + replClient.setLastOp(opCtx, prepareOpTime); } invariant(opCtx->recoveryUnit()->getPrepareTimestamp() == diff --git a/src/mongo/db/service_entry_point_mongod.cpp b/src/mongo/db/service_entry_point_mongod.cpp index 09c60294c18..5435e14b49a 100644 --- a/src/mongo/db/service_entry_point_mongod.cpp +++ b/src/mongo/db/service_entry_point_mongod.cpp @@ -140,6 +140,12 @@ public: return; } + // Waits for write concern if we tried to explicitly set the lastOp forward but lastOp was + // already up to date. We still want to wait for write concern on the lastOp. This is + // primarily to make sure back to back retryable write retries still wait for write concern. + // + // WARNING: Retryable writes that expect to wait for write concern on retries must ensure + // this is entered by calling setLastOp() or setLastOpToSystemLastOpTime(). if (repl::ReplClientInfo::forClient(opCtx->getClient()) .lastOpWasSetExplicitlyByClientForCurrentOperation(opCtx)) { waitForWriteConcernAndAppendStatus(); |