diff options
15 files changed, 475 insertions, 201 deletions
diff --git a/jstests/core/txns/aggregation_in_transaction.js b/jstests/core/txns/aggregation_in_transaction.js index 76c5f4d6a0a..e6f9dffa3a6 100644 --- a/jstests/core/txns/aggregation_in_transaction.js +++ b/jstests/core/txns/aggregation_in_transaction.js @@ -5,6 +5,10 @@ load("jstests/libs/fixture_helpers.js"); // For isSharded. +// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed +// For withTxnAndAutoRetryOnMongos. +load('jstests/libs/auto_retry_transaction_in_sharding.js'); + const session = db.getMongo().startSession({causalConsistency: false}); const testDB = session.getDatabase("test"); const coll = testDB.getCollection("aggregation_in_transaction"); @@ -31,76 +35,90 @@ assert.commandWorked(foreignColl.insert(foreignDoc, {writeConcern: {w: "majority const isForeignSharded = FixtureHelpers.isSharded(foreignColl); -// Run a dummy find to start the transaction. -jsTestLog("Starting transaction."); -session.startTransaction({readConcern: {level: "snapshot"}}); -let cursor = coll.find(); -cursor.next(); - -// Insert a document outside of the transaction. Subsequent aggregations should not see this -// document. -jsTestLog("Inserting document outside of transaction."); -assert.commandWorked(db.getSiblingDB(testDB.getName()).getCollection(coll.getName()).insert({ - _id: "not_visible_in_transaction", - foreignKey: "orange", -})); - -// Perform an aggregation that is fed by a cursor on the underlying collection. Only the -// majority-committed document present at the start of the transaction should be found. -jsTestLog("Starting aggregations inside of the transaction."); -cursor = coll.aggregate({$match: {}}); -assert.docEq(testDoc, cursor.next()); -assert(!cursor.hasNext()); - -// Perform aggregations that look at other collections. -// TODO: SERVER-39162 Sharded $lookup is not supported in transactions. -if (!isForeignSharded) { - const lookupDoc = Object.extend(testDoc, {lookup: [foreignDoc]}); - cursor = coll.aggregate({ - $lookup: { - from: foreignColl.getName(), - localField: "foreignKey", - foreignField: "_id", - as: "lookup", - } - }); - assert.docEq(cursor.next(), lookupDoc); - assert(!cursor.hasNext()); +const txnOptions = { + readConcern: {level: "snapshot"} +}; - cursor = coll.aggregate({ - $graphLookup: { - from: foreignColl.getName(), - startWith: "$foreignKey", - connectFromField: "foreignKey", - connectToField: "_id", - as: "lookup" - } - }); - assert.docEq(cursor.next(), lookupDoc); +// TODO (SERVER-39704): We use the withTxnAndAutoRetryOnMongos +// function to handle how MongoS will propagate a StaleShardVersion error as a +// TransientTransactionError. After SERVER-39704 is completed the +// withTxnAndAutoRetryOnMongos function can be removed +withTxnAndAutoRetryOnMongos(session, () => { + // Cleaning collection in case the transaction is retried + db.getSiblingDB(testDB.getName()).getCollection(coll.getName()).remove({ + _id: "not_visible_in_transaction" + }); + + // Run a dummy find to start the transaction. + jsTestLog("Transaction started."); + + let cursor = coll.find(); + cursor.next(); + + // Insert a document outside of the transaction. Subsequent aggregations should not see this + // document. + jsTestLog("Inserting document outside of transaction."); + assert.commandWorked(db.getSiblingDB(testDB.getName()).getCollection(coll.getName()).insert({ + _id: "not_visible_in_transaction", + foreignKey: "orange", + })); + + // Perform an aggregation that is fed by a cursor on the underlying collection. Only the + // majority-committed document present at the start of the transaction should be found. + jsTestLog("Starting aggregations inside of the transaction."); + cursor = coll.aggregate({$match: {}}); + assert.docEq(testDoc, cursor.next()); assert(!cursor.hasNext()); -} else { - // TODO SERVER-39048: Test that $lookup on sharded collection is banned - // within a transaction. -} -jsTestLog("Testing $count within a transaction."); + // Perform aggregations that look at other collections. + // TODO: SERVER-39162 Sharded $lookup is not supported in transactions. + if (!isForeignSharded) { + const lookupDoc = Object.merge(testDoc, {lookup: [foreignDoc]}); + cursor = coll.aggregate({ + $lookup: { + from: foreignColl.getName(), + localField: "foreignKey", + foreignField: "_id", + as: "lookup", + } + }); + assert.docEq(cursor.next(), lookupDoc); + assert(!cursor.hasNext()); + + cursor = coll.aggregate({ + $graphLookup: { + from: foreignColl.getName(), + startWith: "$foreignKey", + connectFromField: "foreignKey", + connectToField: "_id", + as: "lookup" + } + }); + assert.docEq(cursor.next(), lookupDoc); + assert(!cursor.hasNext()); + } else { + // TODO SERVER-39048: Test that $lookup on sharded collection is banned + // within a transaction. + } + + jsTestLog("Testing $count within a transaction."); -let countRes = coll.aggregate([{$count: "count"}]).toArray(); -assert.eq(countRes.length, 1, tojson(countRes)); -assert.eq(countRes[0].count, 1, tojson(countRes)); + let countRes = coll.aggregate([{$count: "count"}]).toArray(); + assert.eq(countRes.length, 1, tojson(countRes)); + assert.eq(countRes[0].count, 1, tojson(countRes)); -assert.commandWorked(coll.insert({a: 2})); -countRes = coll.aggregate([{$count: "count"}]).toArray(); -assert.eq(countRes.length, 1, tojson(countRes)); -assert.eq(countRes[0].count, 2, tojson(countRes)); + assert.commandWorked(coll.insert({a: 2})); + countRes = coll.aggregate([{$count: "count"}]).toArray(); + assert.eq(countRes.length, 1, tojson(countRes)); + assert.eq(countRes[0].count, 2, tojson(countRes)); -assert.commandWorked( - db.getSiblingDB(testDB.getName()).getCollection(coll.getName()).insert({a: 3})); -countRes = coll.aggregate([{$count: "count"}]).toArray(); -assert.eq(countRes.length, 1, tojson(countRes)); -assert.eq(countRes[0].count, 2, tojson(countRes)); + assert.commandWorked( + db.getSiblingDB(testDB.getName()).getCollection(coll.getName()).insert({a: 3})); + countRes = coll.aggregate([{$count: "count"}]).toArray(); + assert.eq(countRes.length, 1, tojson(countRes)); + assert.eq(countRes[0].count, 2, tojson(countRes)); +}, txnOptions); -assert.commandWorked(session.commitTransaction_forTesting()); jsTestLog("Transaction committed."); // Perform aggregations with non-cursor initial sources and assert that they are not supported diff --git a/jstests/core/txns/basic_causal_consistency.js b/jstests/core/txns/basic_causal_consistency.js index 5a78ddc0900..beda5fa1d91 100644 --- a/jstests/core/txns/basic_causal_consistency.js +++ b/jstests/core/txns/basic_causal_consistency.js @@ -3,6 +3,10 @@ (function() { "use strict"; +// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed +// For withTxnAndAutoRetryOnMongos. +load('jstests/libs/auto_retry_transaction_in_sharding.js'); + const dbName = "test"; const collName = "basic_causal_consistency"; const testDB = db.getSiblingDB(dbName); @@ -18,18 +22,20 @@ const session = testDB.getMongo().startSession(sessionOptions); const sessionDb = session.getDatabase(dbName); const sessionColl = sessionDb.getCollection(collName); -session.startTransaction({readConcern: {level: "snapshot"}}); - -// Performing a read first should work when snapshot readConcern is specified. -assert.docEq(null, sessionColl.findOne({_id: "insert-1"})); - -assert.commandWorked(sessionColl.insert({_id: "insert-1"})); +// TODO (SERVER-39704): We use the withTxnAndAutoRetryOnMongos +// function to handle how MongoS will propagate a StaleShardVersion error as a +// TransientTransactionError. After SERVER-39704 is completed the +// withTxnAndAutoRetryOnMongos function can be removed +withTxnAndAutoRetryOnMongos(session, () => { + // Performing a read first should work when snapshot readConcern is specified. + assert.docEq(null, sessionColl.findOne({_id: "insert-1"})); -assert.docEq(null, sessionColl.findOne({_id: "insert-2"})); + assert.commandWorked(sessionColl.insert({_id: "insert-1"})); -assert.docEq({_id: "insert-1"}, sessionColl.findOne({_id: "insert-1"})); + assert.docEq(null, sessionColl.findOne({_id: "insert-2"})); -assert.commandWorked(session.commitTransaction_forTesting()); + assert.docEq({_id: "insert-1"}, sessionColl.findOne({_id: "insert-1"})); +}); session.endSession(); }()); diff --git a/jstests/core/txns/commands_not_allowed_in_txn.js b/jstests/core/txns/commands_not_allowed_in_txn.js index fdb296b9d3a..8d161c8b21b 100644 --- a/jstests/core/txns/commands_not_allowed_in_txn.js +++ b/jstests/core/txns/commands_not_allowed_in_txn.js @@ -6,6 +6,10 @@ (function() { "use strict"; +// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed +// For retryOnceOnTransientOnMongos. +load('jstests/libs/auto_retry_transaction_in_sharding.js'); + const dbName = "test"; const collName = "commands_not_allowed_in_txn"; const testDB = db.getSiblingDB(dbName); @@ -75,15 +79,22 @@ function testCommand(command) { // Check that the command fails inside a transaction, but does not abort the transaction. setup(); - assert.commandWorked(sessionDb.runCommand({ - insert: collName, - documents: [{}], - readConcern: {level: "snapshot"}, - txnNumber: NumberLong(++txnNumber), - stmtId: NumberInt(0), - startTransaction: true, - autocommit: false - })); + // TODO (SERVER-39704): We use the retryOnceOnTransientOnMongos + // function to handle how MongoS will propagate a StaleShardVersion error as a + // TransientTransactionError. After SERVER-39704 is completed the + // retryOnceOnTransientOnMongos can be removed + retryOnceOnTransientOnMongos(session, () => { + assert.commandWorked(sessionDb.runCommand({ + insert: collName, + documents: [{}], + readConcern: {level: "snapshot"}, + txnNumber: NumberLong(++txnNumber), + stmtId: NumberInt(0), + startTransaction: true, + autocommit: false + })); + }); + res = assert.commandFailedWithCode( sessionDb.runCommand(Object.assign( {}, diff --git a/jstests/core/txns/concurrent_drops_and_creates.js b/jstests/core/txns/concurrent_drops_and_creates.js index 50e53d9e53e..2b1e7ce7f9f 100644 --- a/jstests/core/txns/concurrent_drops_and_creates.js +++ b/jstests/core/txns/concurrent_drops_and_creates.js @@ -4,6 +4,10 @@ (function() { "use strict"; +// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed +// For retryOnceOnTransientAndRestartTxnOnMongos. +load('jstests/libs/auto_retry_transaction_in_sharding.js'); + const dbName1 = "test1"; const dbName2 = "test2"; const collNameA = "coll_A"; @@ -31,8 +35,18 @@ assert.commandWorked(sessionCollA.insert({})); assert.commandWorked(sessionCollB.insert({})); // Start the transaction with a write to collection A. -session.startTransaction({readConcern: {level: "snapshot"}}); -assert.commandWorked(sessionCollA.insert({})); +const txnOptions = { + readConcern: {level: "snapshot"} +}; +session.startTransaction(txnOptions); + +// TODO (SERVER-39704): We use the retryOnceOnTransientAndRestartTxnOnMongos +// function to handle how MongoS will propagate a StaleShardVersion error as a +// TransientTransactionError. After SERVER-39704 is completed the +// retryOnceOnTransientAndRestartTxnOnMongos can be removed +retryOnceOnTransientAndRestartTxnOnMongos(session, () => { + assert.commandWorked(sessionCollA.insert({})); +}, txnOptions); // Drop collection B outside of the transaction. Advance the cluster time of the session // performing the drop to ensure it happens at a later cluster time than the transaction began. diff --git a/jstests/core/txns/finished_transaction_error_handling.js b/jstests/core/txns/finished_transaction_error_handling.js index 7cabb693fe5..2c22e606532 100644 --- a/jstests/core/txns/finished_transaction_error_handling.js +++ b/jstests/core/txns/finished_transaction_error_handling.js @@ -3,6 +3,10 @@ (function() { "use strict"; +// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed +// For retryOnceOnTransientOnMongos. +load('jstests/libs/auto_retry_transaction_in_sharding.js'); + const dbName = "test"; const collName = "finished_transaction_error_handling"; const testDB = db.getSiblingDB(dbName); @@ -25,16 +29,23 @@ const session = db.getMongo().startSession(sessionOptions); const sessionDb = session.getDatabase(dbName); jsTestLog("Test aborted transaction number cannot be reused."); -txnNumber++; -assert.commandWorked(sessionDb.runCommand({ - insert: collName, - documents: [{_id: "abort-txn-1"}], - readConcern: {level: "snapshot"}, - txnNumber: NumberLong(txnNumber), - startTransaction: true, - stmtId: NumberInt(stmtId++), - autocommit: false -})); + +// TODO (SERVER-39704): We use the retryOnceOnTransientOnMongos +// function to handle how MongoS will propagate a StaleShardVersion error as a +// TransientTransactionError. After SERVER-39704 is completed the +// retryOnceOnTransientOnMongos can be removed +retryOnceOnTransientOnMongos(session, () => { + assert.commandWorked(sessionDb.runCommand({ + insert: collName, + documents: [{_id: "abort-txn-1"}], + readConcern: {level: "snapshot"}, + txnNumber: NumberLong(++txnNumber), + startTransaction: true, + stmtId: NumberInt(stmtId++), + autocommit: false + })); +}); + assert.commandWorked(sessionDb.adminCommand({ abortTransaction: 1, writeConcern: {w: "majority"}, diff --git a/jstests/core/txns/list_collections_not_blocked_by_txn.js b/jstests/core/txns/list_collections_not_blocked_by_txn.js index faf095129d2..fb9cf6340e4 100644 --- a/jstests/core/txns/list_collections_not_blocked_by_txn.js +++ b/jstests/core/txns/list_collections_not_blocked_by_txn.js @@ -3,6 +3,11 @@ // as a result of taking MODE_S locks that are incompatible with MODE_IX needed for writes. (function() { "use strict"; + +// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed +// For withTxnAndAutoRetryOnMongos. +load('jstests/libs/auto_retry_transaction_in_sharding.js'); + var dbName = 'list_collections_not_blocked'; var mydb = db.getSiblingDB(dbName); var session = db.getMongo().startSession({causalConsistency: false}); @@ -21,23 +26,26 @@ if (isMongos) { assert.commandWorked(sessionDb.runCommand({listCollections: 1, nameOnly: true})); } -session.startTransaction({readConcern: {level: "snapshot"}}); - -assert.commandWorked(sessionDb.foo.insert({x: 1})); - -for (let nameOnly of [false, true]) { - // Check that both the nameOnly and full versions of listCollections don't block. - let res = mydb.runCommand({listCollections: 1, nameOnly, maxTimeMS: 20 * 1000}); - assert.commandWorked(res, "listCollections should have succeeded and not timed out"); - let collObj = res.cursor.firstBatch[0]; - // collObj should only have name and type fields. - assert.eq('foo', collObj.name); - assert.eq('collection', collObj.type); - assert(collObj.hasOwnProperty("idIndex") == !nameOnly, tojson(collObj)); - assert(collObj.hasOwnProperty("options") == !nameOnly, tojson(collObj)); - assert(collObj.hasOwnProperty("info") == !nameOnly, tojson(collObj)); -} +// TODO (SERVER-39704): We use the withTxnAndAutoRetryOnMongos +// function to handle how MongoS will propagate a StaleShardVersion error as a +// TransientTransactionError. After SERVER-39704 is completed the +// withTxnAndAutoRetryOnMongos function can be removed +withTxnAndAutoRetryOnMongos(session, () => { + assert.commandWorked(sessionDb.foo.insert({x: 1})); + + for (let nameOnly of [false, true]) { + // Check that both the nameOnly and full versions of listCollections don't block. + let res = mydb.runCommand({listCollections: 1, nameOnly, maxTimeMS: 20 * 1000}); + assert.commandWorked(res, "listCollections should have succeeded and not timed out"); + let collObj = res.cursor.firstBatch[0]; + // collObj should only have name and type fields. + assert.eq('foo', collObj.name); + assert.eq('collection', collObj.type); + assert(collObj.hasOwnProperty("idIndex") == !nameOnly, tojson(collObj)); + assert(collObj.hasOwnProperty("options") == !nameOnly, tojson(collObj)); + assert(collObj.hasOwnProperty("info") == !nameOnly, tojson(collObj)); + } +}, {readConcern: {level: "snapshot"}}); -assert.commandWorked(session.commitTransaction_forTesting()); session.endSession(); }()); diff --git a/jstests/core/txns/multi_statement_transaction_abort.js b/jstests/core/txns/multi_statement_transaction_abort.js index a7946af8eda..7a8cf0b85df 100644 --- a/jstests/core/txns/multi_statement_transaction_abort.js +++ b/jstests/core/txns/multi_statement_transaction_abort.js @@ -3,6 +3,10 @@ (function() { "use strict"; +// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed +// For retryOnceOnTransientOnMongos. +load('jstests/libs/auto_retry_transaction_in_sharding.js'); + const dbName = "test"; const collName = "multi_statement_transaction_abort"; const testDB = db.getSiblingDB(dbName); @@ -22,14 +26,21 @@ const sessionDb = session.getDatabase(dbName); jsTest.log("Insert two documents in a transaction and abort"); // Insert a doc within the transaction. -assert.commandWorked(sessionDb.runCommand({ - insert: collName, - documents: [{_id: "insert-1"}], - readConcern: {level: "snapshot"}, - txnNumber: NumberLong(txnNumber), - startTransaction: true, - autocommit: false -})); + +// TODO (SERVER-39704): We use the retryOnceOnTransientOnMongos +// function to handle how MongoS will propagate a StaleShardVersion error as a +// TransientTransactionError. After SERVER-39704 is completed the +// retryOnceOnTransientOnMongos can be removed +retryOnceOnTransientOnMongos(session, () => { + assert.commandWorked(sessionDb.runCommand({ + insert: collName, + documents: [{_id: "insert-1"}], + readConcern: {level: "snapshot"}, + txnNumber: NumberLong(++txnNumber), + startTransaction: true, + autocommit: false + })); +}); // Insert a doc within a transaction. assert.commandWorked(sessionDb.runCommand({ diff --git a/jstests/core/txns/multi_statement_transaction_using_api.js b/jstests/core/txns/multi_statement_transaction_using_api.js index 910fa45c68b..f1b961f9fdc 100644 --- a/jstests/core/txns/multi_statement_transaction_using_api.js +++ b/jstests/core/txns/multi_statement_transaction_using_api.js @@ -3,6 +3,10 @@ (function() { "use strict"; +// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed +// For withTxnAndAutoRetryOnMongos. +load('jstests/libs/auto_retry_transaction_in_sharding.js'); + const dbName = "test"; const collName = "multi_transaction_test_using_api"; const testDB = db.getSiblingDB(dbName); @@ -39,37 +43,40 @@ session.startTransaction({readConcern: {level: "snapshot"}, writeConcern: {w: "m assert.commandWorked(session.commitTransaction_forTesting()); jsTestLog("Run CRUD ops, read ops, and commit transaction."); -session.startTransaction({readConcern: {level: "snapshot"}, writeConcern: {w: "majority"}}); -// Performing a read first should work when snapshot readConcern is specified. -assert.docEq(null, sessionColl.findOne({_id: "insert-1"})); +// TODO (SERVER-39704): We use the withTxnAndAutoRetryOnMongos +// function to handle how MongoS will propagate a StaleShardVersion error as a +// TransientTransactionError. After SERVER-39704 is completed the +// withTxnAndAutoRetryOnMongos function can be removed +withTxnAndAutoRetryOnMongos(session, () => { + // Performing a read first should work when snapshot readConcern is specified. + assert.docEq(null, sessionColl.findOne({_id: "insert-1"})); -assert.commandWorked(sessionColl.insert({_id: "insert-1", a: 0})); + assert.commandWorked(sessionColl.insert({_id: "insert-1", a: 0})); -assert.commandWorked(sessionColl.insert({_id: "insert-2", a: 0})); + assert.commandWorked(sessionColl.insert({_id: "insert-2", a: 0})); -assert.commandWorked(sessionColl.insert({_id: "insert-3", a: 0})); + assert.commandWorked(sessionColl.insert({_id: "insert-3", a: 0})); -assert.commandWorked(sessionColl.update({_id: "insert-1"}, {$inc: {a: 1}})); + assert.commandWorked(sessionColl.update({_id: "insert-1"}, {$inc: {a: 1}})); -assert.commandWorked(sessionColl.deleteOne({_id: "insert-2"})); + assert.commandWorked(sessionColl.deleteOne({_id: "insert-2"})); -sessionColl.findAndModify({query: {_id: "insert-3"}, update: {$set: {a: 2}}}); + sessionColl.findAndModify({query: {_id: "insert-3"}, update: {$set: {a: 2}}}); -// Try to find a document within a transaction. -let cursor = sessionColl.find({_id: "insert-1"}); -assert.docEq({_id: "insert-1", a: 1}, cursor.next()); -assert(!cursor.hasNext()); + // Try to find a document within a transaction. + let cursor = sessionColl.find({_id: "insert-1"}); + assert.docEq({_id: "insert-1", a: 1}, cursor.next()); + assert(!cursor.hasNext()); -// Try to find a document using findOne within a transaction -assert.eq({_id: "insert-1", a: 1}, sessionColl.findOne({_id: "insert-1"})); + // Try to find a document using findOne within a transaction + assert.eq({_id: "insert-1", a: 1}, sessionColl.findOne({_id: "insert-1"})); -// Find a document with the aggregation shell helper within a transaction. -cursor = sessionColl.aggregate({$match: {_id: "insert-1"}}); -assert.docEq({_id: "insert-1", a: 1}, cursor.next()); -assert(!cursor.hasNext()); - -assert.commandWorked(session.commitTransaction_forTesting()); + // Find a document with the aggregation shell helper within a transaction. + cursor = sessionColl.aggregate({$match: {_id: "insert-1"}}); + assert.docEq({_id: "insert-1", a: 1}, cursor.next()); + assert(!cursor.hasNext()); +}, {readConcern: {level: "snapshot"}, writeConcern: {w: "majority"}}); // Make sure the correct documents exist after committing the transaciton. assert.eq({_id: "insert-1", a: 1}, sessionColl.findOne({_id: "insert-1"})); diff --git a/jstests/core/txns/no_read_or_write_concern_inside_txn.js b/jstests/core/txns/no_read_or_write_concern_inside_txn.js index b8333eed92f..165a0c99fb1 100644 --- a/jstests/core/txns/no_read_or_write_concern_inside_txn.js +++ b/jstests/core/txns/no_read_or_write_concern_inside_txn.js @@ -7,6 +7,11 @@ (function() { "use strict"; + +// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed +// For retryOnceOnTransientOnMongos. +load('jstests/libs/auto_retry_transaction_in_sharding.js'); + const dbName = "test"; const collName = "no_read_or_write_concerns_inside_txn"; const testDB = db.getSiblingDB(dbName); @@ -27,15 +32,22 @@ let txnNumber = 0; let stmtId = 0; jsTestLog("Starting first transaction"); -assert.commandWorked(sessionDb.runCommand({ - insert: collName, - documents: [{_id: 0}], - readConcern: {level: "snapshot"}, - startTransaction: true, - autocommit: false, - txnNumber: NumberLong(txnNumber), - stmtId: NumberInt(stmtId++) -})); +// Insert a doc within the transaction. +// TODO (SERVER-39704): We use the retryOnceOnTransientOnMongos +// function to handle how MongoS will propagate a StaleShardVersion error as a +// TransientTransactionError. After SERVER-39704 is completed the +// retryOnceOnTransientOnMongos can be removed +retryOnceOnTransientOnMongos(session, () => { + assert.commandWorked(sessionDb.runCommand({ + insert: collName, + documents: [{_id: 0}], + readConcern: {level: "snapshot"}, + startTransaction: true, + autocommit: false, + txnNumber: NumberLong(++txnNumber), + stmtId: NumberInt(stmtId++) + })); +}); jsTestLog("Attempting to insert with readConcern: snapshot within a transaction."); assert.commandFailedWithCode(sessionDb.runCommand({ diff --git a/jstests/core/txns/non_transactional_operations_on_session_with_transaction.js b/jstests/core/txns/non_transactional_operations_on_session_with_transaction.js index 8fb9b6b5a3e..d3c26b49de8 100644 --- a/jstests/core/txns/non_transactional_operations_on_session_with_transaction.js +++ b/jstests/core/txns/non_transactional_operations_on_session_with_transaction.js @@ -9,6 +9,10 @@ (function() { "use strict"; +// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed +// For retryOnceOnTransientOnMongos. +load('jstests/libs/auto_retry_transaction_in_sharding.js'); + const dbName = "test"; const collName = "non_transactional_operations_on_session_with_transactions"; @@ -51,14 +55,20 @@ const doc2 = { }; // Insert a document in a transaction. -assert.commandWorked(sessionDb.runCommand({ - insert: collName, - documents: [doc1], - readConcern: {level: "snapshot"}, - txnNumber: NumberLong(txnNumber), - startTransaction: true, - autocommit: false -})); +// TODO (SERVER-39704): We use the retryOnceOnTransientOnMongos +// function to handle how MongoS will propagate a StaleShardVersion error as a +// TransientTransactionError. After SERVER-39704 is completed the +// retryOnceOnTransientOnMongos can be removed +retryOnceOnTransientOnMongos(session, () => { + assert.commandWorked(sessionDb.runCommand({ + insert: collName, + documents: [doc1], + readConcern: {level: "snapshot"}, + txnNumber: NumberLong(++txnNumber), + startTransaction: true, + autocommit: false + })); +}); // Test that we cannot observe the insert outside of the transaction. assert.eq(null, testColl.findOne(doc1)); diff --git a/jstests/core/txns/repeatable_reads_in_transaction.js b/jstests/core/txns/repeatable_reads_in_transaction.js index 870a1d58e6f..fbfb34ed1de 100644 --- a/jstests/core/txns/repeatable_reads_in_transaction.js +++ b/jstests/core/txns/repeatable_reads_in_transaction.js @@ -4,6 +4,10 @@ (function() { "use strict"; +// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed +// For retryOnceOnTransientAndRestartTxnOnMongos. +load('jstests/libs/auto_retry_transaction_in_sharding.js'); + const dbName = "test"; const collName = "repeatable_reads_in_transaction"; const testDB = db.getSiblingDB(dbName); @@ -24,6 +28,9 @@ const sessionColl = sessionDb.getCollection(collName); const session2 = testDB.getMongo().startSession(sessionOptions); const session2Db = session2.getDatabase(dbName); const session2Coll = session2Db.getCollection(collName); +const txnOptions = { + writeConcern: {w: "majority"} +}; jsTest.log("Prepopulate the collection."); assert.commandWorked( @@ -35,14 +42,21 @@ assert.commandWorked( const expectedDocs = [{_id: 0}, {_id: 1}, {_id: 2}]; jsTestLog("Start a read-only transaction on the first session."); -session.startTransaction({writeConcern: {w: "majority"}}); +session.startTransaction(txnOptions); assert.sameMembers(expectedDocs, sessionColl.find().toArray()); jsTestLog("Start a transaction on the second session that modifies the same collection."); session2.startTransaction({readConcern: {level: "snapshot"}, writeConcern: {w: "majority"}}); -assert.commandWorked(session2Coll.insert({_id: 3})); +// TODO (SERVER-39704): We use the retryOnceOnTransientAndRestartTxnOnMongos +// function to handle how MongoS will propagate a StaleShardVersion error as a +// TransientTransactionError. After SERVER-39704 is completed the +// retryOnceOnTransientAndRestartTxnOnMongos can be removed +retryOnceOnTransientAndRestartTxnOnMongos(session2, () => { + assert.commandWorked(session2Coll.insert({_id: 3})); +}, txnOptions); + assert.commandWorked(session2Coll.update({_id: 1}, {$set: {a: 1}})); assert.commandWorked(session2Coll.deleteOne({_id: 2})); diff --git a/jstests/libs/auto_retry_transaction_in_sharding.js b/jstests/libs/auto_retry_transaction_in_sharding.js new file mode 100644 index 00000000000..05f1f5c9374 --- /dev/null +++ b/jstests/libs/auto_retry_transaction_in_sharding.js @@ -0,0 +1,85 @@ +'use strict'; + +load('jstests/concurrency/fsm_workload_helpers/auto_retry_transaction.js'); +Random.setRandomSeed(); + +var { + withTxnAndAutoRetryOnMongos, + retryOnceOnTransientOnMongos, + retryOnceOnTransientAndRestartTxnOnMongos +} = (() => { + /** + * Runs 'func' inside of a transaction started with 'txnOptions', and automatically retries + * until it either succeeds or the server returns a non-TransientTransactionError error + * response. + * + * The caller should take care to ensure 'func' doesn't modify any captured variables in a + * speculative fashion where calling it multiple times would lead to unintended behavior. The + * transaction started by the withTxnAndAutoRetryOnMongos() function is only known to have + * committed after the withTxnAndAutoRetryOnMongos() function returns. + * + * This behaviour only applies if the client is a mongos + */ + function withTxnAndAutoRetryOnMongos(session, func, txnOptions) { + if (session.getClient().isMongos()) { + withTxnAndAutoRetry(session, func, {txnOptions}); + } else { + session.startTransaction(txnOptions); + func(); + assert.commandWorked(session.commitTransaction_forTesting()); + } + } + + /** + * Runs 'func' and retries it only once if a transient error occurred. + * + * This behaviour only applies if the client is a mongos + */ + function retryOnceOnTransientOnMongos(session, func) { + if (session.getClient().isMongos()) { + try { + func(); + } catch (e) { + if ((e.hasOwnProperty('errorLabels') && + e.errorLabels.includes('TransientTransactionError'))) { + func(); + } else { + throw e; + } + } + } else { + func(); + } + } + + /** + * Runs 'func' and retries it only once restarting the transaction if a transient + * error occurred. + * + * This behaviour only applies if the client is a mongos + */ + function retryOnceOnTransientAndRestartTxnOnMongos(session, func, txnOptions) { + if (session.getClient().isMongos()) { + try { + func(); + } catch (e) { + if ((e.hasOwnProperty('errorLabels') && + e.errorLabels.includes('TransientTransactionError'))) { + session.abortTransaction_forTesting(); + session.startTransaction(txnOptions); + func(); + } else { + throw e; + } + } + } else { + func(); + } + } + + return { + withTxnAndAutoRetryOnMongos, + retryOnceOnTransientOnMongos, + retryOnceOnTransientAndRestartTxnOnMongos + }; +})();
\ No newline at end of file diff --git a/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js b/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js deleted file mode 100644 index 36d08d02279..00000000000 --- a/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Tests that after a restart on a shard a multi-write operation will not be repeated - * - * This test requrires persistence because it asumes the shard will still have it's - * data after restarting - * Remove requires_fcv_44 tag if SERVER-44598 is backported or 4.4 becomes last stable - * @tags: [requires_persistence, requires_fcv_44] - */ -(function() { -'use strict'; -var st = new ShardingTest({shards: 2, mongos: 1}); - -st.enableSharding('TestDB'); -st.ensurePrimaryShard('TestDB', st.rs1.getURL()); - -// Hash shard the collection so that the chunks are placed uniformly across the shards from the -// beginning (because the collection is empty) -st.shardCollection('TestDB.TestColl', {Key: 'hashed'}, false, {numInitialChunks: 120}); -// Helper function to restart a node and wait that the entire set is operational - -// Insert a document outside the shard key range so the update below will generate a scather- -// gather write operation -st.s0.getDB('TestDB').TestColl.insert({X: 'Key 0', inc: 0}); - -// Restart shard 0, given the fact that the ssv flag is true, then -// mongos should send the info only once - -st.restartShardRS(0); - -// Do a scather-gather write operation. One of the shards have been restarted -// so StaleShardVersion should be returned, and the multi-write should be -// executed only once per shard -var bulkOp = st.s0.getDB('TestDB').TestColl.initializeUnorderedBulkOp(); -bulkOp.find({X: 'Key 0'}).update({$inc: {inc: 1}}); -bulkOp.execute(); - -// Make sure inc have been incremented only 1 time -assert.eq(1, st.s0.getDB('TestDB').TestColl.findOne({X: 'Key 0'}).inc); -st.stop(); -})(); diff --git a/jstests/sharding/stale_mongos_and_restarted_shards_agree_on_shard_version.js b/jstests/sharding/stale_mongos_and_restarted_shards_agree_on_shard_version.js new file mode 100644 index 00000000000..700d47990f8 --- /dev/null +++ b/jstests/sharding/stale_mongos_and_restarted_shards_agree_on_shard_version.js @@ -0,0 +1,95 @@ +/** + * Tests that after a restart on a shard multi write operations, finds and aggregations + * work as expected on stale routers + * + * This test requrires persistence because it asumes the shard will still have it's + * data after restarting + * TODO: Remove requires_fcv_44 tag if SERVER-32198 is backported or 4.4 becomes last + * @tags: [requires_persistence, requires_fcv_44] + */ +(function() { +'use strict'; +// TODO (SERVER-32198) remove after SERVER-32198 is fixed +TestData.skipCheckOrphans = true; + +var st = new ShardingTest({shards: 2, mongos: 2}); + +// Used to get the shard destination ids for the moveChunks commands +var shard0Name = st.shard0.shardName; +var shard1Name = st.shard1.shardName; + +var database = 'TestDB'; +st.enableSharding(database); +// Creates and shard collName with 2 chunks, one per shard. Only one router knows that collName +// is sharded, and all the shards are restarted so they don't have the collection's sharding status +function setupCollectionForTest(collName) { + var ns = database + '.' + collName; + assert.commandFailedWithCode(st.s0.adminCommand({getShardVersion: ns}), + ErrorCodes.NamespaceNotSharded); + assert.commandFailedWithCode(st.s1.adminCommand({getShardVersion: ns}), + ErrorCodes.NamespaceNotSharded); + st.shardCollection(ns, {Key: 1}); + + st.s0.adminCommand({split: ns, middle: {Key: 0}}); + st.s0.adminCommand({moveChunk: ns, find: {Key: -1}, to: shard0Name}); + st.s0.adminCommand({moveChunk: ns, find: {Key: 0}, to: shard1Name}); + assert.commandFailedWithCode(st.s1.adminCommand({getShardVersion: ns}), + ErrorCodes.NamespaceNotSharded); + + // This document will go to shard 0 + assert.commandWorked(st.s0.getDB(database).getCollection(collName).insert({Key: -1, inc: 0})); + // This document will go to shard 1 + assert.commandWorked(st.s0.getDB(database).getCollection(collName).insert({Key: 0, inc: 0})); + st.restartShardRS(0); + st.restartShardRS(1); +} + +// Test a multi insert with collection unknown on a stale mongos +setupCollectionForTest('TestColl'); + +var bulkOp = st.s1.getDB('TestDB').TestColl.initializeUnorderedBulkOp(); +bulkOp.insert({Key: -2}); +bulkOp.insert({Key: 1}); +bulkOp.execute(); + +assert.neq(4, st.s0.getDB('TestDB').TestColl.find().itcount()); +// TODO (SERVER-32198): After SERVER-32198 is fixed and backported change neq to eq +assert.neq(4, st.s1.getDB('TestDB').TestColl.find().itcount()); + +// Test multi update with collection unknown on a stale mongos +setupCollectionForTest('TestUpdateColl'); + +var updateBulkOp = st.s1.getDB('TestDB').TestUpdateColl.initializeUnorderedBulkOp(); +updateBulkOp.find({}).update({$inc: {inc: 1}}); +updateBulkOp.execute(); +var s0Doc = st.s0.getDB('TestDB').TestUpdateColl.findOne({Key: -1}); +// TODO (SERVER-32198): After SERVER-32198 is fixed and backported change neq to eq +assert.neq(1, s0Doc.inc); +var s1Doc = st.s0.getDB('TestDB').TestUpdateColl.findOne({Key: 0}); +assert.eq(1, s1Doc.inc); + +// Test multi remove with collection unknown on a stale mongos +setupCollectionForTest('TestRemoveColl'); + +var removeBulkOp = st.s1.getDB('TestDB').TestRemoveColl.initializeUnorderedBulkOp(); +removeBulkOp.find({}).remove({}); +removeBulkOp.execute(); +// TODO (SERVER-32198): After SERVER-32198 is fixed and backported change neq to eq +assert.neq(0, st.s0.getDB('TestDB').TestRemoveColl.find().itcount()); + +// Test find with collection unknown on a stale mongos +setupCollectionForTest('TestFindColl'); + +var coll = st.s1.getDB('TestDB').TestFindColl.find().toArray(); +// TODO (SERVER-32198): After SERVER-32198 is fixed and backported change neq to eq +assert.neq(2, coll.length); + +// Test aggregate with collection unknown on a stale mongos +setupCollectionForTest('TestAggregateColl'); + +var count = st.s1.getDB('TestDB').TestAggregateColl.aggregate([{$count: 'total'}]).toArray(); +// TODO (SERVER-32198): After SERVER-32198 is fixed and backported change neq to eq +assert.neq(2, count[0].total); + +st.stop(); +})(); diff --git a/src/mongo/db/s/collection_sharding_runtime.cpp b/src/mongo/db/s/collection_sharding_runtime.cpp index 4d91e633550..becd6afd89c 100644 --- a/src/mongo/db/s/collection_sharding_runtime.cpp +++ b/src/mongo/db/s/collection_sharding_runtime.cpp @@ -123,11 +123,21 @@ CollectionShardingRuntime* CollectionShardingRuntime::get_UNSAFE(ServiceContext* } ScopedCollectionFilter CollectionShardingRuntime::getOwnershipFilter(OperationContext* opCtx) { + const auto optReceivedShardVersion = getOperationReceivedVersion(opCtx, _nss); + if (!optReceivedShardVersion) + return {kUnshardedCollection}; + const auto atClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime(); auto optMetadata = _getMetadataWithVersionCheckAt(opCtx, atClusterTime); - if (!optMetadata) - return {kUnshardedCollection}; + uassert(StaleConfigInfo(_nss, + *optReceivedShardVersion, + getCurrentShardVersionIfKnown(), + ShardingState::get(opCtx)->shardId()), + str::stream() << "sharding status of collection " << _nss.ns() + << " is not currently available for filtering and needs to be recovered " + << "from the config server", + optMetadata); return {std::move(*optMetadata)}; } @@ -365,7 +375,9 @@ CollectionShardingRuntime::_getMetadataWithVersionCheckAt( if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) { uassert(std::move(sci), - "no metadata found on multi-write operation, need to refresh", + str::stream() + << "sharding status of collection " << _nss.ns() + << " is not currently known and needs to be recovered from the config server", !receivedShardVersion.getCanThrowSSVOnIgnored() || _getCurrentMetadataIfKnown(atClusterTime)); return boost::none; |