summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcos José Grillo Ramírez <marcos.grillo@mongodb.com>2020-03-02 13:20:40 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-03-04 20:32:41 +0000
commit246ed94e7fc900e034775c2d60bedde2bf92bce2 (patch)
tree6fdf189d27321b04cc554f61a3d05009103d58e5
parentc752f2b20efee7de94123787950415723c38b91f (diff)
downloadmongo-246ed94e7fc900e034775c2d60bedde2bf92bce2.tar.gz
SERVER-45779 Throw Stale Shard Version when collection is unknown on getOwnershipFilter
-rw-r--r--jstests/core/txns/aggregation_in_transaction.js144
-rw-r--r--jstests/core/txns/basic_causal_consistency.js24
-rw-r--r--jstests/core/txns/commands_not_allowed_in_txn.js29
-rw-r--r--jstests/core/txns/concurrent_drops_and_creates.js18
-rw-r--r--jstests/core/txns/finished_transaction_error_handling.js31
-rw-r--r--jstests/core/txns/list_collections_not_blocked_by_txn.js42
-rw-r--r--jstests/core/txns/multi_statement_transaction_abort.js27
-rw-r--r--jstests/core/txns/multi_statement_transaction_using_api.js49
-rw-r--r--jstests/core/txns/no_read_or_write_concern_inside_txn.js30
-rw-r--r--jstests/core/txns/non_transactional_operations_on_session_with_transaction.js26
-rw-r--r--jstests/core/txns/repeatable_reads_in_transaction.js18
-rw-r--r--jstests/libs/auto_retry_transaction_in_sharding.js85
-rw-r--r--jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js40
-rw-r--r--jstests/sharding/stale_mongos_and_restarted_shards_agree_on_shard_version.js95
-rw-r--r--src/mongo/db/s/collection_sharding_runtime.cpp18
15 files changed, 475 insertions, 201 deletions
diff --git a/jstests/core/txns/aggregation_in_transaction.js b/jstests/core/txns/aggregation_in_transaction.js
index 76c5f4d6a0a..e6f9dffa3a6 100644
--- a/jstests/core/txns/aggregation_in_transaction.js
+++ b/jstests/core/txns/aggregation_in_transaction.js
@@ -5,6 +5,10 @@
load("jstests/libs/fixture_helpers.js"); // For isSharded.
+// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed
+// For withTxnAndAutoRetryOnMongos.
+load('jstests/libs/auto_retry_transaction_in_sharding.js');
+
const session = db.getMongo().startSession({causalConsistency: false});
const testDB = session.getDatabase("test");
const coll = testDB.getCollection("aggregation_in_transaction");
@@ -31,76 +35,90 @@ assert.commandWorked(foreignColl.insert(foreignDoc, {writeConcern: {w: "majority
const isForeignSharded = FixtureHelpers.isSharded(foreignColl);
-// Run a dummy find to start the transaction.
-jsTestLog("Starting transaction.");
-session.startTransaction({readConcern: {level: "snapshot"}});
-let cursor = coll.find();
-cursor.next();
-
-// Insert a document outside of the transaction. Subsequent aggregations should not see this
-// document.
-jsTestLog("Inserting document outside of transaction.");
-assert.commandWorked(db.getSiblingDB(testDB.getName()).getCollection(coll.getName()).insert({
- _id: "not_visible_in_transaction",
- foreignKey: "orange",
-}));
-
-// Perform an aggregation that is fed by a cursor on the underlying collection. Only the
-// majority-committed document present at the start of the transaction should be found.
-jsTestLog("Starting aggregations inside of the transaction.");
-cursor = coll.aggregate({$match: {}});
-assert.docEq(testDoc, cursor.next());
-assert(!cursor.hasNext());
-
-// Perform aggregations that look at other collections.
-// TODO: SERVER-39162 Sharded $lookup is not supported in transactions.
-if (!isForeignSharded) {
- const lookupDoc = Object.extend(testDoc, {lookup: [foreignDoc]});
- cursor = coll.aggregate({
- $lookup: {
- from: foreignColl.getName(),
- localField: "foreignKey",
- foreignField: "_id",
- as: "lookup",
- }
- });
- assert.docEq(cursor.next(), lookupDoc);
- assert(!cursor.hasNext());
+const txnOptions = {
+ readConcern: {level: "snapshot"}
+};
- cursor = coll.aggregate({
- $graphLookup: {
- from: foreignColl.getName(),
- startWith: "$foreignKey",
- connectFromField: "foreignKey",
- connectToField: "_id",
- as: "lookup"
- }
- });
- assert.docEq(cursor.next(), lookupDoc);
+// TODO (SERVER-39704): We use the withTxnAndAutoRetryOnMongos
+// function to handle how MongoS will propagate a StaleShardVersion error as a
+// TransientTransactionError. After SERVER-39704 is completed the
+// withTxnAndAutoRetryOnMongos function can be removed
+withTxnAndAutoRetryOnMongos(session, () => {
+ // Cleaning collection in case the transaction is retried
+ db.getSiblingDB(testDB.getName()).getCollection(coll.getName()).remove({
+ _id: "not_visible_in_transaction"
+ });
+
+ // Run a dummy find to start the transaction.
+ jsTestLog("Transaction started.");
+
+ let cursor = coll.find();
+ cursor.next();
+
+ // Insert a document outside of the transaction. Subsequent aggregations should not see this
+ // document.
+ jsTestLog("Inserting document outside of transaction.");
+ assert.commandWorked(db.getSiblingDB(testDB.getName()).getCollection(coll.getName()).insert({
+ _id: "not_visible_in_transaction",
+ foreignKey: "orange",
+ }));
+
+ // Perform an aggregation that is fed by a cursor on the underlying collection. Only the
+ // majority-committed document present at the start of the transaction should be found.
+ jsTestLog("Starting aggregations inside of the transaction.");
+ cursor = coll.aggregate({$match: {}});
+ assert.docEq(testDoc, cursor.next());
assert(!cursor.hasNext());
-} else {
- // TODO SERVER-39048: Test that $lookup on sharded collection is banned
- // within a transaction.
-}
-jsTestLog("Testing $count within a transaction.");
+ // Perform aggregations that look at other collections.
+ // TODO: SERVER-39162 Sharded $lookup is not supported in transactions.
+ if (!isForeignSharded) {
+ const lookupDoc = Object.merge(testDoc, {lookup: [foreignDoc]});
+ cursor = coll.aggregate({
+ $lookup: {
+ from: foreignColl.getName(),
+ localField: "foreignKey",
+ foreignField: "_id",
+ as: "lookup",
+ }
+ });
+ assert.docEq(cursor.next(), lookupDoc);
+ assert(!cursor.hasNext());
+
+ cursor = coll.aggregate({
+ $graphLookup: {
+ from: foreignColl.getName(),
+ startWith: "$foreignKey",
+ connectFromField: "foreignKey",
+ connectToField: "_id",
+ as: "lookup"
+ }
+ });
+ assert.docEq(cursor.next(), lookupDoc);
+ assert(!cursor.hasNext());
+ } else {
+ // TODO SERVER-39048: Test that $lookup on sharded collection is banned
+ // within a transaction.
+ }
+
+ jsTestLog("Testing $count within a transaction.");
-let countRes = coll.aggregate([{$count: "count"}]).toArray();
-assert.eq(countRes.length, 1, tojson(countRes));
-assert.eq(countRes[0].count, 1, tojson(countRes));
+ let countRes = coll.aggregate([{$count: "count"}]).toArray();
+ assert.eq(countRes.length, 1, tojson(countRes));
+ assert.eq(countRes[0].count, 1, tojson(countRes));
-assert.commandWorked(coll.insert({a: 2}));
-countRes = coll.aggregate([{$count: "count"}]).toArray();
-assert.eq(countRes.length, 1, tojson(countRes));
-assert.eq(countRes[0].count, 2, tojson(countRes));
+ assert.commandWorked(coll.insert({a: 2}));
+ countRes = coll.aggregate([{$count: "count"}]).toArray();
+ assert.eq(countRes.length, 1, tojson(countRes));
+ assert.eq(countRes[0].count, 2, tojson(countRes));
-assert.commandWorked(
- db.getSiblingDB(testDB.getName()).getCollection(coll.getName()).insert({a: 3}));
-countRes = coll.aggregate([{$count: "count"}]).toArray();
-assert.eq(countRes.length, 1, tojson(countRes));
-assert.eq(countRes[0].count, 2, tojson(countRes));
+ assert.commandWorked(
+ db.getSiblingDB(testDB.getName()).getCollection(coll.getName()).insert({a: 3}));
+ countRes = coll.aggregate([{$count: "count"}]).toArray();
+ assert.eq(countRes.length, 1, tojson(countRes));
+ assert.eq(countRes[0].count, 2, tojson(countRes));
+}, txnOptions);
-assert.commandWorked(session.commitTransaction_forTesting());
jsTestLog("Transaction committed.");
// Perform aggregations with non-cursor initial sources and assert that they are not supported
diff --git a/jstests/core/txns/basic_causal_consistency.js b/jstests/core/txns/basic_causal_consistency.js
index 5a78ddc0900..beda5fa1d91 100644
--- a/jstests/core/txns/basic_causal_consistency.js
+++ b/jstests/core/txns/basic_causal_consistency.js
@@ -3,6 +3,10 @@
(function() {
"use strict";
+// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed
+// For withTxnAndAutoRetryOnMongos.
+load('jstests/libs/auto_retry_transaction_in_sharding.js');
+
const dbName = "test";
const collName = "basic_causal_consistency";
const testDB = db.getSiblingDB(dbName);
@@ -18,18 +22,20 @@ const session = testDB.getMongo().startSession(sessionOptions);
const sessionDb = session.getDatabase(dbName);
const sessionColl = sessionDb.getCollection(collName);
-session.startTransaction({readConcern: {level: "snapshot"}});
-
-// Performing a read first should work when snapshot readConcern is specified.
-assert.docEq(null, sessionColl.findOne({_id: "insert-1"}));
-
-assert.commandWorked(sessionColl.insert({_id: "insert-1"}));
+// TODO (SERVER-39704): We use the withTxnAndAutoRetryOnMongos
+// function to handle how MongoS will propagate a StaleShardVersion error as a
+// TransientTransactionError. After SERVER-39704 is completed the
+// withTxnAndAutoRetryOnMongos function can be removed
+withTxnAndAutoRetryOnMongos(session, () => {
+ // Performing a read first should work when snapshot readConcern is specified.
+ assert.docEq(null, sessionColl.findOne({_id: "insert-1"}));
-assert.docEq(null, sessionColl.findOne({_id: "insert-2"}));
+ assert.commandWorked(sessionColl.insert({_id: "insert-1"}));
-assert.docEq({_id: "insert-1"}, sessionColl.findOne({_id: "insert-1"}));
+ assert.docEq(null, sessionColl.findOne({_id: "insert-2"}));
-assert.commandWorked(session.commitTransaction_forTesting());
+ assert.docEq({_id: "insert-1"}, sessionColl.findOne({_id: "insert-1"}));
+});
session.endSession();
}());
diff --git a/jstests/core/txns/commands_not_allowed_in_txn.js b/jstests/core/txns/commands_not_allowed_in_txn.js
index fdb296b9d3a..8d161c8b21b 100644
--- a/jstests/core/txns/commands_not_allowed_in_txn.js
+++ b/jstests/core/txns/commands_not_allowed_in_txn.js
@@ -6,6 +6,10 @@
(function() {
"use strict";
+// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed
+// For retryOnceOnTransientOnMongos.
+load('jstests/libs/auto_retry_transaction_in_sharding.js');
+
const dbName = "test";
const collName = "commands_not_allowed_in_txn";
const testDB = db.getSiblingDB(dbName);
@@ -75,15 +79,22 @@ function testCommand(command) {
// Check that the command fails inside a transaction, but does not abort the transaction.
setup();
- assert.commandWorked(sessionDb.runCommand({
- insert: collName,
- documents: [{}],
- readConcern: {level: "snapshot"},
- txnNumber: NumberLong(++txnNumber),
- stmtId: NumberInt(0),
- startTransaction: true,
- autocommit: false
- }));
+ // TODO (SERVER-39704): We use the retryOnceOnTransientOnMongos
+ // function to handle how MongoS will propagate a StaleShardVersion error as a
+ // TransientTransactionError. After SERVER-39704 is completed the
+ // retryOnceOnTransientOnMongos can be removed
+ retryOnceOnTransientOnMongos(session, () => {
+ assert.commandWorked(sessionDb.runCommand({
+ insert: collName,
+ documents: [{}],
+ readConcern: {level: "snapshot"},
+ txnNumber: NumberLong(++txnNumber),
+ stmtId: NumberInt(0),
+ startTransaction: true,
+ autocommit: false
+ }));
+ });
+
res = assert.commandFailedWithCode(
sessionDb.runCommand(Object.assign(
{},
diff --git a/jstests/core/txns/concurrent_drops_and_creates.js b/jstests/core/txns/concurrent_drops_and_creates.js
index 50e53d9e53e..2b1e7ce7f9f 100644
--- a/jstests/core/txns/concurrent_drops_and_creates.js
+++ b/jstests/core/txns/concurrent_drops_and_creates.js
@@ -4,6 +4,10 @@
(function() {
"use strict";
+// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed
+// For retryOnceOnTransientAndRestartTxnOnMongos.
+load('jstests/libs/auto_retry_transaction_in_sharding.js');
+
const dbName1 = "test1";
const dbName2 = "test2";
const collNameA = "coll_A";
@@ -31,8 +35,18 @@ assert.commandWorked(sessionCollA.insert({}));
assert.commandWorked(sessionCollB.insert({}));
// Start the transaction with a write to collection A.
-session.startTransaction({readConcern: {level: "snapshot"}});
-assert.commandWorked(sessionCollA.insert({}));
+const txnOptions = {
+ readConcern: {level: "snapshot"}
+};
+session.startTransaction(txnOptions);
+
+// TODO (SERVER-39704): We use the retryOnceOnTransientAndRestartTxnOnMongos
+// function to handle how MongoS will propagate a StaleShardVersion error as a
+// TransientTransactionError. After SERVER-39704 is completed the
+// retryOnceOnTransientAndRestartTxnOnMongos can be removed
+retryOnceOnTransientAndRestartTxnOnMongos(session, () => {
+ assert.commandWorked(sessionCollA.insert({}));
+}, txnOptions);
// Drop collection B outside of the transaction. Advance the cluster time of the session
// performing the drop to ensure it happens at a later cluster time than the transaction began.
diff --git a/jstests/core/txns/finished_transaction_error_handling.js b/jstests/core/txns/finished_transaction_error_handling.js
index 7cabb693fe5..2c22e606532 100644
--- a/jstests/core/txns/finished_transaction_error_handling.js
+++ b/jstests/core/txns/finished_transaction_error_handling.js
@@ -3,6 +3,10 @@
(function() {
"use strict";
+// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed
+// For retryOnceOnTransientOnMongos.
+load('jstests/libs/auto_retry_transaction_in_sharding.js');
+
const dbName = "test";
const collName = "finished_transaction_error_handling";
const testDB = db.getSiblingDB(dbName);
@@ -25,16 +29,23 @@ const session = db.getMongo().startSession(sessionOptions);
const sessionDb = session.getDatabase(dbName);
jsTestLog("Test aborted transaction number cannot be reused.");
-txnNumber++;
-assert.commandWorked(sessionDb.runCommand({
- insert: collName,
- documents: [{_id: "abort-txn-1"}],
- readConcern: {level: "snapshot"},
- txnNumber: NumberLong(txnNumber),
- startTransaction: true,
- stmtId: NumberInt(stmtId++),
- autocommit: false
-}));
+
+// TODO (SERVER-39704): We use the retryOnceOnTransientOnMongos
+// function to handle how MongoS will propagate a StaleShardVersion error as a
+// TransientTransactionError. After SERVER-39704 is completed the
+// retryOnceOnTransientOnMongos can be removed
+retryOnceOnTransientOnMongos(session, () => {
+ assert.commandWorked(sessionDb.runCommand({
+ insert: collName,
+ documents: [{_id: "abort-txn-1"}],
+ readConcern: {level: "snapshot"},
+ txnNumber: NumberLong(++txnNumber),
+ startTransaction: true,
+ stmtId: NumberInt(stmtId++),
+ autocommit: false
+ }));
+});
+
assert.commandWorked(sessionDb.adminCommand({
abortTransaction: 1,
writeConcern: {w: "majority"},
diff --git a/jstests/core/txns/list_collections_not_blocked_by_txn.js b/jstests/core/txns/list_collections_not_blocked_by_txn.js
index faf095129d2..fb9cf6340e4 100644
--- a/jstests/core/txns/list_collections_not_blocked_by_txn.js
+++ b/jstests/core/txns/list_collections_not_blocked_by_txn.js
@@ -3,6 +3,11 @@
// as a result of taking MODE_S locks that are incompatible with MODE_IX needed for writes.
(function() {
"use strict";
+
+// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed
+// For withTxnAndAutoRetryOnMongos.
+load('jstests/libs/auto_retry_transaction_in_sharding.js');
+
var dbName = 'list_collections_not_blocked';
var mydb = db.getSiblingDB(dbName);
var session = db.getMongo().startSession({causalConsistency: false});
@@ -21,23 +26,26 @@ if (isMongos) {
assert.commandWorked(sessionDb.runCommand({listCollections: 1, nameOnly: true}));
}
-session.startTransaction({readConcern: {level: "snapshot"}});
-
-assert.commandWorked(sessionDb.foo.insert({x: 1}));
-
-for (let nameOnly of [false, true]) {
- // Check that both the nameOnly and full versions of listCollections don't block.
- let res = mydb.runCommand({listCollections: 1, nameOnly, maxTimeMS: 20 * 1000});
- assert.commandWorked(res, "listCollections should have succeeded and not timed out");
- let collObj = res.cursor.firstBatch[0];
- // collObj should only have name and type fields.
- assert.eq('foo', collObj.name);
- assert.eq('collection', collObj.type);
- assert(collObj.hasOwnProperty("idIndex") == !nameOnly, tojson(collObj));
- assert(collObj.hasOwnProperty("options") == !nameOnly, tojson(collObj));
- assert(collObj.hasOwnProperty("info") == !nameOnly, tojson(collObj));
-}
+// TODO (SERVER-39704): We use the withTxnAndAutoRetryOnMongos
+// function to handle how MongoS will propagate a StaleShardVersion error as a
+// TransientTransactionError. After SERVER-39704 is completed the
+// withTxnAndAutoRetryOnMongos function can be removed
+withTxnAndAutoRetryOnMongos(session, () => {
+ assert.commandWorked(sessionDb.foo.insert({x: 1}));
+
+ for (let nameOnly of [false, true]) {
+ // Check that both the nameOnly and full versions of listCollections don't block.
+ let res = mydb.runCommand({listCollections: 1, nameOnly, maxTimeMS: 20 * 1000});
+ assert.commandWorked(res, "listCollections should have succeeded and not timed out");
+ let collObj = res.cursor.firstBatch[0];
+ // collObj should only have name and type fields.
+ assert.eq('foo', collObj.name);
+ assert.eq('collection', collObj.type);
+ assert(collObj.hasOwnProperty("idIndex") == !nameOnly, tojson(collObj));
+ assert(collObj.hasOwnProperty("options") == !nameOnly, tojson(collObj));
+ assert(collObj.hasOwnProperty("info") == !nameOnly, tojson(collObj));
+ }
+}, {readConcern: {level: "snapshot"}});
-assert.commandWorked(session.commitTransaction_forTesting());
session.endSession();
}());
diff --git a/jstests/core/txns/multi_statement_transaction_abort.js b/jstests/core/txns/multi_statement_transaction_abort.js
index a7946af8eda..7a8cf0b85df 100644
--- a/jstests/core/txns/multi_statement_transaction_abort.js
+++ b/jstests/core/txns/multi_statement_transaction_abort.js
@@ -3,6 +3,10 @@
(function() {
"use strict";
+// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed
+// For retryOnceOnTransientOnMongos.
+load('jstests/libs/auto_retry_transaction_in_sharding.js');
+
const dbName = "test";
const collName = "multi_statement_transaction_abort";
const testDB = db.getSiblingDB(dbName);
@@ -22,14 +26,21 @@ const sessionDb = session.getDatabase(dbName);
jsTest.log("Insert two documents in a transaction and abort");
// Insert a doc within the transaction.
-assert.commandWorked(sessionDb.runCommand({
- insert: collName,
- documents: [{_id: "insert-1"}],
- readConcern: {level: "snapshot"},
- txnNumber: NumberLong(txnNumber),
- startTransaction: true,
- autocommit: false
-}));
+
+// TODO (SERVER-39704): We use the retryOnceOnTransientOnMongos
+// function to handle how MongoS will propagate a StaleShardVersion error as a
+// TransientTransactionError. After SERVER-39704 is completed the
+// retryOnceOnTransientOnMongos can be removed
+retryOnceOnTransientOnMongos(session, () => {
+ assert.commandWorked(sessionDb.runCommand({
+ insert: collName,
+ documents: [{_id: "insert-1"}],
+ readConcern: {level: "snapshot"},
+ txnNumber: NumberLong(++txnNumber),
+ startTransaction: true,
+ autocommit: false
+ }));
+});
// Insert a doc within a transaction.
assert.commandWorked(sessionDb.runCommand({
diff --git a/jstests/core/txns/multi_statement_transaction_using_api.js b/jstests/core/txns/multi_statement_transaction_using_api.js
index 910fa45c68b..f1b961f9fdc 100644
--- a/jstests/core/txns/multi_statement_transaction_using_api.js
+++ b/jstests/core/txns/multi_statement_transaction_using_api.js
@@ -3,6 +3,10 @@
(function() {
"use strict";
+// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed
+// For withTxnAndAutoRetryOnMongos.
+load('jstests/libs/auto_retry_transaction_in_sharding.js');
+
const dbName = "test";
const collName = "multi_transaction_test_using_api";
const testDB = db.getSiblingDB(dbName);
@@ -39,37 +43,40 @@ session.startTransaction({readConcern: {level: "snapshot"}, writeConcern: {w: "m
assert.commandWorked(session.commitTransaction_forTesting());
jsTestLog("Run CRUD ops, read ops, and commit transaction.");
-session.startTransaction({readConcern: {level: "snapshot"}, writeConcern: {w: "majority"}});
-// Performing a read first should work when snapshot readConcern is specified.
-assert.docEq(null, sessionColl.findOne({_id: "insert-1"}));
+// TODO (SERVER-39704): We use the withTxnAndAutoRetryOnMongos
+// function to handle how MongoS will propagate a StaleShardVersion error as a
+// TransientTransactionError. After SERVER-39704 is completed the
+// withTxnAndAutoRetryOnMongos function can be removed
+withTxnAndAutoRetryOnMongos(session, () => {
+ // Performing a read first should work when snapshot readConcern is specified.
+ assert.docEq(null, sessionColl.findOne({_id: "insert-1"}));
-assert.commandWorked(sessionColl.insert({_id: "insert-1", a: 0}));
+ assert.commandWorked(sessionColl.insert({_id: "insert-1", a: 0}));
-assert.commandWorked(sessionColl.insert({_id: "insert-2", a: 0}));
+ assert.commandWorked(sessionColl.insert({_id: "insert-2", a: 0}));
-assert.commandWorked(sessionColl.insert({_id: "insert-3", a: 0}));
+ assert.commandWorked(sessionColl.insert({_id: "insert-3", a: 0}));
-assert.commandWorked(sessionColl.update({_id: "insert-1"}, {$inc: {a: 1}}));
+ assert.commandWorked(sessionColl.update({_id: "insert-1"}, {$inc: {a: 1}}));
-assert.commandWorked(sessionColl.deleteOne({_id: "insert-2"}));
+ assert.commandWorked(sessionColl.deleteOne({_id: "insert-2"}));
-sessionColl.findAndModify({query: {_id: "insert-3"}, update: {$set: {a: 2}}});
+ sessionColl.findAndModify({query: {_id: "insert-3"}, update: {$set: {a: 2}}});
-// Try to find a document within a transaction.
-let cursor = sessionColl.find({_id: "insert-1"});
-assert.docEq({_id: "insert-1", a: 1}, cursor.next());
-assert(!cursor.hasNext());
+ // Try to find a document within a transaction.
+ let cursor = sessionColl.find({_id: "insert-1"});
+ assert.docEq({_id: "insert-1", a: 1}, cursor.next());
+ assert(!cursor.hasNext());
-// Try to find a document using findOne within a transaction
-assert.eq({_id: "insert-1", a: 1}, sessionColl.findOne({_id: "insert-1"}));
+ // Try to find a document using findOne within a transaction
+ assert.eq({_id: "insert-1", a: 1}, sessionColl.findOne({_id: "insert-1"}));
-// Find a document with the aggregation shell helper within a transaction.
-cursor = sessionColl.aggregate({$match: {_id: "insert-1"}});
-assert.docEq({_id: "insert-1", a: 1}, cursor.next());
-assert(!cursor.hasNext());
-
-assert.commandWorked(session.commitTransaction_forTesting());
+ // Find a document with the aggregation shell helper within a transaction.
+ cursor = sessionColl.aggregate({$match: {_id: "insert-1"}});
+ assert.docEq({_id: "insert-1", a: 1}, cursor.next());
+ assert(!cursor.hasNext());
+}, {readConcern: {level: "snapshot"}, writeConcern: {w: "majority"}});
// Make sure the correct documents exist after committing the transaciton.
assert.eq({_id: "insert-1", a: 1}, sessionColl.findOne({_id: "insert-1"}));
diff --git a/jstests/core/txns/no_read_or_write_concern_inside_txn.js b/jstests/core/txns/no_read_or_write_concern_inside_txn.js
index b8333eed92f..165a0c99fb1 100644
--- a/jstests/core/txns/no_read_or_write_concern_inside_txn.js
+++ b/jstests/core/txns/no_read_or_write_concern_inside_txn.js
@@ -7,6 +7,11 @@
(function() {
"use strict";
+
+// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed
+// For retryOnceOnTransientOnMongos.
+load('jstests/libs/auto_retry_transaction_in_sharding.js');
+
const dbName = "test";
const collName = "no_read_or_write_concerns_inside_txn";
const testDB = db.getSiblingDB(dbName);
@@ -27,15 +32,22 @@ let txnNumber = 0;
let stmtId = 0;
jsTestLog("Starting first transaction");
-assert.commandWorked(sessionDb.runCommand({
- insert: collName,
- documents: [{_id: 0}],
- readConcern: {level: "snapshot"},
- startTransaction: true,
- autocommit: false,
- txnNumber: NumberLong(txnNumber),
- stmtId: NumberInt(stmtId++)
-}));
+// Insert a doc within the transaction.
+// TODO (SERVER-39704): We use the retryOnceOnTransientOnMongos
+// function to handle how MongoS will propagate a StaleShardVersion error as a
+// TransientTransactionError. After SERVER-39704 is completed the
+// retryOnceOnTransientOnMongos can be removed
+retryOnceOnTransientOnMongos(session, () => {
+ assert.commandWorked(sessionDb.runCommand({
+ insert: collName,
+ documents: [{_id: 0}],
+ readConcern: {level: "snapshot"},
+ startTransaction: true,
+ autocommit: false,
+ txnNumber: NumberLong(++txnNumber),
+ stmtId: NumberInt(stmtId++)
+ }));
+});
jsTestLog("Attempting to insert with readConcern: snapshot within a transaction.");
assert.commandFailedWithCode(sessionDb.runCommand({
diff --git a/jstests/core/txns/non_transactional_operations_on_session_with_transaction.js b/jstests/core/txns/non_transactional_operations_on_session_with_transaction.js
index 8fb9b6b5a3e..d3c26b49de8 100644
--- a/jstests/core/txns/non_transactional_operations_on_session_with_transaction.js
+++ b/jstests/core/txns/non_transactional_operations_on_session_with_transaction.js
@@ -9,6 +9,10 @@
(function() {
"use strict";
+// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed
+// For retryOnceOnTransientOnMongos.
+load('jstests/libs/auto_retry_transaction_in_sharding.js');
+
const dbName = "test";
const collName = "non_transactional_operations_on_session_with_transactions";
@@ -51,14 +55,20 @@ const doc2 = {
};
// Insert a document in a transaction.
-assert.commandWorked(sessionDb.runCommand({
- insert: collName,
- documents: [doc1],
- readConcern: {level: "snapshot"},
- txnNumber: NumberLong(txnNumber),
- startTransaction: true,
- autocommit: false
-}));
+// TODO (SERVER-39704): We use the retryOnceOnTransientOnMongos
+// function to handle how MongoS will propagate a StaleShardVersion error as a
+// TransientTransactionError. After SERVER-39704 is completed the
+// retryOnceOnTransientOnMongos can be removed
+retryOnceOnTransientOnMongos(session, () => {
+ assert.commandWorked(sessionDb.runCommand({
+ insert: collName,
+ documents: [doc1],
+ readConcern: {level: "snapshot"},
+ txnNumber: NumberLong(++txnNumber),
+ startTransaction: true,
+ autocommit: false
+ }));
+});
// Test that we cannot observe the insert outside of the transaction.
assert.eq(null, testColl.findOne(doc1));
diff --git a/jstests/core/txns/repeatable_reads_in_transaction.js b/jstests/core/txns/repeatable_reads_in_transaction.js
index 870a1d58e6f..fbfb34ed1de 100644
--- a/jstests/core/txns/repeatable_reads_in_transaction.js
+++ b/jstests/core/txns/repeatable_reads_in_transaction.js
@@ -4,6 +4,10 @@
(function() {
"use strict";
+// TODO (SERVER-39704): Remove the following load after SERVER-397074 is completed
+// For retryOnceOnTransientAndRestartTxnOnMongos.
+load('jstests/libs/auto_retry_transaction_in_sharding.js');
+
const dbName = "test";
const collName = "repeatable_reads_in_transaction";
const testDB = db.getSiblingDB(dbName);
@@ -24,6 +28,9 @@ const sessionColl = sessionDb.getCollection(collName);
const session2 = testDB.getMongo().startSession(sessionOptions);
const session2Db = session2.getDatabase(dbName);
const session2Coll = session2Db.getCollection(collName);
+const txnOptions = {
+ writeConcern: {w: "majority"}
+};
jsTest.log("Prepopulate the collection.");
assert.commandWorked(
@@ -35,14 +42,21 @@ assert.commandWorked(
const expectedDocs = [{_id: 0}, {_id: 1}, {_id: 2}];
jsTestLog("Start a read-only transaction on the first session.");
-session.startTransaction({writeConcern: {w: "majority"}});
+session.startTransaction(txnOptions);
assert.sameMembers(expectedDocs, sessionColl.find().toArray());
jsTestLog("Start a transaction on the second session that modifies the same collection.");
session2.startTransaction({readConcern: {level: "snapshot"}, writeConcern: {w: "majority"}});
-assert.commandWorked(session2Coll.insert({_id: 3}));
+// TODO (SERVER-39704): We use the retryOnceOnTransientAndRestartTxnOnMongos
+// function to handle how MongoS will propagate a StaleShardVersion error as a
+// TransientTransactionError. After SERVER-39704 is completed the
+// retryOnceOnTransientAndRestartTxnOnMongos can be removed
+retryOnceOnTransientAndRestartTxnOnMongos(session2, () => {
+ assert.commandWorked(session2Coll.insert({_id: 3}));
+}, txnOptions);
+
assert.commandWorked(session2Coll.update({_id: 1}, {$set: {a: 1}}));
assert.commandWorked(session2Coll.deleteOne({_id: 2}));
diff --git a/jstests/libs/auto_retry_transaction_in_sharding.js b/jstests/libs/auto_retry_transaction_in_sharding.js
new file mode 100644
index 00000000000..05f1f5c9374
--- /dev/null
+++ b/jstests/libs/auto_retry_transaction_in_sharding.js
@@ -0,0 +1,85 @@
+'use strict';
+
+load('jstests/concurrency/fsm_workload_helpers/auto_retry_transaction.js');
+Random.setRandomSeed();
+
+var {
+ withTxnAndAutoRetryOnMongos,
+ retryOnceOnTransientOnMongos,
+ retryOnceOnTransientAndRestartTxnOnMongos
+} = (() => {
+ /**
+ * Runs 'func' inside of a transaction started with 'txnOptions', and automatically retries
+ * until it either succeeds or the server returns a non-TransientTransactionError error
+ * response.
+ *
+ * The caller should take care to ensure 'func' doesn't modify any captured variables in a
+ * speculative fashion where calling it multiple times would lead to unintended behavior. The
+ * transaction started by the withTxnAndAutoRetryOnMongos() function is only known to have
+ * committed after the withTxnAndAutoRetryOnMongos() function returns.
+ *
+ * This behaviour only applies if the client is a mongos
+ */
+ function withTxnAndAutoRetryOnMongos(session, func, txnOptions) {
+ if (session.getClient().isMongos()) {
+ withTxnAndAutoRetry(session, func, {txnOptions});
+ } else {
+ session.startTransaction(txnOptions);
+ func();
+ assert.commandWorked(session.commitTransaction_forTesting());
+ }
+ }
+
+ /**
+ * Runs 'func' and retries it only once if a transient error occurred.
+ *
+ * This behaviour only applies if the client is a mongos
+ */
+ function retryOnceOnTransientOnMongos(session, func) {
+ if (session.getClient().isMongos()) {
+ try {
+ func();
+ } catch (e) {
+ if ((e.hasOwnProperty('errorLabels') &&
+ e.errorLabels.includes('TransientTransactionError'))) {
+ func();
+ } else {
+ throw e;
+ }
+ }
+ } else {
+ func();
+ }
+ }
+
+ /**
+ * Runs 'func' and retries it only once restarting the transaction if a transient
+ * error occurred.
+ *
+ * This behaviour only applies if the client is a mongos
+ */
+ function retryOnceOnTransientAndRestartTxnOnMongos(session, func, txnOptions) {
+ if (session.getClient().isMongos()) {
+ try {
+ func();
+ } catch (e) {
+ if ((e.hasOwnProperty('errorLabels') &&
+ e.errorLabels.includes('TransientTransactionError'))) {
+ session.abortTransaction_forTesting();
+ session.startTransaction(txnOptions);
+ func();
+ } else {
+ throw e;
+ }
+ }
+ } else {
+ func();
+ }
+ }
+
+ return {
+ withTxnAndAutoRetryOnMongos,
+ retryOnceOnTransientOnMongos,
+ retryOnceOnTransientAndRestartTxnOnMongos
+ };
+})(); \ No newline at end of file
diff --git a/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js b/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js
deleted file mode 100644
index 36d08d02279..00000000000
--- a/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Tests that after a restart on a shard a multi-write operation will not be repeated
- *
- * This test requrires persistence because it asumes the shard will still have it's
- * data after restarting
- * Remove requires_fcv_44 tag if SERVER-44598 is backported or 4.4 becomes last stable
- * @tags: [requires_persistence, requires_fcv_44]
- */
-(function() {
-'use strict';
-var st = new ShardingTest({shards: 2, mongos: 1});
-
-st.enableSharding('TestDB');
-st.ensurePrimaryShard('TestDB', st.rs1.getURL());
-
-// Hash shard the collection so that the chunks are placed uniformly across the shards from the
-// beginning (because the collection is empty)
-st.shardCollection('TestDB.TestColl', {Key: 'hashed'}, false, {numInitialChunks: 120});
-// Helper function to restart a node and wait that the entire set is operational
-
-// Insert a document outside the shard key range so the update below will generate a scather-
-// gather write operation
-st.s0.getDB('TestDB').TestColl.insert({X: 'Key 0', inc: 0});
-
-// Restart shard 0, given the fact that the ssv flag is true, then
-// mongos should send the info only once
-
-st.restartShardRS(0);
-
-// Do a scather-gather write operation. One of the shards have been restarted
-// so StaleShardVersion should be returned, and the multi-write should be
-// executed only once per shard
-var bulkOp = st.s0.getDB('TestDB').TestColl.initializeUnorderedBulkOp();
-bulkOp.find({X: 'Key 0'}).update({$inc: {inc: 1}});
-bulkOp.execute();
-
-// Make sure inc have been incremented only 1 time
-assert.eq(1, st.s0.getDB('TestDB').TestColl.findOne({X: 'Key 0'}).inc);
-st.stop();
-})();
diff --git a/jstests/sharding/stale_mongos_and_restarted_shards_agree_on_shard_version.js b/jstests/sharding/stale_mongos_and_restarted_shards_agree_on_shard_version.js
new file mode 100644
index 00000000000..700d47990f8
--- /dev/null
+++ b/jstests/sharding/stale_mongos_and_restarted_shards_agree_on_shard_version.js
@@ -0,0 +1,95 @@
+/**
+ * Tests that after a restart on a shard multi write operations, finds and aggregations
+ * work as expected on stale routers
+ *
+ * This test requrires persistence because it asumes the shard will still have it's
+ * data after restarting
+ * TODO: Remove requires_fcv_44 tag if SERVER-32198 is backported or 4.4 becomes last
+ * @tags: [requires_persistence, requires_fcv_44]
+ */
+(function() {
+'use strict';
+// TODO (SERVER-32198) remove after SERVER-32198 is fixed
+TestData.skipCheckOrphans = true;
+
+var st = new ShardingTest({shards: 2, mongos: 2});
+
+// Used to get the shard destination ids for the moveChunks commands
+var shard0Name = st.shard0.shardName;
+var shard1Name = st.shard1.shardName;
+
+var database = 'TestDB';
+st.enableSharding(database);
+// Creates and shard collName with 2 chunks, one per shard. Only one router knows that collName
+// is sharded, and all the shards are restarted so they don't have the collection's sharding status
+function setupCollectionForTest(collName) {
+ var ns = database + '.' + collName;
+ assert.commandFailedWithCode(st.s0.adminCommand({getShardVersion: ns}),
+ ErrorCodes.NamespaceNotSharded);
+ assert.commandFailedWithCode(st.s1.adminCommand({getShardVersion: ns}),
+ ErrorCodes.NamespaceNotSharded);
+ st.shardCollection(ns, {Key: 1});
+
+ st.s0.adminCommand({split: ns, middle: {Key: 0}});
+ st.s0.adminCommand({moveChunk: ns, find: {Key: -1}, to: shard0Name});
+ st.s0.adminCommand({moveChunk: ns, find: {Key: 0}, to: shard1Name});
+ assert.commandFailedWithCode(st.s1.adminCommand({getShardVersion: ns}),
+ ErrorCodes.NamespaceNotSharded);
+
+ // This document will go to shard 0
+ assert.commandWorked(st.s0.getDB(database).getCollection(collName).insert({Key: -1, inc: 0}));
+ // This document will go to shard 1
+ assert.commandWorked(st.s0.getDB(database).getCollection(collName).insert({Key: 0, inc: 0}));
+ st.restartShardRS(0);
+ st.restartShardRS(1);
+}
+
+// Test a multi insert with collection unknown on a stale mongos
+setupCollectionForTest('TestColl');
+
+var bulkOp = st.s1.getDB('TestDB').TestColl.initializeUnorderedBulkOp();
+bulkOp.insert({Key: -2});
+bulkOp.insert({Key: 1});
+bulkOp.execute();
+
+assert.neq(4, st.s0.getDB('TestDB').TestColl.find().itcount());
+// TODO (SERVER-32198): After SERVER-32198 is fixed and backported change neq to eq
+assert.neq(4, st.s1.getDB('TestDB').TestColl.find().itcount());
+
+// Test multi update with collection unknown on a stale mongos
+setupCollectionForTest('TestUpdateColl');
+
+var updateBulkOp = st.s1.getDB('TestDB').TestUpdateColl.initializeUnorderedBulkOp();
+updateBulkOp.find({}).update({$inc: {inc: 1}});
+updateBulkOp.execute();
+var s0Doc = st.s0.getDB('TestDB').TestUpdateColl.findOne({Key: -1});
+// TODO (SERVER-32198): After SERVER-32198 is fixed and backported change neq to eq
+assert.neq(1, s0Doc.inc);
+var s1Doc = st.s0.getDB('TestDB').TestUpdateColl.findOne({Key: 0});
+assert.eq(1, s1Doc.inc);
+
+// Test multi remove with collection unknown on a stale mongos
+setupCollectionForTest('TestRemoveColl');
+
+var removeBulkOp = st.s1.getDB('TestDB').TestRemoveColl.initializeUnorderedBulkOp();
+removeBulkOp.find({}).remove({});
+removeBulkOp.execute();
+// TODO (SERVER-32198): After SERVER-32198 is fixed and backported change neq to eq
+assert.neq(0, st.s0.getDB('TestDB').TestRemoveColl.find().itcount());
+
+// Test find with collection unknown on a stale mongos
+setupCollectionForTest('TestFindColl');
+
+var coll = st.s1.getDB('TestDB').TestFindColl.find().toArray();
+// TODO (SERVER-32198): After SERVER-32198 is fixed and backported change neq to eq
+assert.neq(2, coll.length);
+
+// Test aggregate with collection unknown on a stale mongos
+setupCollectionForTest('TestAggregateColl');
+
+var count = st.s1.getDB('TestDB').TestAggregateColl.aggregate([{$count: 'total'}]).toArray();
+// TODO (SERVER-32198): After SERVER-32198 is fixed and backported change neq to eq
+assert.neq(2, count[0].total);
+
+st.stop();
+})();
diff --git a/src/mongo/db/s/collection_sharding_runtime.cpp b/src/mongo/db/s/collection_sharding_runtime.cpp
index 4d91e633550..becd6afd89c 100644
--- a/src/mongo/db/s/collection_sharding_runtime.cpp
+++ b/src/mongo/db/s/collection_sharding_runtime.cpp
@@ -123,11 +123,21 @@ CollectionShardingRuntime* CollectionShardingRuntime::get_UNSAFE(ServiceContext*
}
ScopedCollectionFilter CollectionShardingRuntime::getOwnershipFilter(OperationContext* opCtx) {
+ const auto optReceivedShardVersion = getOperationReceivedVersion(opCtx, _nss);
+ if (!optReceivedShardVersion)
+ return {kUnshardedCollection};
+
const auto atClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime();
auto optMetadata = _getMetadataWithVersionCheckAt(opCtx, atClusterTime);
- if (!optMetadata)
- return {kUnshardedCollection};
+ uassert(StaleConfigInfo(_nss,
+ *optReceivedShardVersion,
+ getCurrentShardVersionIfKnown(),
+ ShardingState::get(opCtx)->shardId()),
+ str::stream() << "sharding status of collection " << _nss.ns()
+ << " is not currently available for filtering and needs to be recovered "
+ << "from the config server",
+ optMetadata);
return {std::move(*optMetadata)};
}
@@ -365,7 +375,9 @@ CollectionShardingRuntime::_getMetadataWithVersionCheckAt(
if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) {
uassert(std::move(sci),
- "no metadata found on multi-write operation, need to refresh",
+ str::stream()
+ << "sharding status of collection " << _nss.ns()
+ << " is not currently known and needs to be recovered from the config server",
!receivedShardVersion.getCanThrowSSVOnIgnored() ||
_getCurrentMetadataIfKnown(atClusterTime));
return boost::none;