-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml |   2
-rw-r--r--  jstests/sharding/safe_secondary_reads_causal_consistency.js                        | 164
-rw-r--r--  src/mongo/db/s/flush_routing_table_cache_updates_command.cpp                       |   2
-rw-r--r--  src/mongo/db/s/migration_source_manager.cpp                                        |   3
4 files changed, 170 insertions(+), 1 deletion(-)
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index eeedcb03b2f..93dff6ef134 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -163,6 +163,8 @@ selector:
- jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js
# Requires behavior that does not and will never exist in 4.0.
- jstests/sharding/migration_retries_on_write_conflict_exceptions.js
+ # Enable after SERVER-48679 is backported and available on 4.0 binaries.
+ - jstests/sharding/safe_secondary_reads_causal_consistency.js
executor:
config:
shell_options:
diff --git a/jstests/sharding/safe_secondary_reads_causal_consistency.js b/jstests/sharding/safe_secondary_reads_causal_consistency.js
new file mode 100644
index 00000000000..3c1167fffe8
--- /dev/null
+++ b/jstests/sharding/safe_secondary_reads_causal_consistency.js
@@ -0,0 +1,164 @@
+/**
+ * Tests that the donor shard's secondaries correctly block reads and refresh their metadata
+ * cache when a chunk migration occurs.
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("./jstests/libs/chunk_manipulation_util.js");
+
+/**
+ * @summary Executes a count command with read preference "secondary" and returns the
+ * command response.
+ * @param {DBObject} db - The DB connection, obtained from the ShardingTest object, that contains
+ * the collection the query will be run against.
+ * @param {String} collectionName - The name of the collection the read query will be run
+ * against.
+ * @param {Object} query - The query predicate for the count command, e.g. {_id: 1, x: 4}.
+ * @param {Object} optionalParams - Optional additional parameters to include in the read
+ * command. Currently only readConcern and maxTimeMS are supported.
+ * @returns {Object} - The command response; the number of documents that matched the query is
+ * reported on property 'n'.
+ */
+const runReadCmdWithReadPrefSecondary = (db, collectionName, query, {readConcern, maxTimeMS}) => {
+ return db.runCommand({
+ count: collectionName,
+ query,
+ $readPreference: {mode: "secondary"},
+ readConcern,
+ maxTimeMS
+ });
+};
+
+/**
+ * @summary This function does three things:
+ * 1.) Enables sharding for the database,
+ * 2.) Shards the collection with the given namespace, using _id as the shard key,
+ * 3.) Splits the sharded collection into two chunks at {_id: 0}.
+ * @param {ShardingTest} shardingTest - The initialized ShardingTest instance.
+ * @param {String} dbName - The name of the database in which sharding will be enabled; it is also
+ * the DB that contains the collection that will be sharded.
+ * @param {String} collectionNamespace - The namespace (DB name + collection name) of the
+ * collection that will be sharded. After sharding, the namespace is used for chunk splitting.
+ * @param {String} primaryShardName - Used to ensure the desired shard becomes the primary shard.
+ */
+const setupShardedCollection = (shardingTest, dbName, collectionNamespace, primaryShardName) => {
+ assert.commandWorked(shardingTest.s.adminCommand({enableSharding: dbName}));
+
+ shardingTest.ensurePrimaryShard(dbName, primaryShardName);
+ assert.commandWorked(shardingTest.s.adminCommand({
+ shardCollection: collectionNamespace,
+ key: {_id: 1},
+ }));
+ assert.commandWorked(shardingTest.s.adminCommand({
+ split: collectionNamespace,
+ middle: {_id: 0},
+ }));
+};
+
+const dbName1 = "alpha";
+const dbName2 = "beta";
+
+const collName1 = "foo";
+const collName2 = "bar";
+
+const ns1 = `${dbName1}.${collName1}`;
+const ns2 = `${dbName2}.${collName2}`;
+
+const preCommitRefreshFailPointName = "hangBeforePostMigrationCommitRefresh";
+
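+// Two routers (so one can be left with a stale routing table), two shards, each a two-node
+// replica set whose secondary can never become primary, and causal consistency enabled so the
+// shell sessions track operationTime/clusterTime for the reads below.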
+const st = new ShardingTest({
+ mongos: 2,
+ shards: 2,
+ rs: {nodes: [{}, {rsConfig: {priority: 0}}]},
+ causalConsistency: true,
+});
+
+const mongos0AlphaDB = st.s0.getDB(dbName1);
+const mongos1AlphaDB = st.s1.getDB(dbName1);
+const mongos1BetaDB = st.s1.getDB(dbName2);
+
+jsTest.log(`Sharding collection: ${ns1} and ${ns2}.`);
+setupShardedCollection(st, dbName1, ns1, st.shard0.shardName);
+setupShardedCollection(st, dbName2, ns2, st.shard1.shardName);
+
+// Starts a migration in a parallel shell to move the chunk [0, maxKey) in ns1 from shard0 to
+// shard1. Then it pauses the migration right before it enters the commit phase.
+
+jsTest.log("Starting migration.");
+
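+// Standalone helper mongod, separate from the cluster, used by moveChunkParallel() to
+// coordinate the parallel shell that runs the moveChunk command.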
+const staticMongod = MongoRunner.runMongod({});
+const joinMoveChunk = moveChunkParallel(
+ staticMongod, st.s.host, {_id: 1}, null, ns1, st.shard1.shardName, true /** expectSuccess */
+);
+pauseMoveChunkAtStep(st.shard0, moveChunkStepNames.chunkDataCommitted);
+waitForMoveChunkStep(st.shard0, moveChunkStepNames.chunkDataCommitted);
+
+// Sends a versioned read through mongos1, the second router, to cause a refresh on the secondary
+// of the donor shard. Since the read runs while the migration is paused on 'chunkDataCommitted',
+// the primary is only blocking writes; if the secondary does not check for writes being blocked
+// when sending 'flushRoutingTableCacheUpdates', it will keep serving reads to stale mongoses.
+
+jsTest.log("Sending a read query to donor shard's secondary.");
+
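+// With the fix, the secondary waits behind the critical section, so the read is expected to
+// either time out (MaxTimeMSExpired) or be rejected with StaleConfig instead of returning
+// results from stale metadata.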
+assert.commandFailedWithCode(
+ runReadCmdWithReadPrefSecondary(
+ mongos1AlphaDB, collName1, {_id: 1}, {maxTimeMS: 10000, readConcern: {level: "local"}}),
+ [ErrorCodes.MaxTimeMSExpired, ErrorCodes.StaleConfig]);
+
+// Allow the migration to commit and pause it before the donor shard's primary refreshes from the
+// config server.
+jsTest.log("Unpausing migration and enabling hangBeforePostMigrationCommitRefresh failpoint.");
+const hangBeforeRefreshFP = configureFailPoint(st.shard0, preCommitRefreshFailPointName);
+unpauseMoveChunkAtStep(st.shard0, moveChunkStepNames.chunkDataCommitted);
+hangBeforeRefreshFP.wait();
+
+// Write to the migrated ns1 chunk. Run flushRouterConfig on the mongos to force it to refresh
+// from the config server when it does the write, since otherwise it would route the request to
+// shard0, where it would block behind the critical section. This insert will be used to test
+// causal consistency in the later read.
+jsTest.log("Sending insert through mongos0.");
+assert.commandWorked(mongos0AlphaDB.adminCommand({flushRouterConfig: 1}));
+assert.commandWorked(mongos0AlphaDB.runCommand(
+ {insert: collName1, documents: [{_id: 2}], writeConcern: {w: 'majority'}}));
+
+// Bump the clusterTime of mongos1 to at least the operationTime T of the above write by
+// writing to shard1. This is required for the afterClusterTime read below to work, since the
+// 'afterClusterTime' of a command cannot be larger than the current clusterTime of the mongod
+// (i.e. shard0's secondary) that executes the command. By bumping the clusterTime of mongos1,
+// the clusterTime of shard0's secondary will also get bumped to >= T via clusterTime gossiping
+// when we do the afterClusterTime read.
+
+jsTest.log("Sending insert to other collection to bump cluster time.");
+assert.commandWorked(mongos1BetaDB.runCommand({
+ insert: collName2,
+ documents: [{_id: 1}],
+ writeConcern: {w: "majority"},
+}));
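+// Sanity check: mongos1's operationTime has advanced past the operationTime of the earlier
+// write through mongos0.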
+assert.gt(bsonWoCompare(mongos1AlphaDB.getSession().getOperationTime(),
+ mongos0AlphaDB.getSession().getOperationTime()),
+ 0);
+
+// If the secondary doesn't wait behind the critical section like it should,
+// this read will find no matching documents because mongos1 will target
+// shard0 instead of shard1 (since mongos1's cache is stale).
+jsTest.log("Sending read query to secondary of shard0.");
+const totalDocsFound = runReadCmdWithReadPrefSecondary(mongos1AlphaDB, collName1, {_id: 2}, {
+ readConcern: {
+ afterClusterTime: mongos0AlphaDB.getSession().getOperationTime(),
+ },
+ }).n;
+assert.neq(totalDocsFound, 0);
+
+jsTest.log("Completing chunk migration.");
+hangBeforeRefreshFP.off();
+joinMoveChunk();
+
+MongoRunner.stopMongod(staticMongod);
+st.stop();
+})();
diff --git a/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp b/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
index f2791bf1fbd..413f6cdf2ea 100644
--- a/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
+++ b/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
@@ -123,7 +123,7 @@ public:
// consistency guarantee.
auto const css = CollectionShardingState::get(opCtx, ns());
auto criticalSectionSignal =
- css->getCriticalSectionSignal(ShardingMigrationCriticalSection::kRead);
+ css->getCriticalSectionSignal(ShardingMigrationCriticalSection::kWrite);
if (criticalSectionSignal) {
oss.setMigrationCriticalSectionSignal(criticalSectionSignal);
}
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index 4533ec35968..ab52ecb77fe 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -119,6 +119,7 @@ MONGO_FAIL_POINT_DEFINE(doNotRefreshRecipientAfterCommit);
MONGO_FAIL_POINT_DEFINE(failMigrationCommit);
MONGO_FAIL_POINT_DEFINE(hangBeforeLeavingCriticalSection);
MONGO_FAIL_POINT_DEFINE(migrationCommitNetworkError);
+MONGO_FAIL_POINT_DEFINE(hangBeforePostMigrationCommitRefresh);
MigrationSourceManager* MigrationSourceManager::get(CollectionShardingRuntime* csr,
CollectionShardingRuntime::CSRLock& csrLock) {
@@ -491,6 +492,8 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC
<< "metadata also failed"));
}
+ MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforePostMigrationCommitRefresh);
+
// Do a best effort attempt to incrementally refresh the metadata before leaving the critical
// section. It is okay if the refresh fails because that will cause the metadata to be cleared
// and subsequent callers will try to do a full refresh.