diff options
author | Randolph Tan <randolph@10gen.com> | 2016-01-05 15:42:01 -0500 |
---|---|---|
committer | Ramon Fernandez <rfmnyc@gmail.com> | 2016-01-29 08:33:18 -0500 |
commit | e9e372e1e2e48c1420c11af63f13b5ec227b4e8c (patch) | |
tree | c9144d504bb331cceaa6a14185300e73fec96b0c | |
parent | e2835f1cbfcd47108bc1c077d886246c96dd5efe (diff) | |
download | mongo-e9e372e1e2e48c1420c11af63f13b5ec227b4e8c.tar.gz |
SERVER-18671 SecondaryPreferred can end up using unversioned connections
(cherry picked from commit 1d611a8c7ee346929a4186f524c21007ef7a279d)
-rw-r--r-- | buildscripts/resmokeconfig/suites/sharding_legacy_multiversion.yml | 1 | ||||
-rw-r--r-- | jstests/sharding/all_config_hosts_down.js | 1 | ||||
-rw-r--r-- | jstests/sharding/secondary_query_routing.js | 38 | ||||
-rw-r--r-- | src/mongo/client/dbclient_rs.cpp | 2 | ||||
-rw-r--r-- | src/mongo/client/parallel.cpp | 87 | ||||
-rw-r--r-- | src/mongo/client/replica_set_monitor.cpp | 12 | ||||
-rw-r--r-- | src/mongo/client/replica_set_monitor.h | 5 | ||||
-rw-r--r-- | src/mongo/client/replica_set_monitor_test.cpp | 13 |
8 files changed, 111 insertions, 48 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_legacy_multiversion.yml b/buildscripts/resmokeconfig/suites/sharding_legacy_multiversion.yml index f821f0f23fb..02dcc8e5217 100644 --- a/buildscripts/resmokeconfig/suites/sharding_legacy_multiversion.yml +++ b/buildscripts/resmokeconfig/suites/sharding_legacy_multiversion.yml @@ -47,6 +47,7 @@ selector: - jstests/sharding/zero_shard_version.js # SERVER-20530, fixed in v3.1.9 - jstests/sharding/drop_sharded_db.js # SERVER-17723, fixed in v3.1.2 - jstests/sharding/implicit_db_creation.js # SERVER-17723, fixed in v3.1.2 + - jstests/sharding/secondary_query_routing.js # SERVER-18671, fixed in v3.3.0 # The following tests fail because of known bugs: - jstests/sharding/sync_cluster_config/sync6.js # SERVER-21660 diff --git a/jstests/sharding/all_config_hosts_down.js b/jstests/sharding/all_config_hosts_down.js index 1834b1cf6ad..0ca261bcf08 100644 --- a/jstests/sharding/all_config_hosts_down.js +++ b/jstests/sharding/all_config_hosts_down.js @@ -34,7 +34,6 @@ for( var i = 0; i < 2; i++ ){ assert(e.code == 8002 || // SCCC config down, for v3.0 compatibility. e.code == 10276 || // Transport error e.code == 13328 || // Connect error - e.code == 13639 || // Connect error to replSet primary e.code == ErrorCodes.HostUnreachable || e.code == ErrorCodes.FailedToSatisfyReadPreference || e.code == ErrorCodes.ReplicaSetNotFound); diff --git a/jstests/sharding/secondary_query_routing.js b/jstests/sharding/secondary_query_routing.js new file mode 100644 index 00000000000..8b9649a23ad --- /dev/null +++ b/jstests/sharding/secondary_query_routing.js @@ -0,0 +1,38 @@ +/** + * Test that queries can be routed to the right secondaries that are caught + * up with the primary. + */ +(function() { + +var rsOpts = { nodes: 2 }; +var st = new ShardingTest({ mongos: 2, shards: { rs0: rsOpts, rs1: rsOpts }}); + +st.s0.adminCommand({ enableSharding: 'test' }); + +st.ensurePrimaryShard('test', 'test-rs0'); +st.s0.adminCommand({ shardCollection: 'test.user', key: { x: 1 }}); +st.s0.adminCommand({ split: 'test.user', middle: { x: 0 }}); + +st.s1.setReadPref('secondary'); +var testDB = st.s1.getDB('test'); +// This establishes the shard version Mongos #1's view. +testDB.user.insert({ x: 1 }); + +// Mongos #0 bumps up the version without Mongos #1 knowledge. +// Note: moveChunk has implicit { w: 2 } write concern. +st.s0.adminCommand({ moveChunk: 'test.user', + find: { x: 0 }, + to: 'test-rs1', + _waitForDelete: true }); + +// Clear all the connections to make sure that Mongos #1 will attempt to establish +// the shard version. +assert.commandWorked(testDB.adminCommand({ connPoolSync: 1 })); + +// Mongos #1 performs a query to the secondary. +var res = testDB.runReadCommand({ count: 'user', query: { x: 1 }}); +assert(res.ok); +assert.eq(1, res.n, tojson(res)); + +st.stop(); +})(); diff --git a/src/mongo/client/dbclient_rs.cpp b/src/mongo/client/dbclient_rs.cpp index d392ba214df..5c1af64d8fd 100644 --- a/src/mongo/client/dbclient_rs.cpp +++ b/src/mongo/client/dbclient_rs.cpp @@ -310,7 +310,7 @@ DBClientConnection* DBClientReplicaSet::checkMaster() { if (newConn == NULL || !errmsg.empty()) { monitor->failedHost(_masterHost); - uasserted(13639, + uasserted(ErrorCodes::FailedToSatisfyReadPreference, str::stream() << "can't connect to new replica set master [" << _masterHost.toString() << "]" << (errmsg.empty() ? "" : ", err: ") << errmsg); diff --git a/src/mongo/client/parallel.cpp b/src/mongo/client/parallel.cpp index 1468b1aa332..661156340bc 100644 --- a/src/mongo/client/parallel.cpp +++ b/src/mongo/client/parallel.cpp @@ -394,60 +394,55 @@ void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk(OperationContext* const DBClientBase* rawConn = state->conn->getRawConn(); bool allowShardVersionFailure = rawConn->type() == ConnectionString::SET && DBClientReplicaSet::isSecondaryQuery(_qSpec.ns(), _qSpec.query(), _qSpec.options()); - bool connIsDown = rawConn->isFailed(); - if (allowShardVersionFailure && !connIsDown) { - // If the replica set connection believes that it has a valid primary that is up, - // confirm that the replica set monitor agrees that the suspected primary is indeed up. + + // Skip shard version checking if primary is known to be down. + if (allowShardVersionFailure) { const DBClientReplicaSet* replConn = dynamic_cast<const DBClientReplicaSet*>(rawConn); invariant(replConn); ReplicaSetMonitorPtr rsMonitor = ReplicaSetMonitor::get(replConn->getSetName()); - if (!rsMonitor->isHostUp(replConn->getSuspectedPrimaryHostAndPort())) { - connIsDown = true; + if (!rsMonitor->isKnownToHaveGoodPrimary()) { + state->conn->donotCheckVersion(); + + // A side effect of this short circuiting is the mongos will not be able figure out + // that the primary is now up on it's own and has to rely on other threads to refresh + // node states. + + OCCASIONALLY { + const DBClientReplicaSet* repl = dynamic_cast<const DBClientReplicaSet*>(rawConn); + dassert(repl); + warning() << "Primary for " << repl->getServerAddress() + << " was down before, bypassing setShardVersion." + << " The local replica set view and targeting may be stale."; + } + + return; } } - if (allowShardVersionFailure && connIsDown) { - // If we're doing a secondary-allowed query and the primary is down, don't attempt to - // set the shard version. - - state->conn->donotCheckVersion(); - - // A side effect of this short circuiting is the mongos will not be able figure out that - // the primary is now up on it's own and has to rely on other threads to refresh node - // states. - - OCCASIONALLY { - const DBClientReplicaSet* repl = dynamic_cast<const DBClientReplicaSet*>(rawConn); - dassert(repl); - warning() << "Primary for " << repl->getServerAddress() - << " was down before, bypassing setShardVersion." - << " The local replica set view and targeting may be stale."; + try { + if (state->conn->setVersion()) { + LOG(pc) << "needed to set remote version on connection to value " + << "compatible with " << vinfo; } - } else { - try { - if (state->conn->setVersion()) { - // It's actually okay if we set the version here, since either the - // manager will be verified as compatible, or if the manager doesn't - // exist, we don't care about version consistency - LOG(pc) << "needed to set remote version on connection to value " - << "compatible with " << vinfo; - } - } catch (const DBException&) { - if (allowShardVersionFailure) { - // It's okay if we don't set the version when talking to a secondary, we can - // be stale in any case. - - OCCASIONALLY { - const DBClientReplicaSet* repl = - dynamic_cast<const DBClientReplicaSet*>(state->conn->getRawConn()); - dassert(repl); - warning() << "Cannot contact primary for " << repl->getServerAddress() - << " to check shard version." - << " The local replica set view and targeting may be stale."; - } - } else { - throw; + } catch (const DBException& dbExcep) { + auto errCode = dbExcep.getCode(); + if (allowShardVersionFailure && + (ErrorCodes::isNotMasterError(ErrorCodes::fromInt(errCode)) || + errCode == ErrorCodes::FailedToSatisfyReadPreference || + errCode == 9001)) { // socket exception + // It's okay if we don't set the version when talking to a secondary, we can + // be stale in any case. + + OCCASIONALLY { + const DBClientReplicaSet* repl = + dynamic_cast<const DBClientReplicaSet*>(state->conn->getRawConn()); + dassert(repl); + warning() << "Cannot contact primary for " << repl->getServerAddress() + << " to check shard version." + << " The local replica set view and targeting may be stale."; } + } else { + throw; } } } diff --git a/src/mongo/client/replica_set_monitor.cpp b/src/mongo/client/replica_set_monitor.cpp index 4b79a494b22..5f6d09b2702 100644 --- a/src/mongo/client/replica_set_monitor.cpp +++ b/src/mongo/client/replica_set_monitor.cpp @@ -459,6 +459,18 @@ void ReplicaSetMonitor::cleanup() { globalRSMonitorManager.removeAllMonitors(); } +bool ReplicaSetMonitor::isKnownToHaveGoodPrimary() const { + stdx::lock_guard<stdx::mutex> lk(_state->mutex); + + for (const auto& node : _state->nodes) { + if (node.isMaster) { + return true; + } + } + + return false; +} + Refresher::Refresher(const SetStatePtr& setState) : _set(setState), _scan(setState->currentScan), _startedNewScan(false) { if (_scan) diff --git a/src/mongo/client/replica_set_monitor.h b/src/mongo/client/replica_set_monitor.h index e4ea65b5937..10f9558d6a2 100644 --- a/src/mongo/client/replica_set_monitor.h +++ b/src/mongo/client/replica_set_monitor.h @@ -155,6 +155,11 @@ public: void appendInfo(BSONObjBuilder& b) const; /** + * Returns true if the monitor knows a usable primary from it's interal view. + */ + bool isKnownToHaveGoodPrimary() const; + + /** * Creates a new ReplicaSetMonitor, if it doesn't already exist. */ static void createIfNeeded(const std::string& name, const std::set<HostAndPort>& servers); diff --git a/src/mongo/client/replica_set_monitor_test.cpp b/src/mongo/client/replica_set_monitor_test.cpp index ebce86ee3fc..7f29de2666f 100644 --- a/src/mongo/client/replica_set_monitor_test.cpp +++ b/src/mongo/client/replica_set_monitor_test.cpp @@ -955,4 +955,17 @@ TEST(ReplicaSetMonitorTests, StalePrimaryWithObsoleteElectionId) { ASSERT(ns.host.empty()); } +TEST(ReplicaSetMonitor, NoPrimaryUpCheck) { + SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet); + ReplicaSetMonitor rsm(state); + ASSERT_FALSE(rsm.isKnownToHaveGoodPrimary()); +} + +TEST(ReplicaSetMonitor, PrimaryIsUpCheck) { + SetStatePtr state = std::make_shared<SetState>("name", basicSeedsSet); + state->nodes.front().isMaster = true; + ReplicaSetMonitor rsm(state); + ASSERT_TRUE(rsm.isKnownToHaveGoodPrimary()); +} + } // namespace |