summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Randolph Tan <randolph@10gen.com> 2015-04-29 13:48:17 -0400
committer: Randolph Tan <randolph@10gen.com> 2015-05-29 09:44:55 -0400
commit: dd81a3bec958d1d78913630fd4a3523170fd23db (patch)
tree: 11eb0547a53f611546b7a67054505b241748f365
parent: 2b0ff7c06a46301fd87c794a1eb2df90d9767ad9 (diff)
download: mongo-dd81a3bec958d1d78913630fd4a3523170fd23db.tar.gz
SERVER-18246 getmore on secondary in recovery mode can crash mongos
(cherry picked from commit 5fed2241cb67c358f77fd0ff20b2afbcefa55ec1)

Conflicts:
    src/mongo/client/dbclient.cpp
    src/mongo/db/repl/sync_source_feedback.cpp
-rw-r--r-- src/mongo/client/dbclient.cpp 53
-rw-r--r-- src/mongo/client/dbclient_rs.cpp 4
-rw-r--r-- src/mongo/client/dbclientinterface.h 32
-rw-r--r-- src/mongo/db/repl/oplogreader.cpp 1
-rw-r--r-- src/mongo/db/repl/scoped_conn.cpp 1
-rw-r--r-- src/mongo/db/repl/scoped_conn.h 2
-rw-r--r-- src/mongo/db/repl/sync_source_feedback.cpp 2
-rw-r--r-- src/mongo/dbtests/perftests.cpp 2
8 files changed, 60 insertions, 37 deletions
diff --git a/src/mongo/client/dbclient.cpp b/src/mongo/client/dbclient.cpp
index 5f5fd47ffa6..26ae163d9e1 100644
--- a/src/mongo/client/dbclient.cpp
+++ b/src/mongo/client/dbclient.cpp
@@ -34,8 +34,9 @@
#include "mongo/bson/util/bson_extract.h"
#include "mongo/bson/util/builder.h"
#include "mongo/client/constants.h"
-#include "mongo/client/dbclient_rs.h"
#include "mongo/client/dbclientcursor.h"
+#include "mongo/client/dbclient_rs.h"
+#include "mongo/client/replica_set_monitor.h"
#include "mongo/client/sasl_client_authenticate.h"
#include "mongo/client/syncclusterconnection.h"
#include "mongo/db/auth/internal_user_auth.h"
@@ -1094,15 +1095,14 @@ namespace mongo {
int options) {
if (DBClientWithCommands::runCommand(dbname, cmd, info, options))
return true;
-
- if ( clientSet && isNotMasterErrorString( info["errmsg"] ) ) {
- clientSet->isntMaster();
+
+ if (!_parentReplSetName.empty()) {
+ handleNotMasterResponse(info["errmsg"]);
}
return false;
}
-
void DBClientConnection::_checkConnection() {
if ( !_failed )
return;
@@ -1212,10 +1212,6 @@ namespace mongo {
return n;
}
- void DBClientConnection::setReplSetClientCallback(DBClientReplicaSet* rsClient) {
- clientSet = rsClient;
- }
-
unsigned long long DBClientConnection::query(
stdx::function<void(DBClientCursorBatchIterator &)> f,
const string& ns,
@@ -1556,6 +1552,14 @@ namespace mongo {
toSend.setData(dbQuery, b.buf(), b.len());
}
+ DBClientConnection::DBClientConnection(bool _autoReconnect, double so_timeout):
+ _failed(false),
+ autoReconnect(_autoReconnect),
+ autoReconnectBackoff(1000, 2000),
+ _so_timeout(so_timeout) {
+ _numConnections.fetchAndAdd(1);
+ }
+
void DBClientConnection::say( Message &toSend, bool isRetry , string * actualServer ) {
checkConnection();
try {
@@ -1632,12 +1636,10 @@ namespace mongo {
*retry = false;
*host = _serverString;
- if ( clientSet && nReturned ) {
+ if (!_parentReplSetName.empty() && nReturned) {
verify(data);
- BSONObj o(data);
- if ( isNotMasterErrorString( getErrField(o) ) ) {
- clientSet->isntMaster();
- }
+ BSONObj bsonView(data);
+ handleNotMasterResponse(getErrField(bsonView));
}
}
@@ -1656,6 +1658,27 @@ namespace mongo {
say(m);
}
+ void DBClientConnection::setParentReplSetName(const string& replSetName) {
+ _parentReplSetName = replSetName;
+ }
+
+ void DBClientConnection::handleNotMasterResponse(const BSONElement& elemToCheck) {
+ if (!isNotMasterErrorString(elemToCheck)) {
+ return;
+ }
+
+ MONGO_LOG_COMPONENT(1, logger::LogComponent::kReplication)
+ << "got not master from: " << _serverString
+ << " of repl set: " << _parentReplSetName;
+
+ ReplicaSetMonitorPtr monitor = ReplicaSetMonitor::get(_parentReplSetName);
+ if (monitor) {
+ monitor->failedHost(_server);
+ }
+
+ _failed = true;
+ }
+
#ifdef MONGO_SSL
static SimpleMutex s_mtx("SSLManager");
static SSLManagerInterface* s_sslMgr(NULL);
@@ -1675,7 +1698,7 @@ namespace mongo {
bool serverAlive( const string &uri ) {
- DBClientConnection c( false, 0, 20 ); // potentially the connection to server could fail while we're checking if it's alive - so use timeouts
+ DBClientConnection c(false, 20); // potentially the connection to server could fail while we're checking if it's alive - so use timeouts
string err;
if ( !c.connect( HostAndPort(uri), err ) )
return false;
diff --git a/src/mongo/client/dbclient_rs.cpp b/src/mongo/client/dbclient_rs.cpp
index 086eae84215..4f9f5d975e5 100644
--- a/src/mongo/client/dbclient_rs.cpp
+++ b/src/mongo/client/dbclient_rs.cpp
@@ -344,7 +344,7 @@ namespace {
_masterHost = h;
_master.reset(newConn);
- _master->setReplSetClientCallback(this);
+ _master->setParentReplSetName(_setName);
_master->setRunCommandHook(_runCommandHook);
_master->setPostRunCommandHook(_postRunCommandHook);
@@ -748,7 +748,7 @@ namespace {
newConn != NULL);
_lastSlaveOkConn.reset(newConn);
- _lastSlaveOkConn->setReplSetClientCallback(this);
+ _lastSlaveOkConn->setParentReplSetName(_setName);
_lastSlaveOkConn->setRunCommandHook(_runCommandHook);
_lastSlaveOkConn->setPostRunCommandHook(_postRunCommandHook);
diff --git a/src/mongo/client/dbclientinterface.h b/src/mongo/client/dbclientinterface.h
index 4f63c21db92..a88178ddfa4 100644
--- a/src/mongo/client/dbclientinterface.h
+++ b/src/mongo/client/dbclientinterface.h
@@ -1234,14 +1234,10 @@ namespace mongo {
/**
@param _autoReconnect if true, automatically reconnect on a connection failure
- @param cp used by DBClientReplicaSet. You do not need to specify this parameter
@param timeout tcp timeout in seconds - this is for read/write, not connect.
Connect timeout is fixed, but short, at 5 seconds.
*/
- DBClientConnection(bool _autoReconnect=false, DBClientReplicaSet* cp=0, double so_timeout=0) :
- clientSet(cp), _failed(false), autoReconnect(_autoReconnect), autoReconnectBackoff(1000, 2000), _so_timeout(so_timeout) {
- _numConnections.fetchAndAdd(1);
- }
+ DBClientConnection(bool _autoReconnect = false, double so_timeout = 0);
virtual ~DBClientConnection() {
_numConnections.fetchAndAdd(-1);
@@ -1336,16 +1332,10 @@ namespace mongo {
}
/**
- * Primarily used for notifying the replica set client that the server
- * it is talking to is not primary anymore.
- *
- * @param rsClient caller is responsible for managing the life of rsClient
- * and making sure that it lives longer than this object.
- *
- * Warning: This is only for internal use and will eventually be removed in
- * the future.
+ * Set the name of the replica set that this connection is associated to.
+ * Note: There is no validation on replSetName.
*/
- void setReplSetClientCallback(DBClientReplicaSet* rsClient);
+ void setParentReplSetName(const std::string& replSetName);
static void setLazyKillCursor( bool lazy ) { _lazyKillCursor = lazy; }
static bool getLazyKillCursor() { return _lazyKillCursor; }
@@ -1357,7 +1347,6 @@ namespace mongo {
virtual void _auth(const BSONObj& params);
virtual void sayPiggyBack( Message &toSend );
- DBClientReplicaSet *clientSet;
boost::scoped_ptr<MessagingPort> p;
boost::scoped_ptr<SockAddr> server;
bool _failed;
@@ -1381,6 +1370,19 @@ namespace mongo {
#ifdef MONGO_SSL
SSLManagerInterface* sslManager();
#endif
+
+ private:
+
+ /**
+ * Checks the BSONElement for the 'not master' keyword and if it does exist,
+ * try to inform the replica set monitor that the host this connects to is
+ * no longer primary.
+ */
+ void handleNotMasterResponse(const BSONElement& elemToCheck);
+
+ // Contains the string for the replica set name of the host this is connected to.
+ // Should be empty if this connection is not pointing to a replica set member.
+ std::string _parentReplSetName;
};
/** pings server to check if it's up
diff --git a/src/mongo/db/repl/oplogreader.cpp b/src/mongo/db/repl/oplogreader.cpp
index a3d2bca8e7f..7bc4ba3fc0a 100644
--- a/src/mongo/db/repl/oplogreader.cpp
+++ b/src/mongo/db/repl/oplogreader.cpp
@@ -91,7 +91,6 @@ namespace repl {
if (conn() == NULL || _host != host) {
resetConnection();
_conn = shared_ptr<DBClientConnection>(new DBClientConnection(false,
- 0,
tcp_timeout));
string errmsg;
if ( !_conn->connect(host, errmsg) ||
diff --git a/src/mongo/db/repl/scoped_conn.cpp b/src/mongo/db/repl/scoped_conn.cpp
index e01cabbea7b..e4116fcf584 100644
--- a/src/mongo/db/repl/scoped_conn.cpp
+++ b/src/mongo/db/repl/scoped_conn.cpp
@@ -49,7 +49,6 @@ namespace repl {
ScopedConn::ConnectionInfo::ConnectionInfo() : lock("ConnectionInfo"),
cc(new DBClientConnection(/*reconnect*/ true,
- /*replicaSet*/ 0,
/*timeout*/ DEFAULT_HEARTBEAT_TIMEOUT_SECS)),
connected(false) {
cc->_logLevel = logger::LogSeverity::Debug(2);
diff --git a/src/mongo/db/repl/scoped_conn.h b/src/mongo/db/repl/scoped_conn.h
index 9c3f67e8a8b..b357d17648e 100644
--- a/src/mongo/db/repl/scoped_conn.h
+++ b/src/mongo/db/repl/scoped_conn.h
@@ -68,7 +68,7 @@ namespace repl {
// conLock releases...
}
void reconnect() {
- connInfo->cc.reset(new DBClientConnection(true, 0, connInfo->getTimeout()));
+ connInfo->cc.reset(new DBClientConnection(true, connInfo->getTimeout()));
connInfo->cc->_logLevel = logger::LogSeverity::Debug(2);
connInfo->connected = false;
connect();
diff --git a/src/mongo/db/repl/sync_source_feedback.cpp b/src/mongo/db/repl/sync_source_feedback.cpp
index 593f99e663c..c70f101904e 100644
--- a/src/mongo/db/repl/sync_source_feedback.cpp
+++ b/src/mongo/db/repl/sync_source_feedback.cpp
@@ -164,7 +164,7 @@ namespace repl {
return true;
}
log() << "replset setting syncSourceFeedback to " << host.toString();
- _connection.reset(new DBClientConnection(false, 0, OplogReader::tcp_timeout));
+ _connection.reset(new DBClientConnection(false, OplogReader::tcp_timeout));
string errmsg;
try {
if (!_connection->connect(host, errmsg) ||
diff --git a/src/mongo/dbtests/perftests.cpp b/src/mongo/dbtests/perftests.cpp
index 548dc016e5f..a7bceba7fc0 100644
--- a/src/mongo/dbtests/perftests.cpp
+++ b/src/mongo/dbtests/perftests.cpp
@@ -158,7 +158,7 @@ namespace PerfTests {
}
}
- boost::shared_ptr<DBClientConnection> c(new DBClientConnection(false, 0, 60));
+ boost::shared_ptr<DBClientConnection> c(new DBClientConnection(false, 60));
string err;
if( c->connect(HostAndPort("perfdb.10gen.cc"), err) ) {
if( !c->auth("perf", "perf", pwd, err) ) {