diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2016-10-24 10:12:20 -0400 |
---|---|---|
committer | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2016-10-24 10:13:03 -0400 |
commit | c0fe7490ed8482c5244a36196253883d3c50e7c7 (patch) | |
tree | 90b050a39ccbb5b9796a03b119b71c413c86c098 /src/mongo/client/dbclient_rs.cpp | |
parent | 406e1f9e715617956283d32a021fbac7ac46a4cc (diff) | |
download | mongo-c0fe7490ed8482c5244a36196253883d3c50e7c7.tar.gz |
SERVER-26683 Log reason for marking repl hosts as failed in the RSM
Diffstat (limited to 'src/mongo/client/dbclient_rs.cpp')
-rw-r--r-- | src/mongo/client/dbclient_rs.cpp | 127 |
1 files changed, 65 insertions, 62 deletions
diff --git a/src/mongo/client/dbclient_rs.cpp b/src/mongo/client/dbclient_rs.cpp index bd8ec5863c9..1d4e0a8e891 100644 --- a/src/mongo/client/dbclient_rs.cpp +++ b/src/mongo/client/dbclient_rs.cpp @@ -80,6 +80,11 @@ public: } _populateReadPrefSecOkCmdList; /** + * Maximum number of retries to make for auto-retry logic when performing a slave ok operation. + */ +const size_t MAX_RETRY = 3; + +/** * Extracts the read preference settings from the query document. Note that this method * assumes that the query is ok for secondaries so it defaults to * ReadPreference::SecondaryPreferred when nothing is specified. Supports the following @@ -124,13 +129,13 @@ ReadPreferenceSetting* _extractReadPref(const BSONObj& query, int queryOptions) : mongo::ReadPreference::PrimaryOnly; return new ReadPreferenceSetting(pref, TagSet()); } + } // namespace // -------------------------------- // ----- DBClientReplicaSet --------- // -------------------------------- -const size_t DBClientReplicaSet::MAX_RETRY = 3; bool DBClientReplicaSet::_authPooledSecondaryConn = true; DBClientReplicaSet::DBClientReplicaSet(const string& name, @@ -293,7 +298,8 @@ DBClientConnection* DBClientReplicaSet::checkMaster() { if (!_master->isFailed()) return _master.get(); - monitor->failedHost(_masterHost); + monitor->failedHost( + _masterHost, {ErrorCodes::fromInt(40332), "Last known master host cannot be reached"}); h = monitor->getMasterOrUassert(); // old master failed, try again. } @@ -315,13 +321,11 @@ DBClientConnection* DBClientReplicaSet::checkMaster() { } if (newConn == NULL || !errmsg.empty()) { - monitor->failedHost(_masterHost); - uasserted(ErrorCodes::FailedToSatisfyReadPreference, - str::stream() << "can't connect to new replica set master [" - << _masterHost.toString() - << "]" - << (errmsg.empty() ? "" : ", err: ") - << errmsg); + const std::string message = str::stream() << "can't connect to new replica set master [" + << _masterHost.toString() << "]" + << (errmsg.empty() ? "" : ", err: ") << errmsg; + monitor->failedHost(_masterHost, {ErrorCodes::fromInt(40333), message}); + uasserted(ErrorCodes::FailedToSatisfyReadPreference, message); } resetMaster(); @@ -349,7 +353,8 @@ bool DBClientReplicaSet::checkLastHost(const ReadPreferenceSetting* readPref) { // Make sure we don't think the host is down. if (_lastSlaveOkConn->isFailed() || !_getMonitor()->isHostUp(_lastSlaveOkHost)) { - invalidateLastSlaveOkCache(); + _invalidateLastSlaveOkCache( + {ErrorCodes::fromInt(40334), "Last slave connection is no longer available"}); return false; } @@ -375,7 +380,7 @@ void DBClientReplicaSet::logoutAll(DBClientConnection* conn) { conn->logout(i->first, response); } catch (const UserException& ex) { warning() << "Failed to logout: " << conn->getServerAddress() << " on db: " << i->first - << endl; + << causedBy(redact(ex)); } } } @@ -448,13 +453,13 @@ void DBClientReplicaSet::_auth(const BSONObj& params) { throw; } - StringBuilder errMsgB; - errMsgB << "can't authenticate against replica set node " - << _lastSlaveOkHost.toString(); - lastNodeStatus = ex.toStatus(errMsgB.str()); - - LOG(1) << lastNodeStatus.reason() << endl; - invalidateLastSlaveOkCache(); + const Status status = ex.toStatus(); + lastNodeStatus = {status.code(), + str::stream() << "can't authenticate against replica set node " + << _lastSlaveOkHost + << ": " + << status.reason()}; + _invalidateLastSlaveOkCache(lastNodeStatus); } } @@ -525,6 +530,7 @@ unique_ptr<DBClientCursor> DBClientReplicaSet::query(const string& ns, << ")" << endl; string lastNodeErrMsg; + for (size_t retry = 0; retry < MAX_RETRY; retry++) { try { DBClientConnection* conn = selectNodeUsingTags(readPref); @@ -537,14 +543,11 @@ unique_ptr<DBClientCursor> DBClientReplicaSet::query(const string& ns, ns, query, nToReturn, nToSkip, fieldsToReturn, queryOptions, batchSize); return checkSlaveQueryResult(std::move(cursor)); - } catch (const DBException& dbExcep) { - StringBuilder errMsgBuilder; - errMsgBuilder << "can't query replica set node " << _lastSlaveOkHost.toString() - << ": " << causedBy(redact(dbExcep)); - lastNodeErrMsg = errMsgBuilder.str(); - - LOG(1) << lastNodeErrMsg << endl; - invalidateLastSlaveOkCache(); + } catch (const DBException& ex) { + const Status status = ex.toStatus(); + lastNodeErrMsg = str::stream() << "can't query replica set node " + << _lastSlaveOkHost << ": " << status.reason(); + _invalidateLastSlaveOkCache({status.code(), lastNodeErrMsg}); } } @@ -589,14 +592,13 @@ BSONObj DBClientReplicaSet::findOne(const string& ns, } return conn->findOne(ns, query, fieldsToReturn, queryOptions); - } catch (const DBException& dbExcep) { - StringBuilder errMsgBuilder; - errMsgBuilder << "can't findone replica set node " << _lastSlaveOkHost.toString() - << ": " << causedBy(redact(dbExcep)); - lastNodeErrMsg = errMsgBuilder.str(); - - LOG(1) << lastNodeErrMsg << endl; - invalidateLastSlaveOkCache(); + } catch (const DBException& ex) { + const Status status = ex.toStatus(); + _invalidateLastSlaveOkCache({status.code(), + str::stream() << "can't findone replica set node " + << _lastSlaveOkHost.toString() + << ": " + << status.reason()}); } } @@ -623,10 +625,11 @@ void DBClientReplicaSet::killCursor(long long cursorID) { } void DBClientReplicaSet::isntMaster() { - log() << "got not master for: " << _masterHost << endl; - // Can't use _getMonitor because that will create a new monitor from the cached seed if - // the monitor doesn't exist. - _rsm->failedHost(_masterHost); + // Can't use _getMonitor because that will create a new monitor from the cached seed if the + // monitor doesn't exist. + _rsm->failedHost( + _masterHost, + {ErrorCodes::NotMaster, str::stream() << "got not master for: " << _masterHost}); resetMaster(); } @@ -656,9 +659,11 @@ unique_ptr<DBClientCursor> DBClientReplicaSet::checkSlaveQueryResult( } void DBClientReplicaSet::isntSecondary() { - log() << "slave no longer has secondary status: " << _lastSlaveOkHost << endl; // Failover to next slave - _getMonitor()->failedHost(_lastSlaveOkHost); + _getMonitor()->failedHost( + _lastSlaveOkHost, + {ErrorCodes::NotMasterOrSecondary, + str::stream() << "slave no longer has secondary status: " << _lastSlaveOkHost}); resetSlaveOkConn(); } @@ -774,15 +779,13 @@ void DBClientReplicaSet::say(Message& toSend, bool isRetry, string* actualServer _lazyState._lastOp = lastOp; _lazyState._secondaryQueryOk = true; _lazyState._lastClient = conn; - } catch (const DBException& DBExcep) { - StringBuilder errMsgBuilder; - errMsgBuilder << "can't callLazy replica set node " - << _lastSlaveOkHost.toString() << ": " - << causedBy(redact(DBExcep)); - lastNodeErrMsg = errMsgBuilder.str(); - - LOG(1) << lastNodeErrMsg << endl; - invalidateLastSlaveOkCache(); + } catch (const DBException& e) { + const Status status = e.toStatus(); + _invalidateLastSlaveOkCache({status.code(), + str::stream() << "can't callLazy replica set node " + << _lastSlaveOkHost.toString() + << ": " + << status.reason()}); continue; } @@ -952,10 +955,10 @@ DBClientReplicaSet::runCommandWithMetadataAndTarget(StringData database, return std::make_tuple( conn->runCommandWithMetadata(database, command, metadata, commandArgs), conn); } catch (const DBException& ex) { - log() << exceptionToStatus(); - invalidateLastSlaveOkCache(); + _invalidateLastSlaveOkCache(ex.toStatus()); } } + uasserted(ErrorCodes::NodeNotFound, str::stream() << "Could not satisfy $readPreference of '" << readPref.toBSON() << "' " << "while attempting to run command " @@ -998,14 +1001,15 @@ bool DBClientReplicaSet::call(Message& toSend, } return conn->call(toSend, response, assertOk, nullptr); - } catch (const DBException& dbExcep) { - LOG(1) << "can't call replica set node " << _lastSlaveOkHost << ": " - << causedBy(redact(dbExcep)); - + } catch (const DBException& ex) { if (actualServer) *actualServer = ""; - invalidateLastSlaveOkCache(); + const Status status = ex.toStatus(); + _invalidateLastSlaveOkCache( + {status.code(), + str::stream() << "can't call replica set node " << _lastSlaveOkHost << ": " + << status.reason()}); } } @@ -1040,12 +1044,11 @@ bool DBClientReplicaSet::call(Message& toSend, return true; } -void DBClientReplicaSet::invalidateLastSlaveOkCache() { - /* This is not wrapped in with if (_lastSlaveOkConn && _lastSlaveOkConn->isFailed()) - * because there are certain exceptions that will not make the connection be labeled - * as failed. For example, asserts 13079, 13080, 16386 - */ - _getMonitor()->failedHost(_lastSlaveOkHost); +void DBClientReplicaSet::_invalidateLastSlaveOkCache(const Status& status) { + // This is not wrapped in with if (_lastSlaveOkConn && _lastSlaveOkConn->isFailed()) because + // there are certain exceptions that will not make the connection be labeled as failed. For + // example, asserts 13079, 13080, 16386 + _getMonitor()->failedHost(_lastSlaveOkHost, status); resetSlaveOkConn(); } |