author    gregs <greg@10gen.com>  2011-06-09 15:41:21 -0400
committer gregs <greg@10gen.com>  2011-06-16 17:34:39 -0400
commit    4d8ee4cc7c4d32ace1b1cab403dd429d9467a677 (patch)
tree      69c6a5b529fc94bbaf4b437ebcd3d3d062c6df37
parent    31f5fa3475bfdef256bb483b92afb599c6280835 (diff)
parallel cursor: recover gracefully from replica set and other errors SERVER-2481
-rw-r--r--  client/dbclient.cpp          9
-rw-r--r--  client/dbclient.h           19
-rw-r--r--  client/dbclient_rs.cpp     104
-rw-r--r--  client/dbclient_rs.h        23
-rw-r--r--  client/dbclientcursor.cpp   41
-rw-r--r--  client/dbclientcursor.h      7
-rw-r--r--  client/parallel.cpp        274
-rw-r--r--  docs/errors.md              23
8 files changed, 386 insertions(+), 114 deletions(-)
diff --git a/client/dbclient.cpp b/client/dbclient.cpp
index 09adba72117..b5d908daab6 100644
--- a/client/dbclient.cpp
+++ b/client/dbclient.cpp
@@ -773,10 +773,6 @@ namespace mongo {
}
- DBClientBase* DBClientBase::callLazy( Message& toSend ) {
- say( toSend );
- return this;
- }
auto_ptr<DBClientCursor> DBClientWithCommands::getIndexes( const string &ns ) {
return query( Namespace( ns.c_str() ).getSisterNS( "system.indexes" ).c_str() , BSON( "ns" << ns ) );
@@ -968,11 +964,14 @@ namespace mongo {
return ! getErrField( o ).eoo();
}
- void DBClientConnection::checkResponse( const char *data, int nReturned ) {
+ void DBClientConnection::checkResponse( const char *data, int nReturned, bool* retry, string* host ) {
/* check for errors. the only one we really care about at
* this stage is "not master"
*/
+ if( retry ) *retry = false;
+ if( host ) *host = _serverString;
+
if ( clientSet && nReturned ) {
assert(data);
BSONObj o(data);
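
The widened checkResponse() above replaces a fire-and-forget error check with a two-way contract: *retry tells the caller to resend the original message, and *host reports which server actually answered. A minimal sketch of a caller honoring that contract (the helper and variable names are illustrative, not part of this commit):

    // Sketch: consuming the new checkResponse() out-parameters.
    // "data" / "nReturned" stand for the reply payload and its document count.
    #include "client/dbclient.h"

    using namespace mongo;
    using namespace std;

    void handleReply( DBClientBase* conn, const char* data, int nReturned ) {
        bool retry = false;
        string host;
        conn->checkResponse( data, nReturned, &retry, &host );
        if ( retry ) {
            // a retryable condition (e.g. "not master") was flagged;
            // the caller should resend the original message
            log() << "retrying lazy request, last host was " << host << endl;
        }
    }
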
diff --git a/client/dbclient.h b/client/dbclient.h
index 1ff085ca2dd..d7b15089cb7 100644
--- a/client/dbclient.h
+++ b/client/dbclient.h
@@ -343,10 +343,12 @@ namespace mongo {
virtual bool call( Message &toSend, Message &response, bool assertOk=true , string * actualServer = 0 ) = 0;
virtual void say( Message &toSend ) = 0;
virtual void sayPiggyBack( Message &toSend ) = 0;
- virtual void checkResponse( const char* data, int nReturned ) {}
-
/* used by QueryOption_Exhaust. To use that your subclass must implement this. */
virtual bool recv( Message& m ) { assert(false); return false; }
+ // In general, for lazy queries, we'll need to say, recv, then checkResponse
+ virtual void checkResponse( const char* data, int nReturned, bool* retry = NULL, string* targetHost = NULL ) {
+ if( retry ) *retry = false; if( targetHost ) *targetHost = "";
+ }
};
/**
@@ -791,15 +793,6 @@ namespace mongo {
virtual bool callRead( Message& toSend , Message& response ) = 0;
// virtual bool callWrite( Message& toSend , Message& response ) = 0; // TODO: add this if needed
- virtual void say( Message& toSend ) = 0;
-
- /**
- * this sends the request but does not wait for the response
- * we return a DBClientBase in case this connection points to many servers
- * so we can call recv() on the right socket
- * @return the actual connection to call recv on
- */
- virtual DBClientBase* callLazy( Message& toSend );
virtual ConnectionString::ConnectionType type() const = 0;
@@ -915,9 +908,10 @@ namespace mongo {
virtual void killCursor( long long cursorID );
virtual bool callRead( Message& toSend , Message& response ) { return call( toSend , response ); }
virtual void say( Message &toSend );
+ virtual bool recv( Message& m );
+ virtual void checkResponse( const char *data, int nReturned, bool* retry = NULL, string* host = NULL );
virtual bool call( Message &toSend, Message &response, bool assertOk = true , string * actualServer = 0 );
virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; }
- virtual void checkResponse( const char *data, int nReturned );
void setSoTimeout(double to) { _so_timeout = to; }
static int getNumConnections() {
@@ -929,7 +923,6 @@ namespace mongo {
protected:
friend class SyncClusterConnection;
- virtual bool recv( Message& m );
virtual void sayPiggyBack( Message &toSend );
DBClientReplicaSet *clientSet;
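
Taken together, these header changes define the lazy-query protocol named in the comment above: say() sends without blocking, recv() collects the reply later, and checkResponse() decides whether to retry. A hedged sketch of one such round trip (the helper and retry bound are illustrative; the QueryResult cast mirrors dbclientcursor.cpp, and the include path for it is assumed):

    // Sketch: the say / recv / checkResponse cycle for a lazy query.
    #include "client/dbclient.h"
    #include "db/dbmessage.h"    // QueryResult (include path assumed)

    using namespace mongo;
    using namespace std;

    bool lazyRoundTrip( DBClientBase* conn, Message& toSend, Message& reply ) {
        for ( int attempt = 0; attempt < 3; attempt++ ) {  // bound mirrors dbclient_rs.cpp
            conn->say( toSend );                           // send, don't wait
            if ( ! conn->recv( reply ) )                   // collect the reply
                return false;
            bool retry = false;
            string host;
            QueryResult* qr = (QueryResult*) reply.singleData();
            conn->checkResponse( qr->data(), qr->nReturned, &retry, &host );
            if ( ! retry )
                return true;                               // usable reply from "host"
        }
        return false;                                      // retries exhausted
    }
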
diff --git a/client/dbclient_rs.cpp b/client/dbclient_rs.cpp
index 854a7fb7e93..798a3791fc3 100644
--- a/client/dbclient_rs.cpp
+++ b/client/dbclient_rs.cpp
@@ -436,6 +436,7 @@ namespace mongo {
}
DBClientConnection * DBClientReplicaSet::checkMaster() {
+ if( _lazyState._lastClient ) _lazyState = LazyState();
HostAndPort h = _monitor->getMaster();
if ( h == _masterHost && _master ) {
@@ -457,6 +458,7 @@ namespace mongo {
}
DBClientConnection * DBClientReplicaSet::checkSlave() {
+ if( _lazyState._lastClient ) _lazyState = LazyState();
HostAndPort h = _monitor->getSlave( _slaveHost );
if ( h == _slaveHost && _slave ) {
@@ -611,26 +613,114 @@ namespace mongo {
_slave.reset();
}
- DBClientBase* DBClientReplicaSet::callLazy( Message& toSend ) {
- if ( toSend.operation() == dbQuery ) {
+ void DBClientReplicaSet::say( Message& toSend ) {
+
+ int lastOp = -1;
+ bool slaveOk = false;
+
+ if ( ( lastOp = toSend.operation() ) == dbQuery ) {
// TODO: might be possible to do this faster by changing api
DbMessage dm( toSend );
QueryMessage qm( dm );
- if ( qm.queryOptions & QueryOption_SlaveOk ) {
- for ( int i=0; i<3; i++ ) {
+ if ( ( slaveOk = ( qm.queryOptions & QueryOption_SlaveOk ) ) ) {
+
+ for ( int i = _lazyState._retries; i < 3; i++ ) {
try {
- return checkSlave()->callLazy( toSend );
+ DBClientConnection* slave = checkSlave();
+ slave->say( toSend );
+
+ _lazyState._lastOp = lastOp;
+ _lazyState._slaveOk = slaveOk;
+ _lazyState._retries = i;
+ _lazyState._lastClient = slave;
+ return;
}
catch ( DBException &e ) {
- LOG(1) << "can't callLazy replica set slave " << i << " : " << _slaveHost << causedBy( e ) << endl;
+ LOG(1) << "can't callLazy replica set slave " << i << " : " << _slaveHost << causedBy( e ) << endl;
}
}
}
}
- return checkMaster()->callLazy( toSend );
+ DBClientConnection* master = checkMaster();
+ master->say( toSend );
+
+ _lazyState._lastOp = lastOp;
+ _lazyState._slaveOk = slaveOk;
+ _lazyState._retries = 3;
+ _lazyState._lastClient = master;
+ return;
+ }
+
+ bool DBClientReplicaSet::recv( Message& m ) {
+
+ assert( _lazyState._lastClient );
+
+ // TODO: It would be nice if we could easily wrap a conn error as a result error
+ try {
+ return _lazyState._lastClient->recv( m );
+ }
+ catch( DBException& e ){
+ log() << "could not receive data from " << _lazyState._lastClient << causedBy( e ) << endl;
+ return false;
+ }
+ }
+
+ void DBClientReplicaSet::checkResponse( const char* data, int nReturned, bool* retry, string* targetHost ){
+
+ // For now, do exactly as we did before, so as not to break things. In general though, we
+ // should fix this so checkResponse has a more consistent contract.
+ if( ! retry ){
+ if( _lazyState._lastClient )
+ return _lazyState._lastClient->checkResponse( data, nReturned );
+ else
+ return checkMaster()->checkResponse( data, nReturned );
+ }
+
+ *retry = false;
+ if( targetHost && _lazyState._lastClient ) *targetHost = _lazyState._lastClient->getServerAddress();
+ else if( targetHost ) *targetHost = "";
+
+ if( ! _lazyState._lastClient ) return;
+ if( nReturned > 1 ) return;
+
+ BSONObj dataObj;
+ if( nReturned == 1 && data )
+ dataObj = BSONObj( data );
+
+ // Check if we should retry here
+ if( _lazyState._lastOp == dbQuery && _lazyState._slaveOk ){
+
+ bool mayRetry = false;
+ if( dataObj.isEmpty() ){
+ warning() << "no data received from " << _lazyState._lastClient->toString() << endl;
+ mayRetry = true;
+ }
+ // Check the error code for a slave not secondary error
+ else if( hasErrField( dataObj ) && ! dataObj["code"].eoo() && dataObj["code"].Int() == 13436 ){
+ mayRetry = true;
+ }
+
+ if( mayRetry ){
+ if( _lazyState._lastClient == _slave.get() ){
+ isntSecondary();
+ }
+ else if( _lazyState._lastClient == _master.get() ){
+ isntMaster();
+ }
+ else
+ warning() << "passed " << dataObj << " but last rs client " << _lazyState._lastClient->toString() << " is not master or secondary" << endl;
+
+ if( _lazyState._retries < 3 ){
+ _lazyState._retries++;
+ *retry = true;
+ }
+ else log() << "Could not slave retries!" << endl;
+ }
+ }
}
+
bool DBClientReplicaSet::call( Message &toSend, Message &response, bool assertOk , string * actualServer ) {
if ( toSend.operation() == dbQuery ) {
// TODO: might be possible to do this faster by changing api
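
In the checkResponse() implementation above, the retry decision for slaveOk queries hinges on two signals: an empty reply, or an error document carrying code 13436 ("not master or secondary"). For reference, a sketch of that predicate and the reply shape that trips it (the helper name and the error text are illustrative):

    // Sketch: the reply shape that raises the retry flag above.
    #include "client/dbclient.h"

    using namespace mongo;

    bool wouldRetry( const BSONObj& dataObj ) {
        // same test as DBClientReplicaSet::checkResponse() above
        return hasErrField( dataObj ) && ! dataObj["code"].eoo()
               && dataObj["code"].Int() == 13436;  // not master or secondary
    }

    // e.g. wouldRetry( BSON( "$err" << "not master or secondary" << "code" << 13436 ) )
    // returns true, so the failed node is flagged and the query is resent
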
diff --git a/client/dbclient_rs.h b/client/dbclient_rs.h
index 4aa06417271..b49e12f7b28 100644
--- a/client/dbclient_rs.h
+++ b/client/dbclient_rs.h
@@ -210,7 +210,9 @@ namespace mongo {
// ---- callback pieces -------
- virtual void checkResponse( const char *data, int nReturned ) { checkMaster()->checkResponse( data , nReturned ); }
+ virtual void say( Message &toSend );
+ virtual bool recv( Message &toRecv );
+ virtual void checkResponse( const char* data, int nReturned, bool* retry = NULL, string* targetHost = NULL );
/* this is the callback from our underlying connections to notify us that we got a "not master" error.
*/
@@ -235,10 +237,7 @@ namespace mongo {
// ---- low level ------
virtual bool call( Message &toSend, Message &response, bool assertOk=true , string * actualServer = 0 );
- virtual void say( Message &toSend ) { checkMaster()->say( toSend ); }
virtual bool callRead( Message& toSend , Message& response ) { return checkMaster()->callRead( toSend , response ); }
- virtual DBClientBase* callLazy( Message& toSend );
-
protected:
virtual void sayPiggyBack( Message &toSend ) { checkMaster()->say( toSend ); }
@@ -279,6 +278,22 @@ namespace mongo {
// this could be a security issue, as the password is stored in memory
// not sure if/how we should handle
list<AuthInfo> _auths;
+
+ protected:
+
+ /**
+ * for storing (non-threadsafe) information between lazy calls
+ */
+ class LazyState {
+ public:
+ LazyState() : _lastClient( NULL ), _lastOp( -1 ), _slaveOk( false ), _retries( 0 ) {}
+ DBClientConnection* _lastClient;
+ int _lastOp;
+ bool _slaveOk;
+ int _retries;
+
+ } _lazyState;
+
};
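
The LazyState member above is what makes the replica-set client's say()/recv()/checkResponse() sequence stateful: it remembers which underlying connection sent the last message, what operation it was, and how many retries have been burned. Because there is one LazyState per DBClientReplicaSet (and, as the comment notes, it is not threadsafe), lazy exchanges must not interleave; checkMaster()/checkSlave() reset it defensively, as shown earlier. A sketch of the intended single-flight usage (helper name illustrative):

    // Sketch: one complete lazy exchange at a time per replica-set client.
    #include "client/dbclient_rs.h"

    using namespace mongo;

    void singleFlight( DBClientReplicaSet& rs, Message& request, Message& reply ) {
        rs.say( request );          // records target conn / op / slaveOk in _lazyState
        if ( ! rs.recv( reply ) )   // routed to _lazyState._lastClient
            return;
        // ... call checkResponse( ... ) before starting the next say() ...
    }
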
diff --git a/client/dbclientcursor.cpp b/client/dbclientcursor.cpp
index ddecfc34660..92b2dfe2c5f 100644
--- a/client/dbclientcursor.cpp
+++ b/client/dbclientcursor.cpp
@@ -72,23 +72,27 @@ namespace mongo {
void DBClientCursor::initLazy() {
Message toSend;
_assembleInit( toSend );
- _lazy = _client->callLazy( toSend );
- assert( _lazy );
+ _client->say( toSend );
}
- bool DBClientCursor::initLazyFinish() {
- assert( _lazy );
- if ( ! _lazy->recv( *b.m ) ) {
- log() << "DBClientCursor::init lazy call() failed" << endl;
- return false;
- }
- if ( b.m->empty() ) {
- // log msg temp?
- log() << "DBClientCursor::init message from call() was empty" << endl;
+ bool DBClientCursor::initLazyFinish( bool& retry ) {
+
+ bool recvd = _client->recv( *b.m );
+
+ // If we get a bad response, return false
+ if ( ! recvd || b.m->empty() ) {
+
+ if( !recvd )
+ log() << "DBClientCursor::init lazy say() failed" << endl;
+ if( b.m->empty() )
+ log() << "DBClientCursor::init message from say() was empty" << endl;
+
+ _client->checkResponse( NULL, 0, &retry, &_lazyHost );
return false;
}
- dataReceived();
- return true;
+
+ dataReceived( retry, _lazyHost );
+ return !retry;
}
void DBClientCursor::requestMore() {
@@ -136,7 +140,8 @@ namespace mongo {
dataReceived();
}
- void DBClientCursor::dataReceived() {
+ void DBClientCursor::dataReceived( bool& retry, string& host ) {
+
QueryResult *qr = (QueryResult *) b.m->singleData();
resultFlags = qr->resultFlags();
@@ -162,7 +167,7 @@ namespace mongo {
b.pos = 0;
b.data = qr->data();
- _client->checkResponse( b.data, b.nReturned ); // watches for "not master"
+ _client->checkResponse( b.data, b.nReturned, &retry, &host ); // watches for "not master"
/* this assert would fire the way we currently work:
assert( nReturned || cursorId == 0 );
@@ -250,8 +255,8 @@ namespace mongo {
if ( conn->get()->type() == ConnectionString::SET ||
conn->get()->type() == ConnectionString::SYNC ) {
- if( _lazy )
- _scopedHost = _lazy->getServerAddress();
+ if( _lazyHost.size() > 0 )
+ _scopedHost = _lazyHost;
else if( _client )
_scopedHost = _client->getServerAddress();
else
@@ -263,7 +268,7 @@ namespace mongo {
conn->done();
_client = 0;
- _lazy = 0;
+ _lazyHost = "";
}
DBClientCursor::~DBClientCursor() {
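
With the changes above, lazy cursor initialization is a two-phase API: initLazy() assembles and say()s the query, and initLazyFinish( retry ) recv()s the reply, returning false and raising retry when the query should be sent again. A hedged sketch of a driver loop over that API (parallel.cpp below does the same across many cursors at once; the helper and bound are illustrative):

    // Sketch: retry loop over the split lazy-init API.
    #include "client/dbclientcursor.h"

    using namespace mongo;

    bool initWithRetries( DBClientCursor* cursor ) {
        for ( int attempt = 0; attempt < 4; attempt++ ) {
            cursor->initLazy();                  // assemble + say()
            bool retry = false;
            if ( cursor->initLazyFinish( retry ) )
                return true;                     // first batch received
            if ( ! retry )
                return false;                    // hard failure, don't resend
        }
        return false;                            // retries exhausted
    }
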
diff --git a/client/dbclientcursor.h b/client/dbclientcursor.h
index 2cdb031223c..fe7816142cb 100644
--- a/client/dbclientcursor.h
+++ b/client/dbclientcursor.h
@@ -165,7 +165,7 @@ namespace mongo {
bool init();
void initLazy();
- bool initLazyFinish();
+ bool initLazyFinish( bool& retry );
class Batch : boost::noncopyable {
friend class DBClientCursor;
@@ -198,10 +198,11 @@ namespace mongo {
long long cursorId;
bool _ownCursor; // see decouple()
string _scopedHost;
- DBClientBase* _lazy; // only for lazy init
+ string _lazyHost;
bool wasError;
- void dataReceived();
+ void dataReceived() { bool retry; string lazyHost; dataReceived( retry, lazyHost ); }
+ void dataReceived( bool& retry, string& lazyHost );
void requestMore();
void exhaustReceiveMore(); // for exhaust
diff --git a/client/parallel.cpp b/client/parallel.cpp
index 94c38205c5e..94453403067 100644
--- a/client/parallel.cpp
+++ b/client/parallel.cpp
@@ -411,76 +411,240 @@ namespace mongo {
}
void ParallelSortClusteredCursor::_init() {
+
+ // log() << "Starting parallel search..." << endl;
+
// make sure we're not already initialized
assert( ! _cursors );
-
_cursors = new FilteringClientCursor[_numServers];
+ bool returnPartial = ( _options & QueryOption_PartialResults );
- size_t num = 0;
- vector<shared_ptr<ShardConnection> > conns;
+ vector<ServerAndQuery> queries( _servers.begin(), _servers.end() );
+ set<int> retryQueries;
+ int finishedQueries = 0;
+
+ vector< shared_ptr<ShardConnection> > conns;
vector<string> servers;
-
- for ( set<ServerAndQuery>::iterator i = _servers.begin(); i!=_servers.end(); ++i ) {
- size_t me = num++;
- const ServerAndQuery& sq = *i;
+ // Since we may get all sorts of errors, record them all as they come and throw them later if necessary
+ vector<string> staleConfigExs;
+ vector<string> socketExs;
+ vector<string> otherExs;
+ bool allConfigStale = false;
+
+ int retries = -1;
+
+ // Loop through all the queries until we've finished or gotten a socket exception on all of them
+ // We break early for non-socket exceptions, and socket exceptions if we aren't returning partial results
+ do {
+ retries++;
+
+ bool firstPass = retryQueries.size() == 0;
- BSONObj q = _query;
- if ( ! sq._extra.isEmpty() ) {
- q = concatQuery( q , sq._extra );
+ if( ! firstPass ){
+ log() << "retrying " << ( returnPartial ? "(partial) " : "" ) << "parallel connection to ";
+ for( set<int>::iterator it = retryQueries.begin(); it != retryQueries.end(); ++it ){
+ log() << queries[*it]._server << ", ";
+ }
+ log() << finishedQueries << " finished queries." << endl;
}
- conns.push_back( shared_ptr<ShardConnection>( new ShardConnection( sq._server , _ns ) ) );
- servers.push_back( sq._server );
-
- if ( conns[me]->setVersion() ) {
- // we can't cleanly release other sockets
- // because there is data waiting on the sockets
- // TODO: should we read from them?
- // TODO: we should probably retry as well in this case, since a migrate commit means another
- // migrate will take some time to complete.
- // we can close this one because we know the state
- conns[me]->done();
- throw StaleConfigException( _ns , "ClusteredCursor::query ShardConnection had to change" , true );
+ size_t num = 0;
+ for ( vector<ServerAndQuery>::iterator it = queries.begin(); it != queries.end(); ++it ) {
+ size_t i = num++;
+
+ const ServerAndQuery& sq = *it;
+
+ // If we're not retrying this cursor on later passes, continue
+ if( ! firstPass && retryQueries.find( i ) == retryQueries.end() ) continue;
+
+ // log() << "Querying " << _query << " from " << _ns << " for " << sq._server << endl;
+
+ BSONObj q = _query;
+ if ( ! sq._extra.isEmpty() ) {
+ q = concatQuery( q , sq._extra );
+ }
+
+ string errLoc = " @ " + sq._server;
+
+ if( firstPass ){
+
+ // This may be the first time connecting to this shard, if so we can get an error here
+ try {
+ conns.push_back( shared_ptr<ShardConnection>( new ShardConnection( sq._server , _ns ) ) );
+ }
+ catch( std::exception& e ){
+ socketExs.push_back( e.what() + errLoc );
+ if( ! returnPartial ){
+ num--;
+ break;
+ }
+ conns.push_back( shared_ptr<ShardConnection>() );
+ continue;
+ }
+
+ servers.push_back( sq._server );
+ }
+
+ if ( conns[i]->setVersion() ) {
+ conns[i]->done();
+ staleConfigExs.push_back( StaleConfigException( _ns , "ClusteredCursor::query ShardConnection had to change" , true ).what() + errLoc );
+ break;
+ }
+
+ LOG(5) << "ParallelSortClusteredCursor::init server:" << sq._server << " ns:" << _ns
+ << " query:" << q << " _fields:" << _fields << " options: " << _options << endl;
+
+ if( ! _cursors[i].raw() )
+ _cursors[i].reset( new DBClientCursor( conns[i]->get() , _ns , q ,
+ 0 , // nToReturn
+ 0 , // nToSkip
+ _fields.isEmpty() ? 0 : &_fields , // fieldsToReturn
+ _options ,
+ _batchSize == 0 ? 0 : _batchSize + _needToSkip // batchSize
+ ) );
+
+ try{
+ _cursors[i].raw()->initLazy();
+ }
+ catch( SocketException& e ){
+ socketExs.push_back( e.what() + errLoc );
+ _cursors[i].reset( NULL );
+ conns[i]->done();
+ if( ! returnPartial ) break;
+ }
+ catch( std::exception& e){
+ otherExs.push_back( e.what() + errLoc );
+ _cursors[i].reset( NULL );
+ conns[i]->done();
+ break;
+ }
+
}
- LOG(5) << "ParallelSortClusteredCursor::init server:" << sq._server << " ns:" << _ns
- << " query:" << q << " _fields:" << _fields << " options: " << _options << endl;
-
- _cursors[me].reset( new DBClientCursor( conns[me]->get() , _ns , q ,
- 0 , // nToReturn
- 0 , // nToSkip
- _fields.isEmpty() ? 0 : &_fields , // fieldsToReturn
- _options ,
- _batchSize == 0 ? 0 : _batchSize + _needToSkip // batchSize
- ) );
-
- // note: this may throw a scoket exception
- // if it does, we lose our other connections as well
- _cursors[me].raw()->initLazy();
-
- }
+ // Go through all the potentially started cursors and finish initializing them or log any errors and
+ // potentially retry
+ // TODO: Better error classification would make this easier, errors are indicated in all sorts of ways
+ // here that we need to trap.
+ for ( size_t i = 0; i < num; i++ ) {
+
+ // log() << "Finishing query for " << cons[i].get()->getHost() << endl;
+ string errLoc = " @ " + queries[i]._server;
+
+ if( ! _cursors[i].raw() || ( ! firstPass && retryQueries.find( i ) == retryQueries.end() ) ){
+ if( conns[i] ) conns[i].get()->done();
+ continue;
+ }
+
+ assert( conns[i] );
+ retryQueries.erase( i );
+
+ bool retry = false;
+
+ try {
+
+ if( ! _cursors[i].raw()->initLazyFinish( retry ) ) {
+
+ warning() << "invalid result from " << conns[i]->getHost() << ( retry ? ", retrying" : "" ) << endl;
+ _cursors[i].reset( NULL );
+
+ if( ! retry ){
+ socketExs.push_back( str::stream() << "error querying server: " << servers[i] );
+ conns[i]->done();
+ }
+ else {
+ retryQueries.insert( i );
+ }
+
+ continue;
+ }
+ }
+ catch ( MsgAssertionException& e ){
+ socketExs.push_back( e.what() + errLoc );
+ _cursors[i].reset( NULL );
+ conns[i]->done();
+ continue;
+ }
+ catch ( SocketException& e ) {
+ socketExs.push_back( e.what() + errLoc );
+ _cursors[i].reset( NULL );
+ conns[i]->done();
+ continue;
+ }
+ catch( std::exception& e ){
+ otherExs.push_back( e.what() + errLoc );
+ _cursors[i].reset( NULL );
+ conns[i]->done();
+ continue;
+ }
- for ( size_t i=0; i<num; i++ ) {
- try {
- if ( ! _cursors[i].raw()->initLazyFinish() ) {
- // some sort of error
- // drop connection
- _cursors[i].reset( 0 );
-
- massert( 14047 , str::stream() << "error querying server: " << servers[i] , _options & QueryOption_PartialResults );
+ try {
+ _cursors[i].raw()->attach( conns[i].get() ); // this calls done on conn
+ _checkCursor( _cursors[i].raw() );
+
+ finishedQueries++;
+ }
+ catch ( StaleConfigException& e ){
+
+ // Our stored configuration data is actually stale, we need to reload it
+ // when we throw our exception
+ allConfigStale = true;
+
+ staleConfigExs.push_back( e.what() + errLoc );
+ _cursors[i].reset( NULL );
+ conns[i]->done();
+ continue;
+ }
+ catch( std::exception& e ){
+ otherExs.push_back( e.what() + errLoc );
+ _cursors[i].reset( NULL );
+ conns[i]->done();
+ continue;
}
}
- catch ( SocketException& e ) {
- if ( ! ( _options & QueryOption_PartialResults ) )
- throw e;
+
+ // Don't exceed our max retries, should not happen
+ assert( retries < 5 );
+ }
+ while( retryQueries.size() > 0 /* something to retry */ &&
+ ( socketExs.size() == 0 || returnPartial ) /* no conn issues */ &&
+ staleConfigExs.size() == 0 /* no config issues */ &&
+ otherExs.size() == 0 /* no other issues */);
+
+ // Assert that our conns are all closed!
+ for( vector< shared_ptr<ShardConnection> >::iterator i = conns.begin(); i < conns.end(); ++i ){
+ assert( ! (*i) || ! (*i)->ok() );
+ }
+
+ // Handle errors we got during initialization.
+ // If we're returning partial results, we can ignore socketExs, but nothing else
+ // Log a warning in any case, so we don't lose these messages
+ bool throwException = ( socketExs.size() > 0 && ! returnPartial ) || staleConfigExs.size() > 0 || otherExs.size() > 0;
+
+ if( socketExs.size() > 0 || staleConfigExs.size() > 0 || otherExs.size() > 0 ) {
+
+ vector<string> errMsgs;
+
+ errMsgs.insert( errMsgs.end(), staleConfigExs.begin(), staleConfigExs.end() );
+ errMsgs.insert( errMsgs.end(), otherExs.begin(), otherExs.end() );
+ errMsgs.insert( errMsgs.end(), socketExs.begin(), socketExs.end() );
+
+ stringstream errMsg;
+ errMsg << "could not initialize cursor across all shards because : ";
+ for( vector<string>::iterator i = errMsgs.begin(); i != errMsgs.end(); i++ ){
+ if( i != errMsgs.begin() ) errMsg << " :: and :: ";
+ errMsg << *i;
}
-
- _cursors[i].raw()->attach( conns[i].get() ); // this calls done on conn
- _checkCursor( _cursors[i].raw() );
+
+ if( throwException && staleConfigExs.size() > 0 )
+ throw StaleConfigException( _ns , errMsg.str() , ! allConfigStale );
+ else if( throwException )
+ throw DBException( errMsg.str(), 14827 );
+ else
+ warning() << errMsg.str() << endl;
}
-
+
}
ParallelSortClusteredCursor::~ParallelSortClusteredCursor() {
@@ -577,7 +741,9 @@ namespace mongo {
return _ok;
try {
- bool finished = _cursor->initLazyFinish();
+ // TODO: Allow retries?
+ bool retry = false;
+ bool finished = _cursor->initLazyFinish( retry );
// Shouldn't need to communicate with server any more
if ( _connHolder )
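
One detail of the _init() error handling above that is easy to miss: all three exception buckets (stale config, other, socket) are flattened into a single message, joined with " :: and :: ", before deciding whether to throw StaleConfigException, DBException 14827, or merely warn. A small standalone illustration of the resulting message format (the sample error strings are made up):

    // Standalone illustration of the error-message aggregation in _init().
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    using namespace std;

    int main() {
        vector<string> errMsgs;
        errMsgs.push_back( "ShardConnection had to change @ shard0:27018" );
        errMsgs.push_back( "socket exception @ shard2:27018" );

        stringstream errMsg;
        errMsg << "could not initialize cursor across all shards because : ";
        for ( vector<string>::iterator i = errMsgs.begin(); i != errMsgs.end(); ++i ) {
            if ( i != errMsgs.begin() ) errMsg << " :: and :: ";
            errMsg << *i;
        }
        cout << errMsg.str() << endl;
        return 0;
    }
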
diff --git a/docs/errors.md b/docs/errors.md
index 406b066a138..3b1464f5c35 100644
--- a/docs/errors.md
+++ b/docs/errors.md
@@ -81,7 +81,6 @@ client/connpool.h
client/dbclient.cpp
-----
* 10005 [code](http://github.com/mongodb/mongo/blob/master/client/dbclient.cpp#L480) listdatabases failed" , runCommand( "admin" , BSON( "listDatabases
* 10006 [code](http://github.com/mongodb/mongo/blob/master/client/dbclient.cpp#L481) listDatabases.databases not array" , info["databases
* 10007 [code](http://github.com/mongodb/mongo/blob/master/client/dbclient.cpp#L789) dropIndex failed
@@ -96,8 +95,8 @@ client/dbclient.cpp
client/dbclient.h
----
-* 10011 [code](http://github.com/mongodb/mongo/blob/master/client/dbclient.h#L498) no collection name
-* 9000 [code](http://github.com/mongodb/mongo/blob/master/client/dbclient.h#L812)
+* 10011 [code](http://github.com/mongodb/mongo/blob/master/client/dbclient.h#L528) no collection name
+* 9000 [code](http://github.com/mongodb/mongo/blob/master/client/dbclient.h#L833)
client/dbclient_rs.cpp
@@ -110,9 +109,9 @@ client/dbclient_rs.cpp
client/dbclientcursor.cpp
----
-* 13127 [code](http://github.com/mongodb/mongo/blob/master/client/dbclientcursor.cpp#L152) getMore: cursor didn't exist on server, possible restart or timeout?
-* 13422 [code](http://github.com/mongodb/mongo/blob/master/client/dbclientcursor.cpp#L200) DBClientCursor next() called but more() is false
-* 14821 [code](http://github.com/mongodb/mongo/blob/master/client/dbclientcursor.cpp#L258) No client or lazy client specified, cannot store multi-host connection.
+* 13127 [code](http://github.com/mongodb/mongo/blob/master/client/dbclientcursor.cpp#L155) getMore: cursor didn't exist on server, possible restart or timeout?
+* 13422 [code](http://github.com/mongodb/mongo/blob/master/client/dbclientcursor.cpp#L203) DBClientCursor next() called but more() is false
+* 14821 [code](http://github.com/mongodb/mongo/blob/master/client/dbclientcursor.cpp#L261) No client or lazy client specified, cannot store multi-host connection.
client/dbclientcursor.h
@@ -155,12 +154,11 @@ client/parallel.cpp
----
* 10017 [code](http://github.com/mongodb/mongo/blob/master/client/parallel.cpp#L80) cursor already done
* 10018 [code](http://github.com/mongodb/mongo/blob/master/client/parallel.cpp#L335) no more items
-* 10019 [code](http://github.com/mongodb/mongo/blob/master/client/parallel.cpp#L537) no more elements
+* 10019 [code](http://github.com/mongodb/mongo/blob/master/client/parallel.cpp#L638) no more elements
* 13431 [code](http://github.com/mongodb/mongo/blob/master/client/parallel.cpp#L395) have to have sort key in projection and removing it
* 13633 [code](http://github.com/mongodb/mongo/blob/master/client/parallel.cpp#L109) error querying server:
-* 14047 [code](http://github.com/mongodb/mongo/blob/master/client/parallel.cpp#L472) error querying server:
-* 14812 [code](http://github.com/mongodb/mongo/blob/master/client/parallel.cpp#L586) Error running command on server:
-* 14813 [code](http://github.com/mongodb/mongo/blob/master/client/parallel.cpp#L587) Command returned nothing
+* 14812 [code](http://github.com/mongodb/mongo/blob/master/client/parallel.cpp#L687) Error running command on server:
+* 14813 [code](http://github.com/mongodb/mongo/blob/master/client/parallel.cpp#L688) Command returned nothing
client/syncclusterconnection.cpp
@@ -793,8 +791,8 @@ db/queryoptimizer.h
db/queryoptimizercursor.cpp
----
-* 14809 [code](http://github.com/mongodb/mongo/blob/master/db/queryoptimizercursor.cpp#L304) Invalid access for cursor that is not ok()
-* 14826 [code](http://github.com/mongodb/mongo/blob/master/db/queryoptimizercursor.cpp#L174)
+* 14809 [code](http://github.com/mongodb/mongo/blob/master/db/queryoptimizercursor.cpp#L287) Invalid access for cursor that is not ok()
+* 14826 [code](http://github.com/mongodb/mongo/blob/master/db/queryoptimizercursor.cpp#L161)
db/queryutil.cpp