summary | refs | log | tree | commit | diff
path: root/src/mongo/s
diff options
context:
space:
mode:
author Randolph Tan <randolph@10gen.com> 2014-10-27 14:53:04 -0400
committer Randolph Tan <randolph@10gen.com> 2014-10-29 13:03:38 -0400
commit fbbb0d2a1d845728cd714272199a652573e2f27d (patch)
tree ec2286f7c8e93582a7306c059fbe4c5261cbb03a /src/mongo/s
parent 27c5131ff96d66ecc7ac8f7eee0ba72a7aa3c890 (diff)
download mongo-fbbb0d2a1d845728cd714272199a652573e2f27d.tar.gz
SERVER-15593 Initial autosplit heuristics are very aggressive when config servers are down
Diffstat (limited to 'src/mongo/s')
-rw-r--r-- src/mongo/s/chunk.cpp 10
-rw-r--r-- src/mongo/s/commands_admin.cpp 2
-rw-r--r-- src/mongo/s/config.cpp 37
-rw-r--r-- src/mongo/s/config.h 12
4 files changed, 48 insertions, 13 deletions
diff --git a/src/mongo/s/chunk.cpp b/src/mongo/s/chunk.cpp
index a8b70aa162a..addc11b79a4 100644
--- a/src/mongo/s/chunk.cpp
+++ b/src/mongo/s/chunk.cpp
@@ -538,6 +538,14 @@ namespace mongo {
}
TicketHolderReleaser releaser( &(getManager()->_splitHeuristics._splitTickets) );
+ if (!configServer.allUp(true)) {
+ LOG(1) << "not performing auto-split because not all config servers are up";
+
+ // Back off indirectly by resetting _dataWritten.
+ _dataWritten = 0;
+ return false;
+ }
+
// this is a bit ugly
// we need it so that mongos blocks for the writes to actually be committed
// this does mean mongos has more back pressure than mongod alone
@@ -1354,7 +1362,7 @@ namespace mongo {
uassert( 13331 , "collection's metadata is undergoing changes. Please try again." , dlk.got() );
- uassert( 10174 , "config servers not all up" , configServer.allUp() );
+ uassert(10174, "config servers not all up", configServer.allUp(false));
set<Shard> seen;
diff --git a/src/mongo/s/commands_admin.cpp b/src/mongo/s/commands_admin.cpp
index 7c978405f61..f899eb66224 100644
--- a/src/mongo/s/commands_admin.cpp
+++ b/src/mongo/s/commands_admin.cpp
@@ -96,7 +96,7 @@ namespace mongo {
bool okForConfigChanges( string& errmsg ) {
string e;
- if ( ! configServer.allUp(e) ) {
+ if (!configServer.allUp(false, e)) {
errmsg = str::stream() << "not all config servers are up: " << e;
return false;
}
diff --git a/src/mongo/s/config.cpp b/src/mongo/s/config.cpp
index 078a5de0bb1..e9ba0d91b60 100644
--- a/src/mongo/s/config.cpp
+++ b/src/mongo/s/config.cpp
@@ -198,7 +198,9 @@ namespace mongo {
vector<Shard>* initShards) {
uassert( 8042 , "db doesn't have sharding enabled" , _shardingEnabled );
- uassert( 13648 , str::stream() << "can't shard collection because not all config servers are up" , configServer.allUp() );
+ uassert(13648,
+ str::stream() << "can't shard collection because not all config servers are up",
+ configServer.allUp(false));
ChunkManagerPtr manager;
@@ -648,7 +650,7 @@ namespace mongo {
configServer.logChange( "dropDatabase.start" , _name , BSONObj() );
// 1
- if ( ! configServer.allUp( errmsg ) ) {
+ if (!configServer.allUp(false, errmsg)) {
LOG(1) << "\t DBConfig::dropDatabase not all up" << endl;
return 0;
}
@@ -667,7 +669,7 @@ namespace mongo {
return false;
}
- if ( ! configServer.allUp( errmsg ) ) {
+ if (!configServer.allUp(false, errmsg)) {
log() << "error removing from config server even after checking!" << endl;
return 0;
}
@@ -995,21 +997,38 @@ namespace mongo {
return true;
}
- bool ConfigServer::allUp() {
+ bool ConfigServer::allUp(bool localCheckOnly) {
string errmsg;
- return allUp( errmsg );
+ return allUp(localCheckOnly, errmsg);
}
- bool ConfigServer::allUp( string& errmsg ) {
+ bool ConfigServer::allUp(bool localCheckOnly, string& errmsg) {
try {
ScopedDbConnection conn(_primary.getConnString(), 30.0);
+
+ // Note: SyncClusterConnection is different from normal connection types in
+ // that it can be instantiated even if all the config servers are down.
+ if (!conn->isStillConnected()) {
+ errmsg = str::stream() << "Not all config servers "
+ << _primary.toString() << " are reachable";
+ LOG(1) << errmsg;
+ return false;
+ }
+
+ if (localCheckOnly) {
+ return true;
+ }
+
+ // Note: For SyncClusterConnection, gle will only be sent to the first
+ // node, and it is not even guaranteed to be invoked.
conn->getLastError();
conn.done();
return true;
}
- catch ( DBException& ) {
- log() << "ConfigServer::allUp : " << _primary.toString() << " seems down!" << endl;
- errmsg = _primary.toString() + " seems down";
+ catch (const DBException& excep) {
+ errmsg = str::stream() << "Not all config servers "
+ << _primary.toString() << " are reachable"
+ << causedBy(excep);
return false;
}
diff --git a/src/mongo/s/config.h b/src/mongo/s/config.h
index fceb3f6b219..78810ed8653 100644
--- a/src/mongo/s/config.h
+++ b/src/mongo/s/config.h
@@ -240,8 +240,16 @@ namespace mongo {
*/
bool checkHostsAreUnique( const std::vector<std::string>& configHosts, std::string* errmsg );
- bool allUp();
- bool allUp( std::string& errmsg );
+ /**
+ * Checks if all config servers are up.
+ *
+ * If localCheckOnly is true, only check if the socket is still open with no errors.
+ * Otherwise, also send a getLastError command with recv timeout.
+ *
+ * TODO: fix this - SERVER-15811
+ */
+ bool allUp(bool localCheckOnly);
+ bool allUp(bool localCheckOnly, std::string& errmsg);
int dbConfigVersion();
int dbConfigVersion( DBClientBase& conn );