diff options
author | unknown <msvensson@pilot.mysql.com> | 2008-02-07 08:08:43 +0100 |
---|---|---|
committer | unknown <msvensson@pilot.mysql.com> | 2008-02-07 08:08:43 +0100 |
commit | 5c73339493d5503deefc289c4dab3adadc1dbfb1 (patch) | |
tree | f316e6fe505edf754ec743e3445fe31c4f733cf5 | |
parent | 5aa9c1bd6f5f0106619012f75b086c08172cd33b (diff) | |
download | mariadb-git-5c73339493d5503deefc289c4dab3adadc1dbfb1.tar.gz |
Bug#32025 ndb_waiter does too many roundtrips to ndb_mgmd
ndb/tools/waiter.cpp:
- Only contact ndb_mgmd once per loop
- Program only cares about ndbd nodes -> remove the api and mgm vectors
- Program can not wait for "starting" -> remove that code
- Remove unused includes
- Protect against SIGPIPE(writing to a socket where the mgmsrv was
gone, silently killed the program)
- Don't sleep one second if if all nodes are in the wanted state
- Use 100 milliseconds sleep between each poll
-rw-r--r-- | ndb/tools/waiter.cpp | 127 |
1 files changed, 42 insertions, 85 deletions
diff --git a/ndb/tools/waiter.cpp b/ndb/tools/waiter.cpp index e221a26182e..8226de87b49 100644 --- a/ndb/tools/waiter.cpp +++ b/ndb/tools/waiter.cpp @@ -21,13 +21,11 @@ #include <NdbMain.h> #include <NdbOut.hpp> #include <NdbSleep.h> -#include <kernel/ndb_limits.h> #include <NDBT.hpp> -int -waitClusterStatus(const char* _addr, ndb_mgm_node_status _status, - unsigned int _timeout); +static int +waitClusterStatus(const char* _addr, ndb_mgm_node_status _status); enum ndb_waiter_options { OPT_WAIT_STATUS_NOT_STARTED = NDB_STD_OPTIONS_LAST, @@ -55,12 +53,13 @@ static struct my_option my_long_options[] = "Wait for cluster to enter single user mode", (gptr*) &_single_user, (gptr*) &_single_user, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, - { "timeout", 't', "Timeout to wait", + { "timeout", 't', "Timeout to wait in seconds", (gptr*) &_timeout, (gptr*) &_timeout, 0, GET_INT, REQUIRED_ARG, 120, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; + static void usage() { ndb_std_print_version(); @@ -70,16 +69,18 @@ static void usage() my_print_variables(my_long_options); } + int main(int argc, char** argv){ NDB_INIT(argv[0]); load_defaults("my",load_default_groups,&argc,&argv); const char* _hostName = NULL; - int ho_error; + #ifndef DBUG_OFF opt_debug= "d:t:O,/tmp/ndb_waiter.trace"; #endif - if ((ho_error=handle_options(&argc, &argv, my_long_options, - ndb_std_get_one_option))) + + if (handle_options(&argc, &argv, my_long_options, + ndb_std_get_one_option)) return NDBT_ProgramExit(NDBT_WRONGARGS); _hostName = argv[0]; @@ -105,7 +106,7 @@ int main(int argc, char** argv){ wait_status= NDB_MGM_NODE_STATUS_STARTED; } - if (waitClusterStatus(_hostName, wait_status, _timeout) != 0) + if (waitClusterStatus(_hostName, wait_status) != 0) return NDBT_ProgramExit(NDBT_FAILED); return NDBT_ProgramExit(NDBT_OK); } @@ -118,8 +119,6 @@ int main(int argc, char** argv){ NdbMgmHandle handle= NULL; Vector<ndb_mgm_node_state> ndbNodes; -Vector<ndb_mgm_node_state> mgmNodes; -Vector<ndb_mgm_node_state> apiNodes; int getStatus(){ @@ -128,8 +127,6 @@ getStatus(){ struct ndb_mgm_node_state * node; ndbNodes.clear(); - mgmNodes.clear(); - apiNodes.clear(); while(retries < 10){ status = ndb_mgm_get_status(handle); @@ -153,18 +150,16 @@ getStatus(){ ndbNodes.push_back(*node); break; case NDB_MGM_NODE_TYPE_MGM: - mgmNodes.push_back(*node); + /* Don't care about MGM nodes */ break; case NDB_MGM_NODE_TYPE_API: - apiNodes.push_back(*node); + /* Don't care about API nodes */ break; default: if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN || node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){ retries++; ndbNodes.clear(); - mgmNodes.clear(); - apiNodes.clear(); free(status); status = NULL; count = 0; @@ -183,24 +178,22 @@ getStatus(){ free(status); return 0; } - - g_err << "getStatus failed" << endl; + return -1; } -int +static int waitClusterStatus(const char* _addr, - ndb_mgm_node_status _status, - unsigned int _timeout) + ndb_mgm_node_status _status) { int _startphase = -1; - int _nodes[MAX_NDB_NODES]; - int _num_nodes = 0; + /* Ignore SIGPIPE */ + signal(SIGPIPE, SIG_IGN); handle = ndb_mgm_create_handle(); if (handle == NULL){ - g_err << "handle == NULL" << endl; + g_err << "Could not create ndb_mgm handle" << endl; return -1; } g_info << "Connecting to mgmsrv at " << _addr << endl; @@ -216,19 +209,11 @@ waitClusterStatus(const char* _addr, return -1; } - if (getStatus() != 0) - return -1; - - // Collect all nodes into nodes - for (size_t i = 0; i < ndbNodes.size(); i++){ - _nodes[i] = ndbNodes[i].node_id; - _num_nodes++; - } - - unsigned int attempts = 0; - unsigned int resetAttempts = 0; - const unsigned int MAX_RESET_ATTEMPTS = 10; - bool allInState = false; + int attempts = 0; + int resetAttempts = 0; + const int MAX_RESET_ATTEMPTS = 10; + bool allInState = false; + int timeout_ms= _timeout * 10; /* In number of 100 milliseconds */ while (allInState == false){ if (_timeout > 0 && attempts > _timeout){ /** @@ -236,8 +221,8 @@ waitClusterStatus(const char* _addr, * the state we want */ bool waitMore = false; - /** - * Make special check if we are waiting for + /** + * Make special check if we are waiting for * cluster to become started */ if(_status == NDB_MGM_NODE_STATUS_STARTED){ @@ -252,7 +237,7 @@ waitClusterStatus(const char* _addr, waitMore = false; } - } + } if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){ g_err << "waitNodeState(" @@ -260,7 +245,7 @@ waitClusterStatus(const char* _addr, <<", "<<_startphase<<")" << " timeout after " << attempts <<" attemps" << endl; return -1; - } + } g_err << "waitNodeState(" << ndb_mgm_get_node_status_string(_status) @@ -269,62 +254,34 @@ waitClusterStatus(const char* _addr, << resetAttempts << endl; attempts = 0; resetAttempts++; - } - allInState = true; if (getStatus() != 0){ - g_err << "getStatus != 0" << endl; return -1; } - // ndbout << "waitNodeState; _num_nodes = " << _num_nodes << endl; - // for (int i = 0; i < _num_nodes; i++) - // ndbout << " node["<<i<<"] =" <<_nodes[i] << endl; + /* Assume all nodes are in state(if there is any) */ + allInState = (ndbNodes.size() > 0); - for (int i = 0; i < _num_nodes; i++){ - ndb_mgm_node_state* ndbNode = NULL; - for (size_t n = 0; n < ndbNodes.size(); n++){ - if (ndbNodes[n].node_id == _nodes[i]) - ndbNode = &ndbNodes[n]; - } + /* Loop through all nodes and check their state */ + for (size_t n = 0; n < ndbNodes.size(); n++) { + ndb_mgm_node_state* ndbNode = &ndbNodes[n]; - if(ndbNode == NULL){ - allInState = false; - continue; - } + assert(ndbNode != NULL); - g_info << "State node " << ndbNode->node_id << " " + g_info << "Node " << ndbNode->node_id << ": " << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl; - assert(ndbNode != NULL); - - if(_status == NDB_MGM_NODE_STATUS_STARTING && - ((ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING && - ndbNode->start_phase >= _startphase) || - (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTED))) - continue; - - if (_status == NDB_MGM_NODE_STATUS_STARTING){ - g_info << "status = " - << ndb_mgm_get_node_status_string(ndbNode->node_status) - <<", start_phase="<<ndbNode->start_phase<<endl; - if (ndbNode->node_status != _status) { - if (ndbNode->node_status < _status) - allInState = false; - else - g_info << "node_status(" << (unsigned)ndbNode->node_status - << ") != _status("<< (unsigned)_status << ")" <<endl; - } else if (ndbNode->start_phase < _startphase) - allInState = false; - } else { - if (ndbNode->node_status != _status) + if (ndbNode->node_status != _status) allInState = false; - } } - g_info << "Waiting for cluster enter state " - << ndb_mgm_get_node_status_string(_status)<< endl; - NdbSleep_SecSleep(1); + + if (!allInState) { + g_info << "Waiting for cluster enter state " + << ndb_mgm_get_node_status_string(_status)<< endl; + NdbSleep_MilliSleep(100); + } + attempts++; } return 0; |