diff options
Diffstat (limited to 'ndb/tools/waiter.cpp')
-rw-r--r-- | ndb/tools/waiter.cpp | 253 |
1 files changed, 248 insertions, 5 deletions
diff --git a/ndb/tools/waiter.cpp b/ndb/tools/waiter.cpp index d57daff3aea..7ce2739a157 100644 --- a/ndb/tools/waiter.cpp +++ b/ndb/tools/waiter.cpp @@ -15,17 +15,20 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "mgmapi.h" +#include <mgmapi.h> #include <string.h> #include <NdbMain.h> #include <NdbOut.hpp> #include <NdbSleep.h> #include <getarg.h> +#include <kernel/ndb_limits.h> +#include "../src/common/mgmcommon/LocalConfig.hpp" - -#include <NdbRestarter.hpp> #include <NDBT.hpp> +int +waitClusterStarted(const char* _addr, unsigned int _timeout= 120); + int main(int argc, const char** argv){ const char* _hostName = NULL; @@ -45,12 +48,252 @@ int main(int argc, const char** argv){ arg_printusage(args, num_args, argv[0], desc); return NDBT_ProgramExit(NDBT_WRONGARGS); } + + char buf[255]; _hostName = argv[optind]; - NdbRestarter restarter(_hostName); + if (_hostName == NULL){ + LocalConfig lcfg; + if(!lcfg.init()) + { + lcfg.printError(); + lcfg.printUsage(); + g_err << "Error parsing local config file" << endl; + return NDBT_ProgramExit(NDBT_FAILED); + } + + for (int i = 0; i<lcfg.items; i++) + { + MgmtSrvrId * m = lcfg.ids[i]; + + switch(m->type){ + case MgmId_TCP: + snprintf(buf, 255, "%s:%d", m->data.tcp.remoteHost, m->data.tcp.port); + _hostName = buf; + break; + case MgmId_File: + break; + default: + break; + } + if (_hostName != NULL) + break; + } + if (_hostName == NULL) + { + g_err << "No management servers configured in local config file" << endl; + return NDBT_ProgramExit(NDBT_FAILED); + } + } - if (restarter.waitClusterStarted() != 0) + if (waitClusterStarted(_hostName) != 0) return NDBT_ProgramExit(NDBT_FAILED); return NDBT_ProgramExit(NDBT_OK); } + +#define MGMERR(h) \ + ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \ + << ", line="<<ndb_mgm_get_latest_error_line(h) \ + << endl; + +NdbMgmHandle handle= NULL; + +Vector<ndb_mgm_node_state> ndbNodes; +Vector<ndb_mgm_node_state> mgmNodes; +Vector<ndb_mgm_node_state> apiNodes; + +int +getStatus(){ + int retries = 0; + struct ndb_mgm_cluster_state * status; + struct ndb_mgm_node_state * node; + + ndbNodes.clear(); + mgmNodes.clear(); + apiNodes.clear(); + + while(retries < 10){ + status = ndb_mgm_get_status(handle); + if (status == NULL){ + ndbout << "status==NULL, retries="<<retries<<endl; + MGMERR(handle); + retries++; + continue; + } + for (int i = 0; i < status->no_of_nodes; i++){ + node = &status->node_states[i]; + switch(node->node_type){ + case NDB_MGM_NODE_TYPE_NDB: + ndbNodes.push_back(*node); + break; + case NDB_MGM_NODE_TYPE_MGM: + mgmNodes.push_back(*node); + break; + case NDB_MGM_NODE_TYPE_API: + apiNodes.push_back(*node); + break; + default: + if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN || + node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){ + retries++; + ndbNodes.clear(); + mgmNodes.clear(); + apiNodes.clear(); + free(status); + status = NULL; + i = status->no_of_nodes; + + ndbout << "kalle"<< endl; + break; + } + abort(); + break; + } + } + if(status == 0){ + ndbout << "status == 0" << endl; + continue; + } + free(status); + return 0; + } + + g_err << "getStatus failed" << endl; + return -1; +} + +int +waitClusterStarted(const char* _addr, unsigned int _timeout) +{ + ndb_mgm_node_status _status = NDB_MGM_NODE_STATUS_STARTED; + int _startphase = -1; + + int _nodes[MAX_NDB_NODES]; + int _num_nodes = 0; + + handle = ndb_mgm_create_handle(); + if (handle == NULL){ + g_err << "handle == NULL" << endl; + return -1; + } + g_info << "Connecting to mgmsrv at " << _addr << endl; + if (ndb_mgm_connect(handle, _addr) == -1) { + MGMERR(handle); + g_err << "Connection to " << _addr << " failed" << endl; + return -1; + } + + if (getStatus() != 0) + return -1; + + // Collect all nodes into nodes + for (size_t i = 0; i < ndbNodes.size(); i++){ + _nodes[i] = ndbNodes[i].node_id; + _num_nodes++; + } + + unsigned int attempts = 0; + unsigned int resetAttempts = 0; + const unsigned int MAX_RESET_ATTEMPTS = 10; + bool allInState = false; + while (allInState == false){ + if (_timeout > 0 && attempts > _timeout){ + /** + * Timeout has expired waiting for the nodes to enter + * the state we want + */ + bool waitMore = false; + /** + * Make special check if we are waiting for + * cluster to become started + */ + if(_status == NDB_MGM_NODE_STATUS_STARTED){ + waitMore = true; + /** + * First check if any node is not starting + * then it's no idea to wait anymore + */ + for (size_t n = 0; n < ndbNodes.size(); n++){ + if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED && + ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING) + waitMore = false; + + } + } + + if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){ + g_err << "waitNodeState(" + << ndb_mgm_get_node_status_string(_status) + <<", "<<_startphase<<")" + << " timeout after " << attempts <<" attemps" << endl; + return -1; + } + + g_err << "waitNodeState(" + << ndb_mgm_get_node_status_string(_status) + <<", "<<_startphase<<")" + << " resetting number of attempts " + << resetAttempts << endl; + attempts = 0; + resetAttempts++; + + } + + allInState = true; + if (getStatus() != 0){ + g_err << "getStatus != 0" << endl; + return -1; + } + + // ndbout << "waitNodeState; _num_nodes = " << _num_nodes << endl; + // for (int i = 0; i < _num_nodes; i++) + // ndbout << " node["<<i<<"] =" <<_nodes[i] << endl; + + for (int i = 0; i < _num_nodes; i++){ + ndb_mgm_node_state* ndbNode = NULL; + for (size_t n = 0; n < ndbNodes.size(); n++){ + if (ndbNodes[n].node_id == _nodes[i]) + ndbNode = &ndbNodes[n]; + } + + if(ndbNode == NULL){ + allInState = false; + continue; + } + + g_info << "State node " << ndbNode->node_id << " " + << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl; + + assert(ndbNode != NULL); + + if(_status == NDB_MGM_NODE_STATUS_STARTING && + ((ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING && + ndbNode->start_phase >= _startphase) || + (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTED))) + continue; + + if (_status == NDB_MGM_NODE_STATUS_STARTING){ + g_info << "status = " + << ndb_mgm_get_node_status_string(ndbNode->node_status) + <<", start_phase="<<ndbNode->start_phase<<endl; + if (ndbNode->node_status != _status) { + if (ndbNode->node_status < _status) + allInState = false; + else + g_info << "node_status(" << ndbNode->node_status + <<") != _status("<<_status<<")"<<endl; + } else if (ndbNode->start_phase < _startphase) + allInState = false; + } else { + if (ndbNode->node_status != _status) + allInState = false; + } + } + g_info << "Waiting for cluster enter state" + << ndb_mgm_get_node_status_string(_status)<< endl; + NdbSleep_SecSleep(1); + attempts++; + } + return 0; +} |