summary | refs | log | tree | commit | diff
path: root/ndb/tools/waiter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'ndb/tools/waiter.cpp')
-rw-r--r-- ndb/tools/waiter.cpp 253
1 files changed, 248 insertions, 5 deletions
diff --git a/ndb/tools/waiter.cpp b/ndb/tools/waiter.cpp
index d57daff3aea..7ce2739a157 100644
--- a/ndb/tools/waiter.cpp
+++ b/ndb/tools/waiter.cpp
@@ -15,17 +15,20 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-#include "mgmapi.h"
+#include <mgmapi.h>
#include <string.h>
#include <NdbMain.h>
#include <NdbOut.hpp>
#include <NdbSleep.h>
#include <getarg.h>
+#include <kernel/ndb_limits.h>
+#include "../src/common/mgmcommon/LocalConfig.hpp"
-
-#include <NdbRestarter.hpp>
#include <NDBT.hpp>
+int
+waitClusterStarted(const char* _addr, unsigned int _timeout= 120);
+
int main(int argc, const char** argv){
const char* _hostName = NULL;
@@ -45,12 +48,252 @@ int main(int argc, const char** argv){
arg_printusage(args, num_args, argv[0], desc);
return NDBT_ProgramExit(NDBT_WRONGARGS);
}
+
+ char buf[255];
_hostName = argv[optind];
- NdbRestarter restarter(_hostName);
+ if (_hostName == NULL){
+ LocalConfig lcfg;
+ if(!lcfg.init())
+ {
+ lcfg.printError();
+ lcfg.printUsage();
+ g_err << "Error parsing local config file" << endl;
+ return NDBT_ProgramExit(NDBT_FAILED);
+ }
+
+ for (int i = 0; i<lcfg.items; i++)
+ {
+ MgmtSrvrId * m = lcfg.ids[i];
+
+ switch(m->type){
+ case MgmId_TCP:
+ snprintf(buf, 255, "%s:%d", m->data.tcp.remoteHost, m->data.tcp.port);
+ _hostName = buf;
+ break;
+ case MgmId_File:
+ break;
+ default:
+ break;
+ }
+ if (_hostName != NULL)
+ break;
+ }
+ if (_hostName == NULL)
+ {
+ g_err << "No management servers configured in local config file" << endl;
+ return NDBT_ProgramExit(NDBT_FAILED);
+ }
+ }
- if (restarter.waitClusterStarted() != 0)
+ if (waitClusterStarted(_hostName) != 0)
return NDBT_ProgramExit(NDBT_FAILED);
return NDBT_ProgramExit(NDBT_OK);
}
+
+/**
+ * Print the latest error code and error line that the management API
+ * reports for handle h to ndbout.
+ *
+ * The body is wrapped in do { } while (0) so that "MGMERR(h);" expands to
+ * exactly one statement and is safe inside an unbraced if/else.
+ */
+#define MGMERR(h) \
+  do { \
+    ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \
+           << ", line="<<ndb_mgm_get_latest_error_line(h) \
+           << endl; \
+  } while (0)
+
+NdbMgmHandle handle= NULL; // management API handle: assigned in waitClusterStarted(), read by getStatus()
+
+Vector<ndb_mgm_node_state> ndbNodes; // NDB_MGM_NODE_TYPE_NDB entries collected by the last getStatus() call
+Vector<ndb_mgm_node_state> mgmNodes; // NDB_MGM_NODE_TYPE_MGM entries collected by the last getStatus() call
+Vector<ndb_mgm_node_state> apiNodes; // NDB_MGM_NODE_TYPE_API entries collected by the last getStatus() call
+
+/**
+ * Refresh the global node lists (ndbNodes, mgmNodes, apiNodes) from the
+ * management server reachable through the global handle.
+ *
+ * The cluster state is requested up to 10 times.  An attempt is discarded
+ * and retried when the request itself fails, or when a node of an
+ * unexpected type is reported with status UNKNOWN or NO_CONTACT.  A node
+ * of an unexpected type in any other status aborts the program.
+ *
+ * @return 0 when a complete snapshot was stored in the global lists,
+ *         -1 when all attempts failed.
+ */
+int
+getStatus(){
+  ndbNodes.clear();
+  mgmNodes.clear();
+  apiNodes.clear();
+
+  // Every iteration that does not return counts as one failed attempt.
+  for (int retries = 0; retries < 10; retries++){
+    struct ndb_mgm_cluster_state * status = ndb_mgm_get_status(handle);
+    if (status == NULL){
+      ndbout << "status==NULL, retries="<<retries<<endl;
+      MGMERR(handle);
+      continue;
+    }
+
+    // Set when the snapshot cannot be trusted; ends the scan early.
+    // The previous code freed and NULLed status here and then read
+    // status->no_of_nodes to end the loop, dereferencing NULL.
+    bool discard = false;
+    for (int i = 0; i < status->no_of_nodes && !discard; i++){
+      struct ndb_mgm_node_state * node = &status->node_states[i];
+      switch(node->node_type){
+      case NDB_MGM_NODE_TYPE_NDB:
+        ndbNodes.push_back(*node);
+        break;
+      case NDB_MGM_NODE_TYPE_MGM:
+        mgmNodes.push_back(*node);
+        break;
+      case NDB_MGM_NODE_TYPE_API:
+        apiNodes.push_back(*node);
+        break;
+      default:
+        if(node->node_status != NDB_MGM_NODE_STATUS_UNKNOWN &&
+           node->node_status != NDB_MGM_NODE_STATUS_NO_CONTACT){
+          abort();
+        }
+        discard = true;
+        ndbout << "kalle"<< endl;
+        break;
+      }
+    }
+
+    // The state block is released with free() on every path from here.
+    free(status);
+
+    if (discard){
+      // Drop whatever was collected before the bad entry was seen.
+      ndbNodes.clear();
+      mgmNodes.clear();
+      apiNodes.clear();
+      ndbout << "status == 0" << endl;
+      continue;
+    }
+    return 0;
+  }
+
+  g_err << "getStatus failed" << endl;
+  return -1;
+}
+
+/**
+ * Poll the management server (through the already connected global
+ * handle) once per second until every NDB node seen in the first status
+ * snapshot reports NDB_MGM_NODE_STATUS_STARTED.
+ *
+ * @param timeout  number of polling attempts allowed before the timeout
+ *                 check fires; 0 disables the check.  When it fires and
+ *                 every node is STARTED or STARTING, the attempt counter
+ *                 is reset (a bounded number of times) instead of failing.
+ * @return 0 when all nodes are started, -1 on error or timeout.
+ */
+static int
+waitNdbNodesStarted(unsigned int timeout)
+{
+  // Fixed for this tool; both values are echoed in the log messages.
+  const ndb_mgm_node_status _status = NDB_MGM_NODE_STATUS_STARTED;
+  const int _startphase = -1;
+
+  if (getStatus() != 0)
+    return -1;
+
+  // Remember the ids of the NDB nodes present at the start.  The array
+  // has a fixed size, so refuse to continue rather than write past it.
+  if (ndbNodes.size() > MAX_NDB_NODES){
+    g_err << "Too many NDB nodes reported: " << ndbNodes.size() << endl;
+    return -1;
+  }
+  int _nodes[MAX_NDB_NODES];
+  int _num_nodes = 0;
+  for (size_t i = 0; i < ndbNodes.size(); i++){
+    _nodes[_num_nodes++] = ndbNodes[i].node_id;
+  }
+
+  unsigned int attempts = 0;
+  unsigned int resetAttempts = 0;
+  const unsigned int MAX_RESET_ATTEMPTS = 10;
+
+  for (;;){
+    if (timeout > 0 && attempts > timeout){
+      /**
+       * Timeout has expired.  Keep waiting only while every node is
+       * either started or still starting; if any node is in another
+       * state there is no point in waiting longer.
+       */
+      bool waitMore = true;
+      for (size_t n = 0; n < ndbNodes.size(); n++){
+        if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED &&
+            ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING)
+          waitMore = false;
+      }
+
+      if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
+        g_err << "waitNodeState("
+              << ndb_mgm_get_node_status_string(_status)
+              <<", "<<_startphase<<")"
+              << " timeout after " << attempts <<" attemps" << endl;
+        return -1;
+      }
+
+      g_err << "waitNodeState("
+            << ndb_mgm_get_node_status_string(_status)
+            <<", "<<_startphase<<")"
+            << " resetting number of attempts "
+            << resetAttempts << endl;
+      attempts = 0;
+      resetAttempts++;
+    }
+
+    if (getStatus() != 0){
+      g_err << "getStatus != 0" << endl;
+      return -1;
+    }
+
+    bool allInState = true;
+    for (int i = 0; i < _num_nodes; i++){
+      // Look the remembered node up in the fresh snapshot.
+      ndb_mgm_node_state* ndbNode = NULL;
+      for (size_t n = 0; n < ndbNodes.size(); n++){
+        if (ndbNodes[n].node_id == _nodes[i])
+          ndbNode = &ndbNodes[n];
+      }
+
+      if (ndbNode == NULL){
+        // Node is missing from this snapshot: not started yet.
+        allInState = false;
+        continue;
+      }
+
+      g_info << "State node " << ndbNode->node_id << " "
+             << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;
+
+      if (ndbNode->node_status != _status)
+        allInState = false;
+    }
+
+    // Done: return right away instead of logging and sleeping once more.
+    if (allInState)
+      return 0;
+
+    g_info << "Waiting for cluster enter state"
+           << ndb_mgm_get_node_status_string(_status)<< endl;
+    NdbSleep_SecSleep(1);
+    attempts++;
+  }
+}
+
+/**
+ * Connect to the management server at _addr and wait until all NDB nodes
+ * of the cluster are started.
+ *
+ * The connection is kept in the global handle for the duration of the
+ * call and is released again before returning, on success and on failure.
+ *
+ * @param _addr     management server address passed to ndb_mgm_connect().
+ * @param _timeout  polling attempts (one per second) before the timeout
+ *                  check fires; 0 disables the check.
+ * @return 0 when the cluster is started, -1 on any failure.
+ */
+int
+waitClusterStarted(const char* _addr, unsigned int _timeout)
+{
+  handle = ndb_mgm_create_handle();
+  if (handle == NULL){
+    g_err << "handle == NULL" << endl;
+    return -1;
+  }
+
+  g_info << "Connecting to mgmsrv at " << _addr << endl;
+  if (ndb_mgm_connect(handle, _addr) == -1) {
+    MGMERR(handle);
+    g_err << "Connection to " << _addr << " failed" << endl;
+    ndb_mgm_destroy_handle(&handle);
+    return -1;
+  }
+
+  const int result = waitNdbNodesStarted(_timeout);
+
+  // Release the connection and the handle on every exit path.
+  ndb_mgm_disconnect(handle);
+  ndb_mgm_destroy_handle(&handle);
+  return result;
+}