summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <msvensson@pilot.mysql.com>2008-02-07 08:08:43 +0100
committerunknown <msvensson@pilot.mysql.com>2008-02-07 08:08:43 +0100
commit5c73339493d5503deefc289c4dab3adadc1dbfb1 (patch)
treef316e6fe505edf754ec743e3445fe31c4f733cf5
parent5aa9c1bd6f5f0106619012f75b086c08172cd33b (diff)
downloadmariadb-git-5c73339493d5503deefc289c4dab3adadc1dbfb1.tar.gz
Bug#32025 ndb_waiter does too many roundtrips to ndb_mgmd
ndb/tools/waiter.cpp: - Only contact ndb_mgmd once per loop - Program only cares about ndbd nodes -> remove the api and mgm vectors - Program can not wait for "starting" -> remove that code - Remove unused includes - Protect against SIGPIPE(writing to a socket where the mgmsrv was gone, silently killed the program) - Don't sleep one second if if all nodes are in the wanted state - Use 100 milliseconds sleep between each poll
-rw-r--r--ndb/tools/waiter.cpp127
1 files changed, 42 insertions, 85 deletions
diff --git a/ndb/tools/waiter.cpp b/ndb/tools/waiter.cpp
index e221a26182e..8226de87b49 100644
--- a/ndb/tools/waiter.cpp
+++ b/ndb/tools/waiter.cpp
@@ -21,13 +21,11 @@
#include <NdbMain.h>
#include <NdbOut.hpp>
#include <NdbSleep.h>
-#include <kernel/ndb_limits.h>
#include <NDBT.hpp>
-int
-waitClusterStatus(const char* _addr, ndb_mgm_node_status _status,
- unsigned int _timeout);
+static int
+waitClusterStatus(const char* _addr, ndb_mgm_node_status _status);
enum ndb_waiter_options {
OPT_WAIT_STATUS_NOT_STARTED = NDB_STD_OPTIONS_LAST,
@@ -55,12 +53,13 @@ static struct my_option my_long_options[] =
"Wait for cluster to enter single user mode",
(gptr*) &_single_user, (gptr*) &_single_user, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
- { "timeout", 't', "Timeout to wait",
+ { "timeout", 't', "Timeout to wait in seconds",
(gptr*) &_timeout, (gptr*) &_timeout, 0,
GET_INT, REQUIRED_ARG, 120, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
+
static void usage()
{
ndb_std_print_version();
@@ -70,16 +69,18 @@ static void usage()
my_print_variables(my_long_options);
}
+
int main(int argc, char** argv){
NDB_INIT(argv[0]);
load_defaults("my",load_default_groups,&argc,&argv);
const char* _hostName = NULL;
- int ho_error;
+
#ifndef DBUG_OFF
opt_debug= "d:t:O,/tmp/ndb_waiter.trace";
#endif
- if ((ho_error=handle_options(&argc, &argv, my_long_options,
- ndb_std_get_one_option)))
+
+ if (handle_options(&argc, &argv, my_long_options,
+ ndb_std_get_one_option))
return NDBT_ProgramExit(NDBT_WRONGARGS);
_hostName = argv[0];
@@ -105,7 +106,7 @@ int main(int argc, char** argv){
wait_status= NDB_MGM_NODE_STATUS_STARTED;
}
- if (waitClusterStatus(_hostName, wait_status, _timeout) != 0)
+ if (waitClusterStatus(_hostName, wait_status) != 0)
return NDBT_ProgramExit(NDBT_FAILED);
return NDBT_ProgramExit(NDBT_OK);
}
@@ -118,8 +119,6 @@ int main(int argc, char** argv){
NdbMgmHandle handle= NULL;
Vector<ndb_mgm_node_state> ndbNodes;
-Vector<ndb_mgm_node_state> mgmNodes;
-Vector<ndb_mgm_node_state> apiNodes;
int
getStatus(){
@@ -128,8 +127,6 @@ getStatus(){
struct ndb_mgm_node_state * node;
ndbNodes.clear();
- mgmNodes.clear();
- apiNodes.clear();
while(retries < 10){
status = ndb_mgm_get_status(handle);
@@ -153,18 +150,16 @@ getStatus(){
ndbNodes.push_back(*node);
break;
case NDB_MGM_NODE_TYPE_MGM:
- mgmNodes.push_back(*node);
+ /* Don't care about MGM nodes */
break;
case NDB_MGM_NODE_TYPE_API:
- apiNodes.push_back(*node);
+ /* Don't care about API nodes */
break;
default:
if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
retries++;
ndbNodes.clear();
- mgmNodes.clear();
- apiNodes.clear();
free(status);
status = NULL;
count = 0;
@@ -183,24 +178,22 @@ getStatus(){
free(status);
return 0;
}
-
- g_err << "getStatus failed" << endl;
+
return -1;
}
-int
+static int
waitClusterStatus(const char* _addr,
- ndb_mgm_node_status _status,
- unsigned int _timeout)
+ ndb_mgm_node_status _status)
{
int _startphase = -1;
- int _nodes[MAX_NDB_NODES];
- int _num_nodes = 0;
+ /* Ignore SIGPIPE */
+ signal(SIGPIPE, SIG_IGN);
handle = ndb_mgm_create_handle();
if (handle == NULL){
- g_err << "handle == NULL" << endl;
+ g_err << "Could not create ndb_mgm handle" << endl;
return -1;
}
g_info << "Connecting to mgmsrv at " << _addr << endl;
@@ -216,19 +209,11 @@ waitClusterStatus(const char* _addr,
return -1;
}
- if (getStatus() != 0)
- return -1;
-
- // Collect all nodes into nodes
- for (size_t i = 0; i < ndbNodes.size(); i++){
- _nodes[i] = ndbNodes[i].node_id;
- _num_nodes++;
- }
-
- unsigned int attempts = 0;
- unsigned int resetAttempts = 0;
- const unsigned int MAX_RESET_ATTEMPTS = 10;
- bool allInState = false;
+ int attempts = 0;
+ int resetAttempts = 0;
+ const int MAX_RESET_ATTEMPTS = 10;
+ bool allInState = false;
+ int timeout_ms= _timeout * 10; /* In number of 100 milliseconds */
while (allInState == false){
if (_timeout > 0 && attempts > _timeout){
/**
@@ -236,8 +221,8 @@ waitClusterStatus(const char* _addr,
* the state we want
*/
bool waitMore = false;
- /**
- * Make special check if we are waiting for
+ /**
+ * Make special check if we are waiting for
* cluster to become started
*/
if(_status == NDB_MGM_NODE_STATUS_STARTED){
@@ -252,7 +237,7 @@ waitClusterStatus(const char* _addr,
waitMore = false;
}
- }
+ }
if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
g_err << "waitNodeState("
@@ -260,7 +245,7 @@ waitClusterStatus(const char* _addr,
<<", "<<_startphase<<")"
<< " timeout after " << attempts <<" attemps" << endl;
return -1;
- }
+ }
g_err << "waitNodeState("
<< ndb_mgm_get_node_status_string(_status)
@@ -269,62 +254,34 @@ waitClusterStatus(const char* _addr,
<< resetAttempts << endl;
attempts = 0;
resetAttempts++;
-
}
- allInState = true;
if (getStatus() != 0){
- g_err << "getStatus != 0" << endl;
return -1;
}
- // ndbout << "waitNodeState; _num_nodes = " << _num_nodes << endl;
- // for (int i = 0; i < _num_nodes; i++)
- // ndbout << " node["<<i<<"] =" <<_nodes[i] << endl;
+ /* Assume all nodes are in state(if there is any) */
+ allInState = (ndbNodes.size() > 0);
- for (int i = 0; i < _num_nodes; i++){
- ndb_mgm_node_state* ndbNode = NULL;
- for (size_t n = 0; n < ndbNodes.size(); n++){
- if (ndbNodes[n].node_id == _nodes[i])
- ndbNode = &ndbNodes[n];
- }
+ /* Loop through all nodes and check their state */
+ for (size_t n = 0; n < ndbNodes.size(); n++) {
+ ndb_mgm_node_state* ndbNode = &ndbNodes[n];
- if(ndbNode == NULL){
- allInState = false;
- continue;
- }
+ assert(ndbNode != NULL);
- g_info << "State node " << ndbNode->node_id << " "
+ g_info << "Node " << ndbNode->node_id << ": "
<< ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;
- assert(ndbNode != NULL);
-
- if(_status == NDB_MGM_NODE_STATUS_STARTING &&
- ((ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING &&
- ndbNode->start_phase >= _startphase) ||
- (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTED)))
- continue;
-
- if (_status == NDB_MGM_NODE_STATUS_STARTING){
- g_info << "status = "
- << ndb_mgm_get_node_status_string(ndbNode->node_status)
- <<", start_phase="<<ndbNode->start_phase<<endl;
- if (ndbNode->node_status != _status) {
- if (ndbNode->node_status < _status)
- allInState = false;
- else
- g_info << "node_status(" << (unsigned)ndbNode->node_status
- << ") != _status("<< (unsigned)_status << ")" <<endl;
- } else if (ndbNode->start_phase < _startphase)
- allInState = false;
- } else {
- if (ndbNode->node_status != _status)
+ if (ndbNode->node_status != _status)
allInState = false;
- }
}
- g_info << "Waiting for cluster enter state "
- << ndb_mgm_get_node_status_string(_status)<< endl;
- NdbSleep_SecSleep(1);
+
+ if (!allInState) {
+ g_info << "Waiting for cluster enter state "
+ << ndb_mgm_get_node_status_string(_status)<< endl;
+ NdbSleep_MilliSleep(100);
+ }
+
attempts++;
}
return 0;