summaryrefslogtreecommitdiff
path: root/storage/ndb/test/src/NdbRestarter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'storage/ndb/test/src/NdbRestarter.cpp')
-rw-r--r--storage/ndb/test/src/NdbRestarter.cpp651
1 files changed, 651 insertions, 0 deletions
diff --git a/storage/ndb/test/src/NdbRestarter.cpp b/storage/ndb/test/src/NdbRestarter.cpp
new file mode 100644
index 00000000000..91c0963feae
--- /dev/null
+++ b/storage/ndb/test/src/NdbRestarter.cpp
@@ -0,0 +1,651 @@
+/* Copyright (C) 2003 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <NdbRestarter.hpp>
+#include <NdbOut.hpp>
+#include <NdbSleep.h>
+#include <NdbTick.h>
+#include <mgmapi_debug.h>
+#include <NDBT_Output.hpp>
+#include <random.h>
+#include <kernel/ndb_limits.h>
+#include <ndb_version.h>
+
+#define MGMERR(h) \
+ ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \
+ << ", line="<<ndb_mgm_get_latest_error_line(h) \
+ << endl;
+
+
+/**
+ * Construct a restarter for the management server address _addr.
+ * Only the address is recorded here; the object starts out unconnected,
+ * with no handle and no cached configuration.
+ */
+NdbRestarter::NdbRestarter(const char* _addr):
+  connected(false),
+  handle(NULL),
+  m_config(0)
+{
+  // A NULL address is stored as the empty string.
+  addr.assign(_addr != NULL ? _addr : "");
+}
+
+/** Release the management-server connection, if any, on destruction. */
+NdbRestarter::~NdbRestarter()
+{
+  disconnect();
+}
+
+/**
+ * Return the node id of the _i:th data node in the freshly fetched status.
+ *
+ * @param _i  zero-based index into the list of data nodes
+ * @return the node id, or -1 if not connected, the status could not be
+ *         fetched, or _i is out of range
+ */
+int NdbRestarter::getDbNodeId(int _i){
+  if (!isConnected() || getStatus() != 0)
+    return -1;
+
+  // Negative or too-large indexes never match any entry.
+  if (_i < 0 || (size_t)_i >= ndbNodes.size())
+    return -1;
+
+  return ndbNodes[_i].node_id;
+}
+
+
+/**
+ * Ask the management server to restart a single data node.
+ *
+ * @param _nodeId  id of the node to restart
+ * @param inital   passed through to ndb_mgm_restart2 as the "initial" flag
+ * @param nostart  passed through to ndb_mgm_restart2
+ * @param abort    passed through to ndb_mgm_restart2
+ * @return 0 if the restart was accepted or the node is already on its way
+ *         down, -1 otherwise
+ */
+int
+NdbRestarter::restartOneDbNode(int _nodeId,
+                               bool inital,
+                               bool nostart,
+                               bool abort){
+  if (!isConnected())
+    return -1;
+
+  const int restarted = ndb_mgm_restart2(handle, 1, &_nodeId,
+                                         inital, nostart, abort);
+  if (restarted > 0)
+    return 0;
+
+  /**
+   * ndb_mgm_restart2 reported an error. One possible reason is that the
+   * node has not stopped fast enough, so look at the node's current state:
+   * if it is already restarting or shutting down the error is ignored.
+   */
+  if (getStatus() != 0)
+    return -1;
+
+  g_info << "ndb_mgm_restart2 returned with error, checking node state" << endl;
+
+  for (size_t n = 0; n < ndbNodes.size(); n++){
+    if (ndbNodes[n].node_id != _nodeId)
+      continue;
+
+    g_info <<_nodeId<<": status="<<ndbNodes[n].node_status<<endl;
+
+    /* Node found, check its state */
+    if (ndbNodes[n].node_status == NDB_MGM_NODE_STATUS_RESTARTING ||
+        ndbNodes[n].node_status == NDB_MGM_NODE_STATUS_SHUTTING_DOWN)
+      return 0;
+  }
+
+  MGMERR(handle);
+  g_err << "Could not stop node with id = "<< _nodeId << endl;
+  return -1;
+}
+
+/**
+ * Return the id of the data node with the smallest dynamic_id in a freshly
+ * fetched status; this class treats that node as the master.
+ *
+ * A running minimum of 0 means "nothing chosen yet", so it is always
+ * replaced by the next node examined.
+ *
+ * @return the chosen node id, or -1 if not connected, the status could not
+ *         be fetched, or there are no data nodes
+ */
+int
+NdbRestarter::getMasterNodeId(){
+  if (!isConnected() || getStatus() != 0)
+    return -1;
+
+  int lowestDynId = 0;
+  int masterId = -1;
+  for (size_t n = 0; n < ndbNodes.size(); n++){
+    if (lowestDynId != 0 && !(ndbNodes[n].dynamic_id < lowestDynId))
+      continue;
+
+    lowestDynId = ndbNodes[n].dynamic_id;
+    masterId = ndbNodes[n].node_id;
+  }
+
+  return masterId;
+}
+
+/**
+ * Pick a data node that is not the master.
+ *
+ * @param rand  caller-supplied random number; reduced modulo the number of
+ *              data nodes to choose the starting position
+ * @return id of the first non-master node found when walking the node list
+ *         circularly from that position, or -1 if the master could not be
+ *         determined or every node is the master
+ */
+int
+NdbRestarter::getRandomNotMasterNodeId(int rand){
+  const int master = getMasterNodeId();
+  if (master == -1)
+    return -1;
+
+  rand = rand % ndbNodes.size();
+  for (Uint32 tried = 0; tried < ndbNodes.size(); tried++){
+    if (ndbNodes[rand].node_id != master)
+      return ndbNodes[rand].node_id;
+    rand = (rand + 1) % ndbNodes.size();
+  }
+
+  // Walked the whole list without finding a node other than the master.
+  return -1;
+}
+
+/**
+ * Pick a data node that belongs to a different node group than nodeId.
+ *
+ * @param nodeId  node whose node group must be avoided
+ * @param rand    caller-supplied random number; reduced modulo the number
+ *                of data nodes to choose the starting position
+ * @return id of the first node in another node group found when walking the
+ *         node list circularly from that position, or -1 if not connected,
+ *         the status could not be fetched, nodeId is unknown (or its group
+ *         is -1), or no such node exists
+ */
+int
+NdbRestarter::getRandomNodeOtherNodeGroup(int nodeId, int rand){
+  if (!isConnected() || getStatus() != 0)
+    return -1;
+
+  // Look up the node group of the reference node.
+  int ownGroup = -1;
+  for (size_t n = 0; n < ndbNodes.size() && ownGroup == -1; n++){
+    if (ndbNodes[n].node_id == nodeId){
+      ownGroup = ndbNodes[n].node_group;
+      break;
+    }
+  }
+  if (ownGroup == -1)
+    return -1;
+
+  rand = rand % ndbNodes.size();
+  for (Uint32 tried = 0; tried < ndbNodes.size(); tried++){
+    if (ndbNodes[rand].node_group != ownGroup)
+      return ndbNodes[rand].node_id;
+    rand = (rand + 1) % ndbNodes.size();
+  }
+
+  // Every data node is in the same node group as nodeId.
+  return -1;
+}
+
+/** Wait until all data nodes report NDB_MGM_NODE_STATUS_STARTED. */
+int
+NdbRestarter::waitClusterStarted(unsigned int _timeout){
+  const ndb_mgm_node_status wanted = NDB_MGM_NODE_STATUS_STARTED;
+  return waitClusterState(wanted, _timeout);
+}
+
+/**
+ * Wait until all data nodes have reached at least start phase _startphase
+ * (expressed as NDB_MGM_NODE_STATUS_STARTING plus a phase number).
+ */
+int
+NdbRestarter::waitClusterStartPhase(int _startphase, unsigned int _timeout){
+  const ndb_mgm_node_status wanted = NDB_MGM_NODE_STATUS_STARTING;
+  return waitClusterState(wanted, _timeout, _startphase);
+}
+
+/** Wait until all data nodes report NDB_MGM_NODE_STATUS_SINGLEUSER. */
+int
+NdbRestarter::waitClusterSingleUser(unsigned int _timeout){
+  const ndb_mgm_node_status wanted = NDB_MGM_NODE_STATUS_SINGLEUSER;
+  return waitClusterState(wanted, _timeout);
+}
+
+/** Wait until all data nodes report NDB_MGM_NODE_STATUS_NOT_STARTED. */
+int
+NdbRestarter::waitClusterNoStart(unsigned int _timeout){
+  const ndb_mgm_node_status wanted = NDB_MGM_NODE_STATUS_NOT_STARTED;
+  return waitClusterState(wanted, _timeout);
+}
+
+/**
+ * Wait until every data node currently known to the management server has
+ * reached _status.
+ *
+ * @param _status      state to wait for
+ * @param _timeout     forwarded to waitNodesState
+ * @param _startphase  forwarded to waitNodesState
+ * @return the result of waitNodesState, or -1 if the status could not be
+ *         fetched or more than MAX_NDB_NODES data nodes were reported
+ */
+int
+NdbRestarter::waitClusterState(ndb_mgm_node_status _status,
+                               unsigned int _timeout,
+                               int _startphase){
+
+  int nodes[MAX_NDB_NODES];
+  int numNodes = 0;
+
+  if (getStatus() != 0)
+    return -1;
+
+  // Collect the ids of all data nodes, never writing past the fixed buffer.
+  for (size_t i = 0; i < ndbNodes.size(); i++){
+    if (numNodes >= MAX_NDB_NODES){
+      g_err << "waitClusterState: more than " << MAX_NDB_NODES
+            << " data nodes reported" << endl;
+      return -1;
+    }
+    nodes[numNodes++] = ndbNodes[i].node_id;
+  }
+
+  return waitNodesState(nodes, numNodes, _status, _timeout, _startphase);
+}
+
+
+/**
+ * Poll the cluster status once per second until every node listed in
+ * _nodes has reached _status.
+ *
+ * When _status is NDB_MGM_NODE_STATUS_STARTING a node counts as done if it
+ * is STARTING with start_phase >= _startphase, or already STARTED. A node
+ * whose status is numerically greater than STARTING is logged but does not
+ * keep the wait going.
+ *
+ * @param _nodes       ids of the nodes to wait for
+ * @param _num_nodes   number of entries in _nodes
+ * @param _status      state to wait for
+ * @param _timeout     maximum number of polls before giving up; 0 means
+ *                     wait forever. When waiting for STARTED and every data
+ *                     node is still STARTING or STARTED, the poll counter is
+ *                     reset a limited number of times before giving up.
+ * @param _startphase  start phase to reach, only used with STARTING
+ * @return 0 when all nodes are in the wanted state, -1 on timeout, when not
+ *         connected, or when the status could not be fetched
+ */
+int
+NdbRestarter::waitNodesState(int * _nodes, int _num_nodes,
+                             ndb_mgm_node_status _status,
+                             unsigned int _timeout,
+                             int _startphase){
+
+  if (!isConnected()){
+    g_err << "!isConnected"<<endl;
+    return -1;
+  }
+
+  unsigned int attempts = 0;
+  unsigned int resetAttempts = 0;
+  const unsigned int MAX_RESET_ATTEMPTS = 10;
+  bool allInState = false;
+  while (allInState == false){
+    if (_timeout > 0 && attempts > _timeout){
+      /**
+       * Timeout has expired waiting for the nodes to enter
+       * the state we want
+       */
+      bool waitMore = false;
+      /**
+       * Make special check if we are waiting for
+       * cluster to become started
+       */
+      if(_status == NDB_MGM_NODE_STATUS_STARTED){
+        waitMore = true;
+        /**
+         * If any data node is neither started nor starting
+         * there is no point in waiting any longer
+         */
+        for (size_t n = 0; n < ndbNodes.size(); n++){
+          if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED &&
+              ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING)
+            waitMore = false;
+        }
+      }
+
+      if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
+        g_err << "waitNodeState("
+              << ndb_mgm_get_node_status_string(_status)
+              <<", "<<_startphase<<")"
+              << " timeout after " << attempts <<" attempts" << endl;
+        return -1;
+      }
+
+      g_err << "waitNodeState("
+            << ndb_mgm_get_node_status_string(_status)
+            <<", "<<_startphase<<")"
+            << " resetting number of attempts "
+            << resetAttempts << endl;
+      attempts = 0;
+      resetAttempts++;
+    }
+
+    // Assume success; any node found lagging below clears the flag.
+    allInState = true;
+    if (getStatus() != 0){
+      g_err << "getStatus != 0" << endl;
+      return -1;
+    }
+
+    for (int i = 0; i < _num_nodes; i++){
+      // Locate the status entry for the requested node id.
+      ndb_mgm_node_state* ndbNode = NULL;
+      for (size_t n = 0; n < ndbNodes.size(); n++){
+        if (ndbNodes[n].node_id == _nodes[i])
+          ndbNode = &ndbNodes[n];
+      }
+
+      // A node that is not reported at all cannot be in the wanted state.
+      if(ndbNode == NULL){
+        allInState = false;
+        continue;
+      }
+
+      g_info << "State node " << ndbNode->node_id << " "
+             << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;
+
+      // Waiting for a start phase: far enough along, or already started.
+      if(_status == NDB_MGM_NODE_STATUS_STARTING &&
+         ((ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING &&
+           ndbNode->start_phase >= _startphase) ||
+          (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTED)))
+        continue;
+
+      if (_status == NDB_MGM_NODE_STATUS_STARTING){
+        g_info << "status = "
+               << ndb_mgm_get_node_status_string(ndbNode->node_status)
+               <<", start_phase="<<ndbNode->start_phase<<endl;
+        if (ndbNode->node_status != _status) {
+          if (ndbNode->node_status < _status)
+            allInState = false;
+          else
+            g_info << "node_status(" << ndbNode->node_status
+                   <<") != _status("<<_status<<")"<<endl;
+        } else if (ndbNode->start_phase < _startphase)
+          allInState = false;
+      } else {
+        if (ndbNode->node_status != _status)
+          allInState = false;
+      }
+    }
+
+    // Everything is in place: return at once instead of sleeping another
+    // second before the loop condition is re-evaluated.
+    if (allInState)
+      break;
+
+    g_info << "Waiting for cluster enter state "
+           << ndb_mgm_get_node_status_string(_status)<< endl;
+    NdbSleep_SecSleep(1);
+    attempts++;
+  }
+  return 0;
+}
+
+/** Wait until the listed nodes report NDB_MGM_NODE_STATUS_STARTED. */
+int NdbRestarter::waitNodesStarted(int * _nodes, int _num_nodes,
+                                   unsigned int _timeout){
+  const ndb_mgm_node_status wanted = NDB_MGM_NODE_STATUS_STARTED;
+  return waitNodesState(_nodes, _num_nodes, wanted, _timeout);
+}
+
+/**
+ * Wait until the listed nodes have reached at least start phase
+ * _startphase (expressed as NDB_MGM_NODE_STATUS_STARTING plus a phase).
+ */
+int NdbRestarter::waitNodesStartPhase(int * _nodes, int _num_nodes,
+                                      int _startphase, unsigned int _timeout){
+  const ndb_mgm_node_status wanted = NDB_MGM_NODE_STATUS_STARTING;
+  return waitNodesState(_nodes, _num_nodes, wanted, _timeout, _startphase);
+}
+
+/** Wait until the listed nodes report NDB_MGM_NODE_STATUS_NOT_STARTED. */
+int NdbRestarter::waitNodesNoStart(int * _nodes, int _num_nodes,
+                                   unsigned int _timeout){
+  const ndb_mgm_node_status wanted = NDB_MGM_NODE_STATUS_NOT_STARTED;
+  return waitNodesState(_nodes, _num_nodes, wanted, _timeout);
+}
+
+/**
+ * Report whether a management-server connection is available.
+ * If none is open yet, one connection attempt is made as a side effect.
+ */
+bool
+NdbRestarter::isConnected(){
+  // connect() is only called when the flag is not already set.
+  return connected == true || connect() == 0;
+}
+
+/**
+ * Drop any existing connection and open a new one to the management
+ * server at addr.
+ *
+ * @return 0 on success, -1 if the handle could not be created, the connect
+ *         string was rejected, or the connection attempt failed
+ */
+int
+NdbRestarter::connect(){
+  disconnect();
+
+  handle = ndb_mgm_create_handle();
+  if (handle == NULL){
+    g_err << "handle == NULL" << endl;
+    return -1;
+  }
+
+  g_info << "Connecting to mgmsrv at " << addr.c_str() << endl;
+
+  // Both steps report failure the same way; the connection attempt is only
+  // made when the connect string was accepted.
+  if (ndb_mgm_set_connectstring(handle,addr.c_str()) != 0 ||
+      ndb_mgm_connect(handle, 0, 0, 0) == -1)
+  {
+    MGMERR(handle);
+    g_err << "Connection to " << addr.c_str() << " failed" << endl;
+    return -1;
+  }
+
+  connected = true;
+  return 0;
+}
+
+/**
+ * Close the management-server connection and destroy the handle, if one
+ * exists. The object is marked as not connected in every case.
+ */
+void
+NdbRestarter::disconnect(){
+  connected = false;
+
+  if (handle == NULL)
+    return;
+
+  ndb_mgm_disconnect(handle);
+  ndb_mgm_destroy_handle(&handle);
+}
+
+/**
+ * Fetch the cluster status from the management server and rebuild the
+ * ndbNodes, mgmNodes and apiNodes lists from it.
+ *
+ * The fetch is retried up to 10 times, both when the management server
+ * returns no status and when a node of unrecognised type is still in state
+ * UNKNOWN or NO_CONTACT. A node of unrecognised type in any other state
+ * aborts the process.
+ *
+ * @return 0 on success, -1 if not connected or all retries were used up
+ */
+int
+NdbRestarter::getStatus(){
+  int retries = 0;
+
+  ndbNodes.clear();
+  mgmNodes.clear();
+  apiNodes.clear();
+
+  if (!isConnected())
+    return -1;
+
+  while(retries < 10){
+    struct ndb_mgm_cluster_state * status = ndb_mgm_get_status(handle);
+    if (status == NULL){
+      ndbout << "status==NULL, retries="<<retries<<endl;
+      MGMERR(handle);
+      retries++;
+      continue;
+    }
+
+    // Set when a node forces a retry. The snapshot is released only after
+    // the loop, so it is never dereferenced once freed.
+    bool incomplete = false;
+    for (int i = 0; i < status->no_of_nodes && !incomplete; i++){
+      struct ndb_mgm_node_state * node = &status->node_states[i];
+      switch(node->node_type){
+      case NDB_MGM_NODE_TYPE_NDB:
+        ndbNodes.push_back(*node);
+        break;
+      case NDB_MGM_NODE_TYPE_MGM:
+        mgmNodes.push_back(*node);
+        break;
+      case NDB_MGM_NODE_TYPE_API:
+        apiNodes.push_back(*node);
+        break;
+      default:
+        if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
+           node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
+          incomplete = true;
+          break;
+        }
+        abort();
+        break;
+      }
+    }
+
+    free(status);
+
+    if (incomplete){
+      // Discard the partial result and fetch a new snapshot.
+      retries++;
+      ndbNodes.clear();
+      mgmNodes.clear();
+      apiNodes.clear();
+      ndbout << "getStatus: node of unknown type without contact, retries="
+             << retries << endl;
+      continue;
+    }
+    return 0;
+  }
+
+  g_err << "getStatus failed" << endl;
+  return -1;
+}
+
+
+/**
+ * Return the number of data nodes in a freshly fetched status, or -1 if
+ * not connected or the status could not be fetched.
+ */
+int NdbRestarter::getNumDbNodes(){
+  const bool statusOk = isConnected() && getStatus() == 0;
+  if (!statusOk)
+    return -1;
+
+  return ndbNodes.size();
+}
+
+/**
+ * Restart all nodes: stop them into the "not started" state, wait for the
+ * cluster to get there, then (unless nostart is set) start them again.
+ *
+ * @param initial  passed through to ndb_mgm_restart2
+ * @param nostart  when true, leave the nodes in the "not started" state
+ * @param abort    passed through to ndb_mgm_restart2
+ * @return 0 on success, -1 if not connected, the cluster did not reach
+ *         NOT_STARTED within 60 polls, or the start request failed
+ */
+int NdbRestarter::restartAll(bool initial,
+                             bool nostart,
+                             bool abort){
+
+  if (!isConnected())
+    return -1;
+
+  // The stop step always asks for "nostart"; starting is a separate step.
+  const int stopResult = ndb_mgm_restart2(handle, 0, NULL, initial, 1, abort);
+  if (stopResult == -1) {
+    MGMERR(handle);
+    g_err << "Could not restart(stop) all nodes " << endl;
+    // Deliberately not returning here: continue anyway - Magnus
+  }
+
+  if (waitClusterNoStart(60) != 0){
+    g_err << "Cluster didnt enter STATUS_NOT_STARTED within 60s" << endl;
+    return -1;
+  }
+
+  if (!nostart){
+    if (ndb_mgm_start(handle, 0, NULL) == -1) {
+      MGMERR(handle);
+      g_err << "Could not restart(start) all nodes " << endl;
+      return -1;
+    }
+    return 0;
+  }
+
+  g_debug << "restartAll: nostart == true" << endl;
+  return 0;
+}
+
+/**
+ * Ask the management server to start all nodes.
+ * @return 0 on success, -1 if not connected or the request failed
+ */
+int NdbRestarter::startAll(){
+  if (!isConnected())
+    return -1;
+
+  const int started = ndb_mgm_start(handle, 0, NULL);
+  if (started != -1)
+    return 0;
+
+  MGMERR(handle);
+  g_err << "Could not start all nodes " << endl;
+  return -1;
+}
+
+/**
+ * Ask the management server to start the given nodes.
+ *
+ * @param nodes      ids of the nodes to start
+ * @param num_nodes  number of entries in nodes
+ * @return 0 if ndb_mgm_start reports exactly num_nodes, -1 otherwise or if
+ *         not connected
+ */
+int NdbRestarter::startNodes(int * nodes, int num_nodes){
+  if (!isConnected())
+    return -1;
+
+  const int started = ndb_mgm_start(handle, num_nodes, nodes);
+  if (started == num_nodes)
+    return 0;
+
+  MGMERR(handle);
+  g_err << "Could not start all nodes " << endl;
+  return -1;
+}
+
+/**
+ * Insert error code _error into the data node _nodeId.
+ *
+ * Failures are reported to the caller instead of being logged and then
+ * swallowed, so that insertErrorInAllNodes can detect them.
+ *
+ * @param _nodeId  id of the target node
+ * @param _error   error code to insert
+ * @return 0 on success, -1 if not connected, the management call failed,
+ *         or the reply carries a non-zero return code
+ */
+int NdbRestarter::insertErrorInNode(int _nodeId, int _error){
+  if (!isConnected())
+    return -1;
+
+  ndb_mgm_reply reply;
+  reply.return_code = 0;
+
+  int result = 0;
+
+  if (ndb_mgm_insert_error(handle, _nodeId, _error, &reply) == -1){
+    MGMERR(handle);
+    g_err << "Could not insert error in node with id = "<< _nodeId << endl;
+    result = -1;
+  }
+  if(reply.return_code != 0){
+    g_err << "Error: " << reply.message << endl;
+    result = -1;
+  }
+  return result;
+}
+
+/**
+ * Insert error code _error into every data node in a freshly fetched
+ * status. All nodes are attempted even if an earlier one fails.
+ *
+ * @return 0 if no insertion returned -1; -1 otherwise, or if not connected
+ *         or the status could not be fetched
+ */
+int NdbRestarter::insertErrorInAllNodes(int _error){
+  if (!isConnected() || getStatus() != 0)
+    return -1;
+
+  int outcome = 0;
+  for (size_t n = 0; n < ndbNodes.size(); n++){
+    const int nodeId = ndbNodes[n].node_id;
+    g_debug << "inserting error in node " << nodeId << endl;
+    if (insertErrorInNode(nodeId, _error) == -1)
+      outcome = -1;
+  }
+  return outcome;
+}
+
+
+
+/**
+ * Send a dump-state request to the data node _nodeId.
+ *
+ * @param _nodeId    id of the target node
+ * @param _args      dump-state arguments
+ * @param _num_args  number of entries in _args
+ * @return 0 on success; the reply's return code if it is non-zero; -1 if
+ *         not connected or if the management call failed without setting a
+ *         return code in the reply (previously reported as success)
+ */
+int NdbRestarter::dumpStateOneNode(int _nodeId, int * _args, int _num_args){
+  if (!isConnected())
+    return -1;
+
+  ndb_mgm_reply reply;
+  reply.return_code = 0;
+
+  const int rc = ndb_mgm_dump_state(handle, _nodeId, _args, _num_args, &reply);
+  if (rc == -1){
+    MGMERR(handle);
+    g_err << "Could not dump state in node with id = "<< _nodeId << endl;
+  }
+
+  if(reply.return_code != 0){
+    g_err << "Error: " << reply.message << endl;
+    return reply.return_code;
+  }
+
+  // The call can fail without filling in the reply; do not report success.
+  return (rc == -1) ? -1 : 0;
+}
+
+/**
+ * Send the same dump-state request to every data node in a freshly fetched
+ * status. All nodes are attempted even if an earlier one fails.
+ *
+ * @return 0 if no request returned -1; -1 otherwise, or if not connected
+ *         or the status could not be fetched
+ */
+int NdbRestarter::dumpStateAllNodes(int * _args, int _num_args){
+  if (!isConnected() || getStatus() != 0)
+    return -1;
+
+  int outcome = 0;
+  for (size_t n = 0; n < ndbNodes.size(); n++){
+    const int nodeId = ndbNodes[n].node_id;
+    g_debug << "dumping state in node " << nodeId << endl;
+    if (dumpStateOneNode(nodeId, _args, _num_args) == -1)
+      outcome = -1;
+  }
+  return outcome;
+}
+
+
+/**
+ * Put the cluster into single user mode for the API node _nodeId.
+ *
+ * @param _nodeId  id of the API node passed to ndb_mgm_enter_single_user
+ * @return 0 on success; the reply's return code if it is non-zero; -1 if
+ *         not connected or if the management call failed without setting a
+ *         return code in the reply (previously reported as success)
+ */
+int NdbRestarter::enterSingleUserMode(int _nodeId){
+  if (!isConnected())
+    return -1;
+
+  ndb_mgm_reply reply;
+  reply.return_code = 0;
+
+  const int rc = ndb_mgm_enter_single_user(handle, _nodeId, &reply);
+  if (rc == -1){
+    MGMERR(handle);
+    g_err << "Could not enter single user mode api node = "<< _nodeId << endl;
+  }
+
+  if(reply.return_code != 0){
+    g_err << "Error: " << reply.message << endl;
+    return reply.return_code;
+  }
+
+  // The call can fail without filling in the reply; do not report success.
+  return (rc == -1) ? -1 : 0;
+}
+
+
+/**
+ * Take the cluster out of single user mode.
+ *
+ * @return 0 on success; the reply's return code if it is non-zero; -1 if
+ *         not connected or if the management call failed without setting a
+ *         return code in the reply (previously reported as success)
+ */
+int NdbRestarter::exitSingleUserMode(){
+  if (!isConnected())
+    return -1;
+
+  ndb_mgm_reply reply;
+  reply.return_code = 0;
+
+  const int rc = ndb_mgm_exit_single_user(handle, &reply);
+  if (rc == -1){
+    MGMERR(handle);
+    g_err << "Could not exit single user mode " << endl;
+  }
+
+  if(reply.return_code != 0){
+    g_err << "Error: " << reply.message << endl;
+    return reply.return_code;
+  }
+
+  // The call can fail without filling in the reply; do not report success.
+  return (rc == -1) ? -1 : 0;
+}
+
+/**
+ * Return the cluster configuration, fetching it from the management server
+ * on first use and caching the pointer in m_config afterwards.
+ *
+ * @return the configuration, or 0 if none is cached and either no
+ *         connection is available or the fetch returned 0
+ */
+ndb_mgm_configuration*
+NdbRestarter::getConfig(){
+  // A connection is only needed (and attempted) when nothing is cached yet.
+  if (m_config == 0 && isConnected())
+    m_config = ndb_mgm_get_configuration(handle, 0);
+
+  return m_config;
+}
+
+template class Vector<ndb_mgm_node_state>; // explicit instantiation of Vector for ndb_mgm_node_state in this translation unit