diff options
Diffstat (limited to 'storage/ndb/test/src/NdbRestarter.cpp')
-rw-r--r-- | storage/ndb/test/src/NdbRestarter.cpp | 651 |
1 files changed, 651 insertions, 0 deletions
diff --git a/storage/ndb/test/src/NdbRestarter.cpp b/storage/ndb/test/src/NdbRestarter.cpp new file mode 100644 index 00000000000..91c0963feae --- /dev/null +++ b/storage/ndb/test/src/NdbRestarter.cpp @@ -0,0 +1,651 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include <NdbRestarter.hpp> +#include <NdbOut.hpp> +#include <NdbSleep.h> +#include <NdbTick.h> +#include <mgmapi_debug.h> +#include <NDBT_Output.hpp> +#include <random.h> +#include <kernel/ndb_limits.h> +#include <ndb_version.h> + +#define MGMERR(h) \ + ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \ + << ", line="<<ndb_mgm_get_latest_error_line(h) \ + << endl; + + +NdbRestarter::NdbRestarter(const char* _addr): + connected(false), + handle(NULL), + m_config(0) +{ + if (_addr == NULL){ + addr.assign(""); + } else { + addr.assign(_addr); + } +} + +NdbRestarter::~NdbRestarter(){ + disconnect(); +} + +int NdbRestarter::getDbNodeId(int _i){ + if (!isConnected()) + return -1; + + if (getStatus() != 0) + return -1; + + for(size_t i = 0; i < ndbNodes.size(); i++){ + if (i == (unsigned)_i){ + return ndbNodes[i].node_id; + } + } + return -1; +} + + +int +NdbRestarter::restartOneDbNode(int _nodeId, + bool inital, + bool nostart, + bool abort){ + if (!isConnected()) + return -1; + + int ret = 0; + + if ((ret = ndb_mgm_restart2(handle, 1, &_nodeId, + inital, nostart, abort)) <= 0) { + /** + * ndb_mgm_restart2 returned error, one reason could + * be that the node have not stopped fast enough! + * Check status of the node to see if it's on the + * way down. If that's the case ignore the error + */ + + if (getStatus() != 0) + return -1; + + g_info << "ndb_mgm_restart2 returned with error, checking node state" << endl; + + for(size_t i = 0; i < ndbNodes.size(); i++){ + if(ndbNodes[i].node_id == _nodeId){ + g_info <<_nodeId<<": status="<<ndbNodes[i].node_status<<endl; + /* Node found check state */ + switch(ndbNodes[i].node_status){ + case NDB_MGM_NODE_STATUS_RESTARTING: + case NDB_MGM_NODE_STATUS_SHUTTING_DOWN: + return 0; + default: + break; + } + } + } + + MGMERR(handle); + g_err << "Could not stop node with id = "<< _nodeId << endl; + return -1; + } + + return 0; +} + +int +NdbRestarter::getMasterNodeId(){ + if (!isConnected()) + return -1; + + if (getStatus() != 0) + return -1; + + int min = 0; + int node = -1; + for(size_t i = 0; i < ndbNodes.size(); i++){ + if(min == 0 || ndbNodes[i].dynamic_id < min){ + min = ndbNodes[i].dynamic_id; + node = ndbNodes[i].node_id; + } + } + + return node; +} + +int +NdbRestarter::getRandomNotMasterNodeId(int rand){ + int master = getMasterNodeId(); + if(master == -1) + return -1; + + Uint32 counter = 0; + rand = rand % ndbNodes.size(); + while(counter++ < ndbNodes.size() && ndbNodes[rand].node_id == master) + rand = (rand + 1) % ndbNodes.size(); + + if(ndbNodes[rand].node_id != master) + return ndbNodes[rand].node_id; + return -1; +} + +int +NdbRestarter::getRandomNodeOtherNodeGroup(int nodeId, int rand){ + if (!isConnected()) + return -1; + + if (getStatus() != 0) + return -1; + + int node_group = -1; + for(size_t i = 0; i < ndbNodes.size(); i++){ + if(ndbNodes[i].node_id == nodeId){ + node_group = ndbNodes[i].node_group; + break; + } + } + if(node_group == -1){ + return -1; + } + + Uint32 counter = 0; + rand = rand % ndbNodes.size(); + while(counter++ < ndbNodes.size() && ndbNodes[rand].node_group == node_group) + rand = (rand + 1) % ndbNodes.size(); + + if(ndbNodes[rand].node_group != node_group) + return ndbNodes[rand].node_id; + + return -1; +} + +int +NdbRestarter::waitClusterStarted(unsigned int _timeout){ + return waitClusterState(NDB_MGM_NODE_STATUS_STARTED, _timeout); +} + +int +NdbRestarter::waitClusterStartPhase(int _startphase, unsigned int _timeout){ + return waitClusterState(NDB_MGM_NODE_STATUS_STARTING, _timeout, _startphase); +} + +int +NdbRestarter::waitClusterSingleUser(unsigned int _timeout){ + return waitClusterState(NDB_MGM_NODE_STATUS_SINGLEUSER, _timeout); +} + +int +NdbRestarter::waitClusterNoStart(unsigned int _timeout){ + return waitClusterState(NDB_MGM_NODE_STATUS_NOT_STARTED, _timeout); +} + +int +NdbRestarter::waitClusterState(ndb_mgm_node_status _status, + unsigned int _timeout, + int _startphase){ + + int nodes[MAX_NDB_NODES]; + int numNodes = 0; + + if (getStatus() != 0) + return -1; + + // Collect all nodes into nodes + for (size_t i = 0; i < ndbNodes.size(); i++){ + nodes[i] = ndbNodes[i].node_id; + numNodes++; + } + + return waitNodesState(nodes, numNodes, _status, _timeout, _startphase); +} + + +int +NdbRestarter::waitNodesState(int * _nodes, int _num_nodes, + ndb_mgm_node_status _status, + unsigned int _timeout, + int _startphase){ + + if (!isConnected()){ + g_err << "!isConnected"<<endl; + return -1; + } + + unsigned int attempts = 0; + unsigned int resetAttempts = 0; + const unsigned int MAX_RESET_ATTEMPTS = 10; + bool allInState = false; + while (allInState == false){ + if (_timeout > 0 && attempts > _timeout){ + /** + * Timeout has expired waiting for the nodes to enter + * the state we want + */ + bool waitMore = false; + /** + * Make special check if we are waiting for + * cluster to become started + */ + if(_status == NDB_MGM_NODE_STATUS_STARTED){ + waitMore = true; + /** + * First check if any node is not starting + * then it's no idea to wait anymore + */ + for (size_t n = 0; n < ndbNodes.size(); n++){ + if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED && + ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING) + waitMore = false; + + } + } + + if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){ + g_err << "waitNodeState(" + << ndb_mgm_get_node_status_string(_status) + <<", "<<_startphase<<")" + << " timeout after " << attempts <<" attemps" << endl; + return -1; + } + + g_err << "waitNodeState(" + << ndb_mgm_get_node_status_string(_status) + <<", "<<_startphase<<")" + << " resetting number of attempts " + << resetAttempts << endl; + attempts = 0; + resetAttempts++; + + } + + allInState = true; + if (getStatus() != 0){ + g_err << "getStatus != 0" << endl; + return -1; + } + + // ndbout << "waitNodeState; _num_nodes = " << _num_nodes << endl; + // for (int i = 0; i < _num_nodes; i++) + // ndbout << " node["<<i<<"] =" <<_nodes[i] << endl; + + for (int i = 0; i < _num_nodes; i++){ + ndb_mgm_node_state* ndbNode = NULL; + for (size_t n = 0; n < ndbNodes.size(); n++){ + if (ndbNodes[n].node_id == _nodes[i]) + ndbNode = &ndbNodes[n]; + } + + if(ndbNode == NULL){ + allInState = false; + continue; + } + + g_info << "State node " << ndbNode->node_id << " " + << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl; + + assert(ndbNode != NULL); + + if(_status == NDB_MGM_NODE_STATUS_STARTING && + ((ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING && + ndbNode->start_phase >= _startphase) || + (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTED))) + continue; + + if (_status == NDB_MGM_NODE_STATUS_STARTING){ + g_info << "status = " + << ndb_mgm_get_node_status_string(ndbNode->node_status) + <<", start_phase="<<ndbNode->start_phase<<endl; + if (ndbNode->node_status != _status) { + if (ndbNode->node_status < _status) + allInState = false; + else + g_info << "node_status(" << ndbNode->node_status + <<") != _status("<<_status<<")"<<endl; + } else if (ndbNode->start_phase < _startphase) + allInState = false; + } else { + if (ndbNode->node_status != _status) + allInState = false; + } + } + g_info << "Waiting for cluster enter state" + << ndb_mgm_get_node_status_string(_status)<< endl; + NdbSleep_SecSleep(1); + attempts++; + } + return 0; +} + +int NdbRestarter::waitNodesStarted(int * _nodes, int _num_nodes, + unsigned int _timeout){ + return waitNodesState(_nodes, _num_nodes, + NDB_MGM_NODE_STATUS_STARTED, _timeout); +} + +int NdbRestarter::waitNodesStartPhase(int * _nodes, int _num_nodes, + int _startphase, unsigned int _timeout){ + return waitNodesState(_nodes, _num_nodes, + NDB_MGM_NODE_STATUS_STARTING, _timeout, + _startphase); +} + +int NdbRestarter::waitNodesNoStart(int * _nodes, int _num_nodes, + unsigned int _timeout){ + return waitNodesState(_nodes, _num_nodes, + NDB_MGM_NODE_STATUS_NOT_STARTED, _timeout); +} + +bool +NdbRestarter::isConnected(){ + if (connected == true) + return true; + return connect() == 0; +} + +int +NdbRestarter::connect(){ + disconnect(); + handle = ndb_mgm_create_handle(); + if (handle == NULL){ + g_err << "handle == NULL" << endl; + return -1; + } + g_info << "Connecting to mgmsrv at " << addr.c_str() << endl; + if (ndb_mgm_set_connectstring(handle,addr.c_str())) + { + MGMERR(handle); + g_err << "Connection to " << addr.c_str() << " failed" << endl; + return -1; + } + + if (ndb_mgm_connect(handle, 0, 0, 0) == -1) + { + MGMERR(handle); + g_err << "Connection to " << addr.c_str() << " failed" << endl; + return -1; + } + + connected = true; + return 0; +} + +void +NdbRestarter::disconnect(){ + if (handle != NULL){ + ndb_mgm_disconnect(handle); + ndb_mgm_destroy_handle(&handle); + } + connected = false; +} + +int +NdbRestarter::getStatus(){ + int retries = 0; + struct ndb_mgm_cluster_state * status; + struct ndb_mgm_node_state * node; + + ndbNodes.clear(); + mgmNodes.clear(); + apiNodes.clear(); + + if (!isConnected()) + return -1; + + while(retries < 10){ + status = ndb_mgm_get_status(handle); + if (status == NULL){ + ndbout << "status==NULL, retries="<<retries<<endl; + MGMERR(handle); + retries++; + continue; + } + for (int i = 0; i < status->no_of_nodes; i++){ + node = &status->node_states[i]; + switch(node->node_type){ + case NDB_MGM_NODE_TYPE_NDB: + ndbNodes.push_back(*node); + break; + case NDB_MGM_NODE_TYPE_MGM: + mgmNodes.push_back(*node); + break; + case NDB_MGM_NODE_TYPE_API: + apiNodes.push_back(*node); + break; + default: + if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN || + node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){ + retries++; + ndbNodes.clear(); + mgmNodes.clear(); + apiNodes.clear(); + free(status); + status = NULL; + i = status->no_of_nodes; + + ndbout << "kalle"<< endl; + break; + } + abort(); + break; + } + } + if(status == 0){ + ndbout << "status == 0" << endl; + continue; + } + free(status); + return 0; + } + + g_err << "getStatus failed" << endl; + return -1; +} + + +int NdbRestarter::getNumDbNodes(){ + if (!isConnected()) + return -1; + + if (getStatus() != 0) + return -1; + + return ndbNodes.size(); +} + +int NdbRestarter::restartAll(bool initial, + bool nostart, + bool abort){ + + if (!isConnected()) + return -1; + + if (ndb_mgm_restart2(handle, 0, NULL, initial, 1, abort) == -1) { + MGMERR(handle); + g_err << "Could not restart(stop) all nodes " << endl; + // return -1; Continue anyway - Magnus + } + + if (waitClusterNoStart(60) != 0){ + g_err << "Cluster didnt enter STATUS_NOT_STARTED within 60s" << endl; + return -1; + } + + if(nostart){ + g_debug << "restartAll: nostart == true" << endl; + return 0; + } + + if (ndb_mgm_start(handle, 0, NULL) == -1) { + MGMERR(handle); + g_err << "Could not restart(start) all nodes " << endl; + return -1; + } + + return 0; +} + +int NdbRestarter::startAll(){ + if (!isConnected()) + return -1; + + if (ndb_mgm_start(handle, 0, NULL) == -1) { + MGMERR(handle); + g_err << "Could not start all nodes " << endl; + return -1; + } + + return 0; + +} + +int NdbRestarter::startNodes(int * nodes, int num_nodes){ + if (!isConnected()) + return -1; + + if (ndb_mgm_start(handle, num_nodes, nodes) != num_nodes) { + MGMERR(handle); + g_err << "Could not start all nodes " << endl; + return -1; + } + + return 0; +} + +int NdbRestarter::insertErrorInNode(int _nodeId, int _error){ + if (!isConnected()) + return -1; + + ndb_mgm_reply reply; + reply.return_code = 0; + + if (ndb_mgm_insert_error(handle, _nodeId, _error, &reply) == -1){ + MGMERR(handle); + g_err << "Could not insert error in node with id = "<< _nodeId << endl; + } + if(reply.return_code != 0){ + g_err << "Error: " << reply.message << endl; + } + return 0; +} + +int NdbRestarter::insertErrorInAllNodes(int _error){ + if (!isConnected()) + return -1; + + if (getStatus() != 0) + return -1; + + int result = 0; + + for(size_t i = 0; i < ndbNodes.size(); i++){ + g_debug << "inserting error in node " << ndbNodes[i].node_id << endl; + if (insertErrorInNode(ndbNodes[i].node_id, _error) == -1) + result = -1; + } + return result; + +} + + + +int NdbRestarter::dumpStateOneNode(int _nodeId, int * _args, int _num_args){ + if (!isConnected()) + return -1; + + ndb_mgm_reply reply; + reply.return_code = 0; + + if (ndb_mgm_dump_state(handle, _nodeId, _args, _num_args, &reply) == -1){ + MGMERR(handle); + g_err << "Could not dump state in node with id = "<< _nodeId << endl; + } + + if(reply.return_code != 0){ + g_err << "Error: " << reply.message << endl; + } + return reply.return_code; +} + +int NdbRestarter::dumpStateAllNodes(int * _args, int _num_args){ + if (!isConnected()) + return -1; + + if (getStatus() != 0) + return -1; + + int result = 0; + + for(size_t i = 0; i < ndbNodes.size(); i++){ + g_debug << "dumping state in node " << ndbNodes[i].node_id << endl; + if (dumpStateOneNode(ndbNodes[i].node_id, _args, _num_args) == -1) + result = -1; + } + return result; + +} + + +int NdbRestarter::enterSingleUserMode(int _nodeId){ + if (!isConnected()) + return -1; + + ndb_mgm_reply reply; + reply.return_code = 0; + + if (ndb_mgm_enter_single_user(handle, _nodeId, &reply) == -1){ + MGMERR(handle); + g_err << "Could not enter single user mode api node = "<< _nodeId << endl; + } + + if(reply.return_code != 0){ + g_err << "Error: " << reply.message << endl; + } + + return reply.return_code; +} + + +int NdbRestarter::exitSingleUserMode(){ + if (!isConnected()) + return -1; + + ndb_mgm_reply reply; + reply.return_code = 0; + + if (ndb_mgm_exit_single_user(handle, &reply) == -1){ + MGMERR(handle); + g_err << "Could not exit single user mode " << endl; + } + + if(reply.return_code != 0){ + g_err << "Error: " << reply.message << endl; + } + return reply.return_code; +} + +ndb_mgm_configuration* +NdbRestarter::getConfig(){ + if(m_config) return m_config; + + if (!isConnected()) + return 0; + m_config = ndb_mgm_get_configuration(handle, 0); + return m_config; +} + +template class Vector<ndb_mgm_node_state>; |