diff options
author | unknown <jonas@perch.ndb.mysql.com> | 2006-04-07 12:01:08 +0200 |
---|---|---|
committer | unknown <jonas@perch.ndb.mysql.com> | 2006-04-07 12:01:08 +0200 |
commit | 978c18b6801834521a21184145ac18194446b46e (patch) | |
tree | 74f92be1428a013a6f3c0b6846ee1d43e74851b3 /ndb | |
parent | f3c792863c4f4ae6d9e5c92ce1755310ad912293 (diff) | |
parent | cfee84be3abe7eaa3b67e02f27b11ec821534674 (diff) | |
download | mariadb-git-978c18b6801834521a21184145ac18194446b46e.tar.gz |
Merge joreland@bk-internal.mysql.com:/home/bk/mysql-5.0-jonas
into perch.ndb.mysql.com:/home/jonas/src/50-jonas
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
Auto merged
Diffstat (limited to 'ndb')
-rw-r--r-- | ndb/include/kernel/signaldata/StopReq.hpp | 3 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp | 5 | ||||
-rw-r--r-- | ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 177 | ||||
-rw-r--r-- | ndb/src/mgmclient/CommandInterpreter.cpp | 232 | ||||
-rw-r--r-- | ndb/src/mgmsrv/MgmtSrvr.cpp | 213 | ||||
-rw-r--r-- | ndb/src/mgmsrv/MgmtSrvr.hpp | 14 | ||||
-rw-r--r-- | ndb/src/mgmsrv/Services.cpp | 48 |
7 files changed, 440 insertions, 252 deletions
diff --git a/ndb/include/kernel/signaldata/StopReq.hpp b/ndb/include/kernel/signaldata/StopReq.hpp index 8a9fde75b6c..70e195961ce 100644 --- a/ndb/include/kernel/signaldata/StopReq.hpp +++ b/ndb/include/kernel/signaldata/StopReq.hpp @@ -92,7 +92,7 @@ class StopRef friend class Ndbcntr; public: - STATIC_CONST( SignalLength = 2 ); + STATIC_CONST( SignalLength = 3 ); enum ErrorCode { OK = 0, @@ -107,6 +107,7 @@ public: public: Uint32 senderData; Uint32 errorCode; + Uint32 masterNodeId; }; inline diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index 3fc24e395b1..c403aad5516 100644 --- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -2125,6 +2125,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ else ref->errorCode = StopRef::NodeShutdownInProgress; ref->senderData = senderData; + ref->masterNodeId = cmasterNodeId; if (senderRef != RNIL) sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); @@ -2136,6 +2137,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ jam(); ref->errorCode = StopRef::UnsupportedNodeShutdown; ref->senderData = senderData; + ref->masterNodeId = cmasterNodeId; if (senderRef != RNIL) sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); return; @@ -2146,6 +2148,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ jam(); ref->errorCode = StopRef::MultiNodeShutdownNotMaster; ref->senderData = senderData; + ref->masterNodeId = cmasterNodeId; if (senderRef != RNIL) sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); return; @@ -2289,6 +2292,7 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){ ref->senderData = stopReq.senderData; ref->errorCode = StopRef::NodeShutdownWouldCauseSystemCrash; + ref->masterNodeId = cntr.cmasterNodeId; const BlockReference bref = stopReq.senderRef; if (bref != RNIL) @@ -2437,6 +2441,7 @@ void Ndbcntr::execABORT_ALL_REF(Signal* signal){ StopRef * const stopRef = (StopRef *)&signal->theData[0]; stopRef->senderData = c_stopRec.stopReq.senderData; stopRef->errorCode = StopRef::TransactionAbortFailed; + stopRef->masterNodeId = cmasterNodeId; sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); } diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index c98833da7b1..9a7256b4a55 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -1214,115 +1214,116 @@ Qmgr::check_startup(Signal* signal) goto start_report; } } - const bool all = c_start.m_starting_nodes.equal(c_definedNodes); - CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; - { - /** - * Check for missing node group directly - */ - char buf[100]; - NdbNodeBitmask check; - check.assign(c_definedNodes); - check.bitANDC(c_start.m_starting_nodes); // Not connected nodes - check.bitOR(c_start.m_starting_nodes_w_log); - - sd->blockRef = reference(); - sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; - sd->mask = check; - EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, - CheckNodeGroups::SignalLength); - - if (sd->output == CheckNodeGroups::Lose) + const bool all = c_start.m_starting_nodes.equal(c_definedNodes); + CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; + { - jam(); - goto missing_nodegroup; + /** + * Check for missing node group directly + */ + char buf[100]; + NdbNodeBitmask check; + check.assign(c_definedNodes); + check.bitANDC(c_start.m_starting_nodes); // Not connected nodes + check.bitOR(c_start.m_starting_nodes_w_log); + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = check; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + if (sd->output == CheckNodeGroups::Lose) + { + jam(); + goto missing_nodegroup; + } } - } - sd->blockRef = reference(); - sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; - sd->mask = c_start.m_starting_nodes; - EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, - CheckNodeGroups::SignalLength); + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); - const Uint32 result = sd->output; + const Uint32 result = sd->output; - sd->blockRef = reference(); - sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; - sd->mask = c_start.m_starting_nodes_w_log; - EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, - CheckNodeGroups::SignalLength); + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes_w_log; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); - const Uint32 result_w_log = sd->output; + const Uint32 result_w_log = sd->output; - if (tmp.equal(c_definedNodes)) - { + if (tmp.equal(c_definedNodes)) + { + /** + * All nodes (wrt no-wait nodes) has connected... + * this means that we will now start or die + */ + jam(); + switch(result_w_log){ + case CheckNodeGroups::Lose: + { + jam(); + goto missing_nodegroup; + } + case CheckNodeGroups::Win: + signal->theData[1] = all ? 0x8001 : 0x8002; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + case CheckNodeGroups::Partitioning: + ndbrequire(result != CheckNodeGroups::Lose); + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + + if (now < partial_timeout) + { + jam(); + signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3; + signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000); + report_mask.assign(wait); + retVal = 0; + goto start_report; + } + /** - * All nodes (wrt no-wait nodes) has connected... - * this means that we will now start or die - */ - jam(); + * Start partial has passed...check for partitioning... + */ switch(result_w_log){ case CheckNodeGroups::Lose: - { jam(); goto missing_nodegroup; - } - case CheckNodeGroups::Win: - signal->theData[1] = all ? 0x8001 : 0x8002; - report_mask.assign(c_definedNodes); - report_mask.bitANDC(c_start.m_starting_nodes); - retVal = 1; - goto start_report; case CheckNodeGroups::Partitioning: - ndbrequire(result != CheckNodeGroups::Lose); + if (now < partitioned_timeout && result != CheckNodeGroups::Win) + { + signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5; + signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 0; + goto start_report; + } + // Fall through... + case CheckNodeGroups::Win: signal->theData[1] = - all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); + all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); report_mask.assign(c_definedNodes); report_mask.bitANDC(c_start.m_starting_nodes); retVal = 1; goto start_report; } } - - if (now < partial_timeout) - { - jam(); - signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3; - signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000); - report_mask.assign(wait); - retVal = 0; - goto start_report; - } - - /** - * Start partial has passed...check for partitioning... - */ - switch(result_w_log){ - case CheckNodeGroups::Lose: - jam(); - goto missing_nodegroup; - case CheckNodeGroups::Partitioning: - if (now < partitioned_timeout && result != CheckNodeGroups::Win) - { - signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5; - signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); - report_mask.assign(c_definedNodes); - report_mask.bitANDC(c_start.m_starting_nodes); - retVal = 0; - goto start_report; - } - // Fall through... - case CheckNodeGroups::Win: - signal->theData[1] = - all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); - report_mask.assign(c_definedNodes); - report_mask.bitANDC(c_start.m_starting_nodes); - retVal = 1; - goto start_report; - } - ndbrequire(false); start_report: diff --git a/ndb/src/mgmclient/CommandInterpreter.cpp b/ndb/src/mgmclient/CommandInterpreter.cpp index 74d7f879f9c..39c84fd8055 100644 --- a/ndb/src/mgmclient/CommandInterpreter.cpp +++ b/ndb/src/mgmclient/CommandInterpreter.cpp @@ -25,6 +25,7 @@ #endif #include <mgmapi.h> +#include <util/BaseString.hpp> class MgmtSrvr; @@ -70,6 +71,9 @@ private: */ void analyseAfterFirstToken(int processId, char* allAfterFirstTokenCstr); + void executeCommand(Vector<BaseString> &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes); /** * Parse the block specification part of the LOG* commands, * things after LOG*: [BLOCK = {ALL|<blockName>+}] @@ -104,10 +108,14 @@ private: public: void executeStop(int processId, const char* parameters, bool all); + void executeStop(Vector<BaseString> &command_list, unsigned command_pos, + int *node_ids, int no_of_nodes); void executeEnterSingleUser(char* parameters); void executeExitSingleUser(char* parameters); void executeStart(int processId, const char* parameters, bool all); void executeRestart(int processId, const char* parameters, bool all); + void executeRestart(Vector<BaseString> &command_list, unsigned command_pos, + int *node_ids, int no_of_nodes); void executeLogLevel(int processId, const char* parameters, bool all); void executeError(int processId, const char* parameters, bool all); void executeLog(int processId, const char* parameters, bool all); @@ -643,9 +651,16 @@ CommandInterpreter::execute_impl(const char *_line) } } while (do_continue); // if there is anything in the line proceed + Vector<BaseString> command_list; + { + BaseString tmp(line); + tmp.split(command_list); + for (unsigned i= 0; i < command_list.size();) + command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0); + } char* firstToken = strtok(line, " "); char* allAfterFirstToken = strtok(NULL, ""); - + if (strcasecmp(firstToken, "HELP") == 0 || strcasecmp(firstToken, "?") == 0) { executeHelp(allAfterFirstToken); @@ -723,22 +738,45 @@ CommandInterpreter::execute_impl(const char *_line) analyseAfterFirstToken(-1, allAfterFirstToken); } else { /** - * First token should be a digit, node ID + * First tokens should be digits, node ID's */ - int nodeId; - - if (! convert(firstToken, nodeId)) { + int node_ids[MAX_NODES]; + unsigned pos; + for (pos= 0; pos < command_list.size(); pos++) + { + int node_id; + if (convert(command_list[pos].c_str(), node_id)) + { + if (node_id <= 0) { + ndbout << "Invalid node ID: " << command_list[pos].c_str() + << "." << endl; + DBUG_RETURN(true); + } + node_ids[pos]= node_id; + continue; + } + break; + } + int no_of_nodes= pos; + if (no_of_nodes == 0) + { + /* No digit found */ invalid_command(_line); DBUG_RETURN(true); } - - if (nodeId <= 0) { - ndbout << "Invalid node ID: " << firstToken << "." << endl; + if (pos == command_list.size()) + { + /* No command found */ + invalid_command(_line); DBUG_RETURN(true); } - - analyseAfterFirstToken(nodeId, allAfterFirstToken); - + if (no_of_nodes == 1) + { + analyseAfterFirstToken(node_ids[0], allAfterFirstToken); + DBUG_RETURN(true); + } + executeCommand(command_list, pos, node_ids, no_of_nodes); + DBUG_RETURN(true); } DBUG_RETURN(true); } @@ -808,6 +846,27 @@ CommandInterpreter::analyseAfterFirstToken(int processId, ndbout << endl; } +void +CommandInterpreter::executeCommand(Vector<BaseString> &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes) +{ + const char *cmd= command_list[command_pos].c_str(); + if (strcasecmp("STOP", cmd) == 0) + { + executeStop(command_list, command_pos+1, node_ids, no_of_nodes); + return; + } + if (strcasecmp("RESTART", cmd) == 0) + { + executeRestart(command_list, command_pos+1, node_ids, no_of_nodes); + return; + } + ndbout_c("Invalid command: '%s' after multi node id list. " + "Expected STOP or RESTART.", cmd); + return; +} + /** * Get next nodeid larger than the give node_id. node_id will be * set to the next node_id in the list. node_id should be set @@ -1400,24 +1459,60 @@ CommandInterpreter::executeClusterLog(char* parameters) //***************************************************************************** void -CommandInterpreter::executeStop(int processId, const char *, bool all) +CommandInterpreter::executeStop(int processId, const char *parameters, + bool all) { - int result = 0; - if(all) { - result = ndb_mgm_stop(m_mgmsrv, 0, 0); - } else { - result = ndb_mgm_stop(m_mgmsrv, 1, &processId); + Vector<BaseString> command_list; + if (parameters) + { + BaseString tmp(parameters); + tmp.split(command_list); + for (unsigned i= 0; i < command_list.size();) + command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0); } - if (result < 0) { - ndbout << "Shutdown failed." << endl; + if (all) + executeStop(command_list, 0, 0, 0); + else + executeStop(command_list, 0, &processId, 1); +} + +void +CommandInterpreter::executeStop(Vector<BaseString> &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes) +{ + int abort= 0; + for (; command_pos < command_list.size(); command_pos++) + { + const char *item= command_list[command_pos].c_str(); + if (strcasecmp(item, "-A") == 0) + { + abort= 1; + continue; + } + ndbout_c("Invalid option: %s. Expecting -A after STOP", + item); + return; + } + + int result= ndb_mgm_stop2(m_mgmsrv, no_of_nodes, node_ids, abort); + if (result < 0) + { + ndbout_c("Shutdown failed."); printError(); - } else + } + else + { + if (node_ids == 0) + ndbout_c("NDB Cluster has shutdown."); + else { - if(all) - ndbout << "NDB Cluster has shutdown." << endl; - else - ndbout << "Node " << processId << " has shutdown." << endl; + ndbout << "Node"; + for (int i= 0; i < no_of_nodes; i++) + ndbout << " " << node_ids[i]; + ndbout_c(" has shutdown."); } + } } void @@ -1483,47 +1578,74 @@ CommandInterpreter::executeStart(int processId, const char* parameters, void CommandInterpreter::executeRestart(int processId, const char* parameters, - bool all) + bool all) +{ + Vector<BaseString> command_list; + if (parameters) + { + BaseString tmp(parameters); + tmp.split(command_list); + for (unsigned i= 0; i < command_list.size();) + command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0); + } + if (all) + executeRestart(command_list, 0, 0, 0); + else + executeRestart(command_list, 0, &processId, 1); +} + +void +CommandInterpreter::executeRestart(Vector<BaseString> &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes) { int result; - int nostart = 0; - int initialstart = 0; - int abort = 0; - - if(parameters != 0 && strlen(parameters) != 0){ - char * tmpString = my_strdup(parameters,MYF(MY_WME)); - My_auto_ptr<char> ap1(tmpString); - char * tmpPtr = 0; - char * item = strtok_r(tmpString, " ", &tmpPtr); - while(item != NULL){ - if(strcasecmp(item, "-N") == 0) - nostart = 1; - if(strcasecmp(item, "-I") == 0) - initialstart = 1; - if(strcasecmp(item, "-A") == 0) - abort = 1; - item = strtok_r(NULL, " ", &tmpPtr); + int nostart= 0; + int initialstart= 0; + int abort= 0; + + for (; command_pos < command_list.size(); command_pos++) + { + const char *item= command_list[command_pos].c_str(); + if (strcasecmp(item, "-N") == 0) + { + nostart= 1; + continue; + } + if (strcasecmp(item, "-I") == 0) + { + initialstart= 1; + continue; } + if (strcasecmp(item, "-A") == 0) + { + abort= 1; + continue; + } + ndbout_c("Invalid option: %s. Expecting -A,-N or -I after RESTART", + item); + return; } - if(all) { - result = ndb_mgm_restart2(m_mgmsrv, 0, NULL, initialstart, nostart, abort); - } else { - int v[1]; - v[0] = processId; - result = ndb_mgm_restart2(m_mgmsrv, 1, v, initialstart, nostart, abort); - } + result= ndb_mgm_restart2(m_mgmsrv, no_of_nodes, node_ids, + initialstart, nostart, abort); if (result <= 0) { - ndbout.println("Restart failed.", result); + ndbout_c("Restart failed."); printError(); - } else + } + else + { + if (node_ids == 0) + ndbout_c("NDB Cluster is being restarted."); + else { - if(all) - ndbout << "NDB Cluster is being restarted." << endl; - else - ndbout_c("Node %d is being restarted.", processId); + ndbout << "Node"; + for (int i= 0; i < no_of_nodes; i++) + ndbout << " " << node_ids[i]; + ndbout_c(" is being restarted"); } + } } void diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp index 76a045bc806..9b518ba938b 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -294,6 +294,8 @@ static ErrorItem errorTable[] = {MgmtSrvr::SYSTEM_SHUTDOWN_IN_PROGRESS, "System shutdown in progress" }, {MgmtSrvr::NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH, "Node shutdown would cause system crash" }, + {MgmtSrvr::UNSUPPORTED_NODE_SHUTDOWN, + "Unsupported multi node shutdown. Abort option required." }, {MgmtSrvr::NODE_NOT_API_NODE, "The specified node is not an API node." }, {MgmtSrvr::OPERATION_NOT_ALLOWED_START_STOP, "Operation not allowed while nodes are starting or stopping."}, @@ -312,6 +314,9 @@ int MgmtSrvr::translateStopRef(Uint32 errCode) case StopRef::NodeShutdownWouldCauseSystemCrash: return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH; break; + case StopRef::UnsupportedNodeShutdown: + return UNSUPPORTED_NODE_SHUTDOWN; + break; } return 4999; } @@ -386,8 +391,9 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_server, _ownReference(0), theSignalIdleList(NULL), theWaitState(WAIT_SUBSCRIBE_CONF), + m_local_mgm_handle(0), m_event_listner(this), - m_local_mgm_handle(0) + m_master_node(0) { DBUG_ENTER("MgmtSrvr::MgmtSrvr"); @@ -677,23 +683,16 @@ MgmtSrvr::~MgmtSrvr() int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond) { - if(nodeId == 0) - return 0; - - if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB) + if(nodeId == 0 || getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB) return WRONG_PROCESS_TYPE; - // Check if we have contact with it if(unCond){ if(theFacade->theClusterMgr->getNodeInfo(nodeId).connected) return 0; - return NO_CONTACT_WITH_PROCESS; } - if (theFacade->get_node_alive(nodeId) == 0) { - return NO_CONTACT_WITH_PROCESS; - } else { + else if (theFacade->get_node_alive(nodeId) == true) return 0; - } + return NO_CONTACT_WITH_PROCESS; } void report_unknown_signal(SimpleSignal *signal) @@ -935,7 +934,7 @@ int MgmtSrvr::sendStopMgmd(NodeId nodeId, * distributed communication up. */ -int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, +int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids, NodeBitmask &stoppedNodes, Uint32 singleUserNodeId, bool abort, @@ -945,6 +944,12 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, bool initialStart) { int error = 0; + DBUG_ENTER("MgmtSrvr::sendSTOP_REQ"); + DBUG_PRINT("enter", ("no of nodes: %d singleUseNodeId: %d " + "abort: %d stop: %d restart: %d " + "nostart: %d initialStart: %d", + node_ids.size(), singleUserNodeId, + abort, stop, restart, nostart, initialStart)); stoppedNodes.clear(); @@ -982,36 +987,49 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, // send the signals NodeBitmask nodes; - if (nodeId) + NodeId nodeId= 0; + int use_master_node= 0; + int do_send= 0; + int do_stop_self= 0; + NdbNodeBitmask nodes_to_stop; { - if(nodeId==getOwnNodeId()) - { - if(restart) - g_RestartServer= true; - g_StopServer= true; - return 0; - } - if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB) + for (unsigned i= 0; i < node_ids.size(); i++) { - int r; - if((r= okToSendTo(nodeId, true)) != 0) - return r; - if (ss.sendSignal(nodeId, &ssig) != SEND_OK) - return SEND_OR_RECEIVE_FAILED; + nodeId= node_ids[i]; + if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_MGM) + nodes_to_stop.set(nodeId); + else if (nodeId != getOwnNodeId()) + { + error= sendStopMgmd(nodeId, abort, stop, restart, + nostart, initialStart); + if (error == 0) + stoppedNodes.set(nodeId); + } + else + do_stop_self= 1;; } - else if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM) + } + int no_of_nodes_to_stop= nodes_to_stop.count(); + if (node_ids.size()) + { + if (no_of_nodes_to_stop) { - error= sendStopMgmd(nodeId, abort, stop, restart, nostart, initialStart); - if(error==0) - stoppedNodes.set(nodeId); - return error; + do_send= 1; + if (no_of_nodes_to_stop == 1) + { + nodeId= nodes_to_stop.find(0); + } + else // multi node stop, send to master + { + use_master_node= 1; + nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes); + StopReq::setStopNodes(stopReq->requestInfo, 1); + } } - else - return WRONG_PROCESS_TYPE; - nodes.set(nodeId); } else { + nodeId= 0; while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) { if(okToSendTo(nodeId, true) == 0) @@ -1032,8 +1050,30 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, } // now wait for the replies - while (!nodes.isclear()) + while (!nodes.isclear() || do_send) { + if (do_send) + { + int r; + assert(nodes.count() == 0); + if (use_master_node) + nodeId= m_master_node; + if ((r= okToSendTo(nodeId, true)) != 0) + { + bool next; + if (!use_master_node) + DBUG_RETURN(r); + m_master_node= nodeId= 0; + while((next= getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true && + (r= okToSendTo(nodeId, true)) != 0); + if (!next) + DBUG_RETURN(NO_CONTACT_WITH_DB_NODES); + } + if (ss.sendSignal(nodeId, &ssig) != SEND_OK) + DBUG_RETURN(SEND_OR_RECEIVE_FAILED); + nodes.set(nodeId); + do_send= 0; + } SimpleSignal *signal = ss.waitFor(); int gsn = signal->readSignalNumber(); switch (gsn) { @@ -1045,6 +1085,13 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, #endif assert(nodes.get(nodeId)); nodes.clear(nodeId); + if (ref->errorCode == StopRef::MultiNodeShutdownNotMaster) + { + assert(use_master_node); + m_master_node= ref->masterNodeId; + do_send= 1; + continue; + } error = translateStopRef(ref->errorCode); break; } @@ -1055,9 +1102,16 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, ndbout_c("Node %d single user mode", nodeId); #endif assert(nodes.get(nodeId)); - assert(singleUserNodeId != 0); + if (singleUserNodeId != 0) + { + stoppedNodes.set(nodeId); + } + else + { + assert(no_of_nodes_to_stop > 1); + stoppedNodes.bitOR(nodes_to_stop); + } nodes.clear(nodeId); - stoppedNodes.set(nodeId); break; } case GSN_NF_COMPLETEREP:{ @@ -1096,17 +1150,24 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, #ifdef VM_TRACE ndbout_c("Unknown signal %d", gsn); #endif - return SEND_OR_RECEIVE_FAILED; + DBUG_RETURN(SEND_OR_RECEIVE_FAILED); } } - return error; + if (!error && do_stop_self) + { + if (restart) + g_RestartServer= true; + g_StopServer= true; + } + DBUG_RETURN(error); } /* - * Stop one node + * Stop one nodes */ -int MgmtSrvr::stopNode(int nodeId, bool abort) +int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids, + int *stopCount, bool abort) { if (!abort) { @@ -1121,14 +1182,17 @@ int MgmtSrvr::stopNode(int nodeId, bool abort) } } NodeBitmask nodes; - return sendSTOP_REQ(nodeId, - nodes, - 0, - abort, - false, - false, - false, - false); + int ret= sendSTOP_REQ(node_ids, + nodes, + 0, + abort, + false, + false, + false, + false); + if (stopCount) + *stopCount= nodes.count(); + return ret; } /* @@ -1138,7 +1202,8 @@ int MgmtSrvr::stopNode(int nodeId, bool abort) int MgmtSrvr::stop(int * stopCount, bool abort) { NodeBitmask nodes; - int ret = sendSTOP_REQ(0, + Vector<NodeId> node_ids; + int ret = sendSTOP_REQ(node_ids, nodes, 0, abort, @@ -1169,7 +1234,8 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId) return OPERATION_NOT_ALLOWED_START_STOP; } NodeBitmask nodes; - int ret = sendSTOP_REQ(0, + Vector<NodeId> node_ids; + int ret = sendSTOP_REQ(node_ids, nodes, singleUserNodeId, false, @@ -1186,18 +1252,22 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId) * Perform node restart */ -int MgmtSrvr::restartNode(int nodeId, bool nostart, bool initialStart, - bool abort) +int MgmtSrvr::restartNodes(const Vector<NodeId> &node_ids, + int * stopCount, bool nostart, + bool initialStart, bool abort) { NodeBitmask nodes; - return sendSTOP_REQ(nodeId, - nodes, - 0, - abort, - false, - true, - nostart, - initialStart); + int ret= sendSTOP_REQ(node_ids, + nodes, + 0, + abort, + false, + true, + nostart, + initialStart); + if (stopCount) + *stopCount = nodes.count(); + return ret; } /* @@ -1208,7 +1278,8 @@ int MgmtSrvr::restart(bool nostart, bool initialStart, bool abort, int * stopCount ) { NodeBitmask nodes; - int ret = sendSTOP_REQ(0, + Vector<NodeId> node_ids; + int ret = sendSTOP_REQ(node_ids, nodes, 0, abort, @@ -2135,12 +2206,16 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) SignalSender ss(theFacade); ss.lock(); // lock will be released on exit - bool next; - NodeId nodeId = 0; - while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true && - theFacade->get_node_alive(nodeId) == false); - - if(!next) return NO_CONTACT_WITH_DB_NODES; + NodeId nodeId = m_master_node; + if (okToSendTo(nodeId, false) != 0) + { + bool next; + nodeId = m_master_node = 0; + while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true && + okToSendTo(nodeId, false) != 0); + if(!next) + return NO_CONTACT_WITH_DB_NODES; + } SimpleSignal ssig; BackupReq* req = CAST_PTR(BackupReq, ssig.getDataPtrSend()); @@ -2208,7 +2283,7 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) const BackupRef * const ref = CAST_CONSTPTR(BackupRef, signal->getDataPtr()); if(ref->errorCode == BackupRef::IAmNotMaster){ - nodeId = refToNode(ref->masterRef); + m_master_node = nodeId = refToNode(ref->masterRef); #ifdef VM_TRACE ndbout_c("I'm not master resending to %d", nodeId); #endif diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp index 46bdb112cb9..fe1603a1953 100644 --- a/ndb/src/mgmsrv/MgmtSrvr.hpp +++ b/ndb/src/mgmsrv/MgmtSrvr.hpp @@ -176,6 +176,7 @@ public: STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 ); STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 ); + STATIC_CONST( UNSUPPORTED_NODE_SHUTDOWN = 5031 ); STATIC_CONST( NODE_NOT_API_NODE = 5062 ); STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 ); @@ -252,7 +253,7 @@ public: * @param processId: Id of the DB process to stop * @return 0 if succeeded, otherwise: as stated above, plus: */ - int stopNode(int nodeId, bool abort = false); + int stopNodes(const Vector<NodeId> &node_ids, int *stopCount, bool abort); /** * Stop the system @@ -286,11 +287,12 @@ public: int start(int processId); /** - * Restart a node + * Restart nodes * @param processId: Id of the DB process to start */ - int restartNode(int processId, bool nostart, bool initialStart, - bool abort = false); + int restartNodes(const Vector<NodeId> &node_ids, + int *stopCount, bool nostart, + bool initialStart, bool abort); /** * Restart the system @@ -494,7 +496,7 @@ private: bool nostart, bool initialStart); - int sendSTOP_REQ(NodeId nodeId, + int sendSTOP_REQ(const Vector<NodeId> &node_ids, NodeBitmask &stoppedNodes, Uint32 singleUserNodeId, bool abort, @@ -653,6 +655,8 @@ private: friend class Ndb_mgmd_event_service; Ndb_mgmd_event_service m_event_listner; + NodeId m_master_node; + /** * Handles the thread wich upon a 'Node is started' event will * set the node's previous loglevel settings. diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp index 3564c5c40ba..a80827abd8f 100644 --- a/ndb/src/mgmsrv/Services.cpp +++ b/ndb/src/mgmsrv/Services.cpp @@ -866,14 +866,11 @@ MgmApiSession::restart(Parser<MgmApiSession>::Context &, } int restarted = 0; - int result = 0; - - for(size_t i = 0; i < nodes.size(); i++) - if((result = m_mgmsrv.restartNode(nodes[i], - nostart != 0, - initialstart != 0, - abort != 0)) == 0) - restarted++; + int result= m_mgmsrv.restartNodes(nodes, + &restarted, + nostart != 0, + initialstart != 0, + abort != 0); m_output->println("restart reply"); if(result != 0){ @@ -998,7 +995,12 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, args.get("node", (const char **)&nodes_str); if(nodes_str == NULL) + { + m_output->println("stop reply"); + m_output->println("result: empty node list"); + m_output->println(""); return; + } args.get("abort", &abort); char *p, *last; @@ -1008,29 +1010,10 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, nodes.push_back(atoi(p)); } - int stop_self= 0; - size_t i; - - for(i=0; i < nodes.size(); i++) { - if (nodes[i] == m_mgmsrv.getOwnNodeId()) { - stop_self= 1; - if (i != nodes.size()-1) { - m_output->println("stop reply"); - m_output->println("result: server must be stopped last"); - m_output->println(""); - return; - } - } - } - - int stopped = 0, result = 0; - - for(i=0; i < nodes.size(); i++) - if (nodes[i] != m_mgmsrv.getOwnNodeId()) { - if((result = m_mgmsrv.stopNode(nodes[i], abort != 0)) == 0) - stopped++; - } else - stopped++; + int stopped= 0; + int result= 0; + if (nodes.size()) + result= m_mgmsrv.stopNodes(nodes, &stopped, abort != 0); m_output->println("stop reply"); if(result != 0) @@ -1039,9 +1022,6 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, m_output->println("result: Ok"); m_output->println("stopped: %d", stopped); m_output->println(""); - - if (stop_self) - g_StopServer= true; } |