summary refs log tree commit diff
path: root/ndb
diff options
context:
space:
mode:
author unknown <jonas@perch.ndb.mysql.com> 2006-04-07 12:01:08 +0200
committer unknown <jonas@perch.ndb.mysql.com> 2006-04-07 12:01:08 +0200
commit 978c18b6801834521a21184145ac18194446b46e (patch)
tree 74f92be1428a013a6f3c0b6846ee1d43e74851b3 /ndb
parent f3c792863c4f4ae6d9e5c92ce1755310ad912293 (diff)
parent cfee84be3abe7eaa3b67e02f27b11ec821534674 (diff)
download mariadb-git-978c18b6801834521a21184145ac18194446b46e.tar.gz
Merge joreland@bk-internal.mysql.com:/home/bk/mysql-5.0-jonas
into perch.ndb.mysql.com:/home/jonas/src/50-jonas

ndb/src/kernel/blocks/qmgr/QmgrMain.cpp: Auto merged
Diffstat (limited to 'ndb')
-rw-r--r-- ndb/include/kernel/signaldata/StopReq.hpp 3
-rw-r--r-- ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp 5
-rw-r--r-- ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 177
-rw-r--r-- ndb/src/mgmclient/CommandInterpreter.cpp 232
-rw-r--r-- ndb/src/mgmsrv/MgmtSrvr.cpp 213
-rw-r--r-- ndb/src/mgmsrv/MgmtSrvr.hpp 14
-rw-r--r-- ndb/src/mgmsrv/Services.cpp 48
7 files changed, 440 insertions, 252 deletions
diff --git a/ndb/include/kernel/signaldata/StopReq.hpp b/ndb/include/kernel/signaldata/StopReq.hpp
index 8a9fde75b6c..70e195961ce 100644
--- a/ndb/include/kernel/signaldata/StopReq.hpp
+++ b/ndb/include/kernel/signaldata/StopReq.hpp
@@ -92,7 +92,7 @@ class StopRef
friend class Ndbcntr;
public:
- STATIC_CONST( SignalLength = 2 );
+ STATIC_CONST( SignalLength = 3 );
enum ErrorCode {
OK = 0,
@@ -107,6 +107,7 @@ public:
public:
Uint32 senderData;
Uint32 errorCode;
+ Uint32 masterNodeId;
};
inline
diff --git a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
index 3fc24e395b1..c403aad5516 100644
--- a/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
+++ b/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
@@ -2125,6 +2125,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
else
ref->errorCode = StopRef::NodeShutdownInProgress;
ref->senderData = senderData;
+ ref->masterNodeId = cmasterNodeId;
if (senderRef != RNIL)
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
@@ -2136,6 +2137,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
jam();
ref->errorCode = StopRef::UnsupportedNodeShutdown;
ref->senderData = senderData;
+ ref->masterNodeId = cmasterNodeId;
if (senderRef != RNIL)
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
return;
@@ -2146,6 +2148,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
jam();
ref->errorCode = StopRef::MultiNodeShutdownNotMaster;
ref->senderData = senderData;
+ ref->masterNodeId = cmasterNodeId;
if (senderRef != RNIL)
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
return;
@@ -2289,6 +2292,7 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){
ref->senderData = stopReq.senderData;
ref->errorCode = StopRef::NodeShutdownWouldCauseSystemCrash;
+ ref->masterNodeId = cntr.cmasterNodeId;
const BlockReference bref = stopReq.senderRef;
if (bref != RNIL)
@@ -2437,6 +2441,7 @@ void Ndbcntr::execABORT_ALL_REF(Signal* signal){
StopRef * const stopRef = (StopRef *)&signal->theData[0];
stopRef->senderData = c_stopRec.stopReq.senderData;
stopRef->errorCode = StopRef::TransactionAbortFailed;
+ stopRef->masterNodeId = cmasterNodeId;
sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
}
diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
index c98833da7b1..9a7256b4a55 100644
--- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
+++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
@@ -1214,115 +1214,116 @@ Qmgr::check_startup(Signal* signal)
goto start_report;
}
}
- const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
- CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
-
{
- /**
- * Check for missing node group directly
- */
- char buf[100];
- NdbNodeBitmask check;
- check.assign(c_definedNodes);
- check.bitANDC(c_start.m_starting_nodes); // Not connected nodes
- check.bitOR(c_start.m_starting_nodes_w_log);
-
- sd->blockRef = reference();
- sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
- sd->mask = check;
- EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
- CheckNodeGroups::SignalLength);
-
- if (sd->output == CheckNodeGroups::Lose)
+ const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
+ CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
+
{
- jam();
- goto missing_nodegroup;
+ /**
+ * Check for missing node group directly
+ */
+ char buf[100];
+ NdbNodeBitmask check;
+ check.assign(c_definedNodes);
+ check.bitANDC(c_start.m_starting_nodes); // Not connected nodes
+ check.bitOR(c_start.m_starting_nodes_w_log);
+
+ sd->blockRef = reference();
+ sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
+ sd->mask = check;
+ EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
+ CheckNodeGroups::SignalLength);
+
+ if (sd->output == CheckNodeGroups::Lose)
+ {
+ jam();
+ goto missing_nodegroup;
+ }
}
- }
- sd->blockRef = reference();
- sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
- sd->mask = c_start.m_starting_nodes;
- EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
- CheckNodeGroups::SignalLength);
+ sd->blockRef = reference();
+ sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
+ sd->mask = c_start.m_starting_nodes;
+ EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
+ CheckNodeGroups::SignalLength);
- const Uint32 result = sd->output;
+ const Uint32 result = sd->output;
- sd->blockRef = reference();
- sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
- sd->mask = c_start.m_starting_nodes_w_log;
- EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
- CheckNodeGroups::SignalLength);
+ sd->blockRef = reference();
+ sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
+ sd->mask = c_start.m_starting_nodes_w_log;
+ EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
+ CheckNodeGroups::SignalLength);
- const Uint32 result_w_log = sd->output;
+ const Uint32 result_w_log = sd->output;
- if (tmp.equal(c_definedNodes))
- {
+ if (tmp.equal(c_definedNodes))
+ {
+ /**
+ * All nodes (wrt no-wait nodes) has connected...
+ * this means that we will now start or die
+ */
+ jam();
+ switch(result_w_log){
+ case CheckNodeGroups::Lose:
+ {
+ jam();
+ goto missing_nodegroup;
+ }
+ case CheckNodeGroups::Win:
+ signal->theData[1] = all ? 0x8001 : 0x8002;
+ report_mask.assign(c_definedNodes);
+ report_mask.bitANDC(c_start.m_starting_nodes);
+ retVal = 1;
+ goto start_report;
+ case CheckNodeGroups::Partitioning:
+ ndbrequire(result != CheckNodeGroups::Lose);
+ signal->theData[1] =
+ all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
+ report_mask.assign(c_definedNodes);
+ report_mask.bitANDC(c_start.m_starting_nodes);
+ retVal = 1;
+ goto start_report;
+ }
+ }
+
+ if (now < partial_timeout)
+ {
+ jam();
+ signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3;
+ signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000);
+ report_mask.assign(wait);
+ retVal = 0;
+ goto start_report;
+ }
+
/**
- * All nodes (wrt no-wait nodes) has connected...
- * this means that we will now start or die
- */
- jam();
+ * Start partial has passed...check for partitioning...
+ */
switch(result_w_log){
case CheckNodeGroups::Lose:
- {
jam();
goto missing_nodegroup;
- }
- case CheckNodeGroups::Win:
- signal->theData[1] = all ? 0x8001 : 0x8002;
- report_mask.assign(c_definedNodes);
- report_mask.bitANDC(c_start.m_starting_nodes);
- retVal = 1;
- goto start_report;
case CheckNodeGroups::Partitioning:
- ndbrequire(result != CheckNodeGroups::Lose);
+ if (now < partitioned_timeout && result != CheckNodeGroups::Win)
+ {
+ signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5;
+ signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
+ report_mask.assign(c_definedNodes);
+ report_mask.bitANDC(c_start.m_starting_nodes);
+ retVal = 0;
+ goto start_report;
+ }
+ // Fall through...
+ case CheckNodeGroups::Win:
signal->theData[1] =
- all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
+ all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
report_mask.assign(c_definedNodes);
report_mask.bitANDC(c_start.m_starting_nodes);
retVal = 1;
goto start_report;
}
}
-
- if (now < partial_timeout)
- {
- jam();
- signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3;
- signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000);
- report_mask.assign(wait);
- retVal = 0;
- goto start_report;
- }
-
- /**
- * Start partial has passed...check for partitioning...
- */
- switch(result_w_log){
- case CheckNodeGroups::Lose:
- jam();
- goto missing_nodegroup;
- case CheckNodeGroups::Partitioning:
- if (now < partitioned_timeout && result != CheckNodeGroups::Win)
- {
- signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5;
- signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
- report_mask.assign(c_definedNodes);
- report_mask.bitANDC(c_start.m_starting_nodes);
- retVal = 0;
- goto start_report;
- }
- // Fall through...
- case CheckNodeGroups::Win:
- signal->theData[1] =
- all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
- report_mask.assign(c_definedNodes);
- report_mask.bitANDC(c_start.m_starting_nodes);
- retVal = 1;
- goto start_report;
- }
-
ndbrequire(false);
start_report:
diff --git a/ndb/src/mgmclient/CommandInterpreter.cpp b/ndb/src/mgmclient/CommandInterpreter.cpp
index 74d7f879f9c..39c84fd8055 100644
--- a/ndb/src/mgmclient/CommandInterpreter.cpp
+++ b/ndb/src/mgmclient/CommandInterpreter.cpp
@@ -25,6 +25,7 @@
#endif
#include <mgmapi.h>
+#include <util/BaseString.hpp>
class MgmtSrvr;
@@ -70,6 +71,9 @@ private:
*/
void analyseAfterFirstToken(int processId, char* allAfterFirstTokenCstr);
+ void executeCommand(Vector<BaseString> &command_list,
+ unsigned command_pos,
+ int *node_ids, int no_of_nodes);
/**
* Parse the block specification part of the LOG* commands,
* things after LOG*: [BLOCK = {ALL|<blockName>+}]
@@ -104,10 +108,14 @@ private:
public:
void executeStop(int processId, const char* parameters, bool all);
+ void executeStop(Vector<BaseString> &command_list, unsigned command_pos,
+ int *node_ids, int no_of_nodes);
void executeEnterSingleUser(char* parameters);
void executeExitSingleUser(char* parameters);
void executeStart(int processId, const char* parameters, bool all);
void executeRestart(int processId, const char* parameters, bool all);
+ void executeRestart(Vector<BaseString> &command_list, unsigned command_pos,
+ int *node_ids, int no_of_nodes);
void executeLogLevel(int processId, const char* parameters, bool all);
void executeError(int processId, const char* parameters, bool all);
void executeLog(int processId, const char* parameters, bool all);
@@ -643,9 +651,16 @@ CommandInterpreter::execute_impl(const char *_line)
}
} while (do_continue);
// if there is anything in the line proceed
+ Vector<BaseString> command_list;
+ {
+ BaseString tmp(line);
+ tmp.split(command_list);
+ for (unsigned i= 0; i < command_list.size();)
+ command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0);
+ }
char* firstToken = strtok(line, " ");
char* allAfterFirstToken = strtok(NULL, "");
-
+
if (strcasecmp(firstToken, "HELP") == 0 ||
strcasecmp(firstToken, "?") == 0) {
executeHelp(allAfterFirstToken);
@@ -723,22 +738,45 @@ CommandInterpreter::execute_impl(const char *_line)
analyseAfterFirstToken(-1, allAfterFirstToken);
} else {
/**
- * First token should be a digit, node ID
+ * First tokens should be digits, node ID's
*/
- int nodeId;
-
- if (! convert(firstToken, nodeId)) {
+ int node_ids[MAX_NODES];
+ unsigned pos;
+ for (pos= 0; pos < command_list.size(); pos++)
+ {
+ int node_id;
+ if (convert(command_list[pos].c_str(), node_id))
+ {
+ if (node_id <= 0) {
+ ndbout << "Invalid node ID: " << command_list[pos].c_str()
+ << "." << endl;
+ DBUG_RETURN(true);
+ }
+ node_ids[pos]= node_id;
+ continue;
+ }
+ break;
+ }
+ int no_of_nodes= pos;
+ if (no_of_nodes == 0)
+ {
+ /* No digit found */
invalid_command(_line);
DBUG_RETURN(true);
}
-
- if (nodeId <= 0) {
- ndbout << "Invalid node ID: " << firstToken << "." << endl;
+ if (pos == command_list.size())
+ {
+ /* No command found */
+ invalid_command(_line);
DBUG_RETURN(true);
}
-
- analyseAfterFirstToken(nodeId, allAfterFirstToken);
-
+ if (no_of_nodes == 1)
+ {
+ analyseAfterFirstToken(node_ids[0], allAfterFirstToken);
+ DBUG_RETURN(true);
+ }
+ executeCommand(command_list, pos, node_ids, no_of_nodes);
+ DBUG_RETURN(true);
}
DBUG_RETURN(true);
}
@@ -808,6 +846,27 @@ CommandInterpreter::analyseAfterFirstToken(int processId,
ndbout << endl;
}
+void
+CommandInterpreter::executeCommand(Vector<BaseString> &command_list,
+ unsigned command_pos,
+ int *node_ids, int no_of_nodes)
+{
+ const char *cmd= command_list[command_pos].c_str();
+ if (strcasecmp("STOP", cmd) == 0)
+ {
+ executeStop(command_list, command_pos+1, node_ids, no_of_nodes);
+ return;
+ }
+ if (strcasecmp("RESTART", cmd) == 0)
+ {
+ executeRestart(command_list, command_pos+1, node_ids, no_of_nodes);
+ return;
+ }
+ ndbout_c("Invalid command: '%s' after multi node id list. "
+ "Expected STOP or RESTART.", cmd);
+ return;
+}
+
/**
* Get next nodeid larger than the give node_id. node_id will be
* set to the next node_id in the list. node_id should be set
@@ -1400,24 +1459,60 @@ CommandInterpreter::executeClusterLog(char* parameters)
//*****************************************************************************
void
-CommandInterpreter::executeStop(int processId, const char *, bool all)
+CommandInterpreter::executeStop(int processId, const char *parameters,
+ bool all)
{
- int result = 0;
- if(all) {
- result = ndb_mgm_stop(m_mgmsrv, 0, 0);
- } else {
- result = ndb_mgm_stop(m_mgmsrv, 1, &processId);
+ Vector<BaseString> command_list;
+ if (parameters)
+ {
+ BaseString tmp(parameters);
+ tmp.split(command_list);
+ for (unsigned i= 0; i < command_list.size();)
+ command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0);
}
- if (result < 0) {
- ndbout << "Shutdown failed." << endl;
+ if (all)
+ executeStop(command_list, 0, 0, 0);
+ else
+ executeStop(command_list, 0, &processId, 1);
+}
+
+void
+CommandInterpreter::executeStop(Vector<BaseString> &command_list,
+ unsigned command_pos,
+ int *node_ids, int no_of_nodes)
+{
+ int abort= 0;
+ for (; command_pos < command_list.size(); command_pos++)
+ {
+ const char *item= command_list[command_pos].c_str();
+ if (strcasecmp(item, "-A") == 0)
+ {
+ abort= 1;
+ continue;
+ }
+ ndbout_c("Invalid option: %s. Expecting -A after STOP",
+ item);
+ return;
+ }
+
+ int result= ndb_mgm_stop2(m_mgmsrv, no_of_nodes, node_ids, abort);
+ if (result < 0)
+ {
+ ndbout_c("Shutdown failed.");
printError();
- } else
+ }
+ else
+ {
+ if (node_ids == 0)
+ ndbout_c("NDB Cluster has shutdown.");
+ else
{
- if(all)
- ndbout << "NDB Cluster has shutdown." << endl;
- else
- ndbout << "Node " << processId << " has shutdown." << endl;
+ ndbout << "Node";
+ for (int i= 0; i < no_of_nodes; i++)
+ ndbout << " " << node_ids[i];
+ ndbout_c(" has shutdown.");
}
+ }
}
void
@@ -1483,47 +1578,74 @@ CommandInterpreter::executeStart(int processId, const char* parameters,
void
CommandInterpreter::executeRestart(int processId, const char* parameters,
- bool all)
+ bool all)
+{
+ Vector<BaseString> command_list;
+ if (parameters)
+ {
+ BaseString tmp(parameters);
+ tmp.split(command_list);
+ for (unsigned i= 0; i < command_list.size();)
+ command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0);
+ }
+ if (all)
+ executeRestart(command_list, 0, 0, 0);
+ else
+ executeRestart(command_list, 0, &processId, 1);
+}
+
+void
+CommandInterpreter::executeRestart(Vector<BaseString> &command_list,
+ unsigned command_pos,
+ int *node_ids, int no_of_nodes)
{
int result;
- int nostart = 0;
- int initialstart = 0;
- int abort = 0;
-
- if(parameters != 0 && strlen(parameters) != 0){
- char * tmpString = my_strdup(parameters,MYF(MY_WME));
- My_auto_ptr<char> ap1(tmpString);
- char * tmpPtr = 0;
- char * item = strtok_r(tmpString, " ", &tmpPtr);
- while(item != NULL){
- if(strcasecmp(item, "-N") == 0)
- nostart = 1;
- if(strcasecmp(item, "-I") == 0)
- initialstart = 1;
- if(strcasecmp(item, "-A") == 0)
- abort = 1;
- item = strtok_r(NULL, " ", &tmpPtr);
+ int nostart= 0;
+ int initialstart= 0;
+ int abort= 0;
+
+ for (; command_pos < command_list.size(); command_pos++)
+ {
+ const char *item= command_list[command_pos].c_str();
+ if (strcasecmp(item, "-N") == 0)
+ {
+ nostart= 1;
+ continue;
+ }
+ if (strcasecmp(item, "-I") == 0)
+ {
+ initialstart= 1;
+ continue;
}
+ if (strcasecmp(item, "-A") == 0)
+ {
+ abort= 1;
+ continue;
+ }
+ ndbout_c("Invalid option: %s. Expecting -A,-N or -I after RESTART",
+ item);
+ return;
}
- if(all) {
- result = ndb_mgm_restart2(m_mgmsrv, 0, NULL, initialstart, nostart, abort);
- } else {
- int v[1];
- v[0] = processId;
- result = ndb_mgm_restart2(m_mgmsrv, 1, v, initialstart, nostart, abort);
- }
+ result= ndb_mgm_restart2(m_mgmsrv, no_of_nodes, node_ids,
+ initialstart, nostart, abort);
if (result <= 0) {
- ndbout.println("Restart failed.", result);
+ ndbout_c("Restart failed.");
printError();
- } else
+ }
+ else
+ {
+ if (node_ids == 0)
+ ndbout_c("NDB Cluster is being restarted.");
+ else
{
- if(all)
- ndbout << "NDB Cluster is being restarted." << endl;
- else
- ndbout_c("Node %d is being restarted.", processId);
+ ndbout << "Node";
+ for (int i= 0; i < no_of_nodes; i++)
+ ndbout << " " << node_ids[i];
+ ndbout_c(" is being restarted");
}
+ }
}
void
diff --git a/ndb/src/mgmsrv/MgmtSrvr.cpp b/ndb/src/mgmsrv/MgmtSrvr.cpp
index 76a045bc806..9b518ba938b 100644
--- a/ndb/src/mgmsrv/MgmtSrvr.cpp
+++ b/ndb/src/mgmsrv/MgmtSrvr.cpp
@@ -294,6 +294,8 @@ static ErrorItem errorTable[] =
{MgmtSrvr::SYSTEM_SHUTDOWN_IN_PROGRESS, "System shutdown in progress" },
{MgmtSrvr::NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH,
"Node shutdown would cause system crash" },
+ {MgmtSrvr::UNSUPPORTED_NODE_SHUTDOWN,
+ "Unsupported multi node shutdown. Abort option required." },
{MgmtSrvr::NODE_NOT_API_NODE, "The specified node is not an API node." },
{MgmtSrvr::OPERATION_NOT_ALLOWED_START_STOP,
"Operation not allowed while nodes are starting or stopping."},
@@ -312,6 +314,9 @@ int MgmtSrvr::translateStopRef(Uint32 errCode)
case StopRef::NodeShutdownWouldCauseSystemCrash:
return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH;
break;
+ case StopRef::UnsupportedNodeShutdown:
+ return UNSUPPORTED_NODE_SHUTDOWN;
+ break;
}
return 4999;
}
@@ -386,8 +391,9 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_server,
_ownReference(0),
theSignalIdleList(NULL),
theWaitState(WAIT_SUBSCRIBE_CONF),
+ m_local_mgm_handle(0),
m_event_listner(this),
- m_local_mgm_handle(0)
+ m_master_node(0)
{
DBUG_ENTER("MgmtSrvr::MgmtSrvr");
@@ -677,23 +683,16 @@ MgmtSrvr::~MgmtSrvr()
int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond)
{
- if(nodeId == 0)
- return 0;
-
- if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
+ if(nodeId == 0 || getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
return WRONG_PROCESS_TYPE;
-
// Check if we have contact with it
if(unCond){
if(theFacade->theClusterMgr->getNodeInfo(nodeId).connected)
return 0;
- return NO_CONTACT_WITH_PROCESS;
}
- if (theFacade->get_node_alive(nodeId) == 0) {
- return NO_CONTACT_WITH_PROCESS;
- } else {
+ else if (theFacade->get_node_alive(nodeId) == true)
return 0;
- }
+ return NO_CONTACT_WITH_PROCESS;
}
void report_unknown_signal(SimpleSignal *signal)
@@ -935,7 +934,7 @@ int MgmtSrvr::sendStopMgmd(NodeId nodeId,
* distributed communication up.
*/
-int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
+int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
NodeBitmask &stoppedNodes,
Uint32 singleUserNodeId,
bool abort,
@@ -945,6 +944,12 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
bool initialStart)
{
int error = 0;
+ DBUG_ENTER("MgmtSrvr::sendSTOP_REQ");
+ DBUG_PRINT("enter", ("no of nodes: %d singleUseNodeId: %d "
+ "abort: %d stop: %d restart: %d "
+ "nostart: %d initialStart: %d",
+ node_ids.size(), singleUserNodeId,
+ abort, stop, restart, nostart, initialStart));
stoppedNodes.clear();
@@ -982,36 +987,49 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
// send the signals
NodeBitmask nodes;
- if (nodeId)
+ NodeId nodeId= 0;
+ int use_master_node= 0;
+ int do_send= 0;
+ int do_stop_self= 0;
+ NdbNodeBitmask nodes_to_stop;
{
- if(nodeId==getOwnNodeId())
- {
- if(restart)
- g_RestartServer= true;
- g_StopServer= true;
- return 0;
- }
- if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
+ for (unsigned i= 0; i < node_ids.size(); i++)
{
- int r;
- if((r= okToSendTo(nodeId, true)) != 0)
- return r;
- if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
- return SEND_OR_RECEIVE_FAILED;
+ nodeId= node_ids[i];
+ if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_MGM)
+ nodes_to_stop.set(nodeId);
+ else if (nodeId != getOwnNodeId())
+ {
+ error= sendStopMgmd(nodeId, abort, stop, restart,
+ nostart, initialStart);
+ if (error == 0)
+ stoppedNodes.set(nodeId);
+ }
+ else
+ do_stop_self= 1;;
}
- else if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM)
+ }
+ int no_of_nodes_to_stop= nodes_to_stop.count();
+ if (node_ids.size())
+ {
+ if (no_of_nodes_to_stop)
{
- error= sendStopMgmd(nodeId, abort, stop, restart, nostart, initialStart);
- if(error==0)
- stoppedNodes.set(nodeId);
- return error;
+ do_send= 1;
+ if (no_of_nodes_to_stop == 1)
+ {
+ nodeId= nodes_to_stop.find(0);
+ }
+ else // multi node stop, send to master
+ {
+ use_master_node= 1;
+ nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes);
+ StopReq::setStopNodes(stopReq->requestInfo, 1);
+ }
}
- else
- return WRONG_PROCESS_TYPE;
- nodes.set(nodeId);
}
else
{
+ nodeId= 0;
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
{
if(okToSendTo(nodeId, true) == 0)
@@ -1032,8 +1050,30 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
}
// now wait for the replies
- while (!nodes.isclear())
+ while (!nodes.isclear() || do_send)
{
+ if (do_send)
+ {
+ int r;
+ assert(nodes.count() == 0);
+ if (use_master_node)
+ nodeId= m_master_node;
+ if ((r= okToSendTo(nodeId, true)) != 0)
+ {
+ bool next;
+ if (!use_master_node)
+ DBUG_RETURN(r);
+ m_master_node= nodeId= 0;
+ while((next= getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
+ (r= okToSendTo(nodeId, true)) != 0);
+ if (!next)
+ DBUG_RETURN(NO_CONTACT_WITH_DB_NODES);
+ }
+ if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
+ DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
+ nodes.set(nodeId);
+ do_send= 0;
+ }
SimpleSignal *signal = ss.waitFor();
int gsn = signal->readSignalNumber();
switch (gsn) {
@@ -1045,6 +1085,13 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
#endif
assert(nodes.get(nodeId));
nodes.clear(nodeId);
+ if (ref->errorCode == StopRef::MultiNodeShutdownNotMaster)
+ {
+ assert(use_master_node);
+ m_master_node= ref->masterNodeId;
+ do_send= 1;
+ continue;
+ }
error = translateStopRef(ref->errorCode);
break;
}
@@ -1055,9 +1102,16 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
ndbout_c("Node %d single user mode", nodeId);
#endif
assert(nodes.get(nodeId));
- assert(singleUserNodeId != 0);
+ if (singleUserNodeId != 0)
+ {
+ stoppedNodes.set(nodeId);
+ }
+ else
+ {
+ assert(no_of_nodes_to_stop > 1);
+ stoppedNodes.bitOR(nodes_to_stop);
+ }
nodes.clear(nodeId);
- stoppedNodes.set(nodeId);
break;
}
case GSN_NF_COMPLETEREP:{
@@ -1096,17 +1150,24 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
#ifdef VM_TRACE
ndbout_c("Unknown signal %d", gsn);
#endif
- return SEND_OR_RECEIVE_FAILED;
+ DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
}
}
- return error;
+ if (!error && do_stop_self)
+ {
+ if (restart)
+ g_RestartServer= true;
+ g_StopServer= true;
+ }
+ DBUG_RETURN(error);
}
/*
- * Stop one node
+ * Stop one nodes
*/
-int MgmtSrvr::stopNode(int nodeId, bool abort)
+int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids,
+ int *stopCount, bool abort)
{
if (!abort)
{
@@ -1121,14 +1182,17 @@ int MgmtSrvr::stopNode(int nodeId, bool abort)
}
}
NodeBitmask nodes;
- return sendSTOP_REQ(nodeId,
- nodes,
- 0,
- abort,
- false,
- false,
- false,
- false);
+ int ret= sendSTOP_REQ(node_ids,
+ nodes,
+ 0,
+ abort,
+ false,
+ false,
+ false,
+ false);
+ if (stopCount)
+ *stopCount= nodes.count();
+ return ret;
}
/*
@@ -1138,7 +1202,8 @@ int MgmtSrvr::stopNode(int nodeId, bool abort)
int MgmtSrvr::stop(int * stopCount, bool abort)
{
NodeBitmask nodes;
- int ret = sendSTOP_REQ(0,
+ Vector<NodeId> node_ids;
+ int ret = sendSTOP_REQ(node_ids,
nodes,
0,
abort,
@@ -1169,7 +1234,8 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
return OPERATION_NOT_ALLOWED_START_STOP;
}
NodeBitmask nodes;
- int ret = sendSTOP_REQ(0,
+ Vector<NodeId> node_ids;
+ int ret = sendSTOP_REQ(node_ids,
nodes,
singleUserNodeId,
false,
@@ -1186,18 +1252,22 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
* Perform node restart
*/
-int MgmtSrvr::restartNode(int nodeId, bool nostart, bool initialStart,
- bool abort)
+int MgmtSrvr::restartNodes(const Vector<NodeId> &node_ids,
+ int * stopCount, bool nostart,
+ bool initialStart, bool abort)
{
NodeBitmask nodes;
- return sendSTOP_REQ(nodeId,
- nodes,
- 0,
- abort,
- false,
- true,
- nostart,
- initialStart);
+ int ret= sendSTOP_REQ(node_ids,
+ nodes,
+ 0,
+ abort,
+ false,
+ true,
+ nostart,
+ initialStart);
+ if (stopCount)
+ *stopCount = nodes.count();
+ return ret;
}
/*
@@ -1208,7 +1278,8 @@ int MgmtSrvr::restart(bool nostart, bool initialStart,
bool abort, int * stopCount )
{
NodeBitmask nodes;
- int ret = sendSTOP_REQ(0,
+ Vector<NodeId> node_ids;
+ int ret = sendSTOP_REQ(node_ids,
nodes,
0,
abort,
@@ -2135,12 +2206,16 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted)
SignalSender ss(theFacade);
ss.lock(); // lock will be released on exit
- bool next;
- NodeId nodeId = 0;
- while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
- theFacade->get_node_alive(nodeId) == false);
-
- if(!next) return NO_CONTACT_WITH_DB_NODES;
+ NodeId nodeId = m_master_node;
+ if (okToSendTo(nodeId, false) != 0)
+ {
+ bool next;
+ nodeId = m_master_node = 0;
+ while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
+ okToSendTo(nodeId, false) != 0);
+ if(!next)
+ return NO_CONTACT_WITH_DB_NODES;
+ }
SimpleSignal ssig;
BackupReq* req = CAST_PTR(BackupReq, ssig.getDataPtrSend());
@@ -2208,7 +2283,7 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted)
const BackupRef * const ref =
CAST_CONSTPTR(BackupRef, signal->getDataPtr());
if(ref->errorCode == BackupRef::IAmNotMaster){
- nodeId = refToNode(ref->masterRef);
+ m_master_node = nodeId = refToNode(ref->masterRef);
#ifdef VM_TRACE
ndbout_c("I'm not master resending to %d", nodeId);
#endif
diff --git a/ndb/src/mgmsrv/MgmtSrvr.hpp b/ndb/src/mgmsrv/MgmtSrvr.hpp
index 46bdb112cb9..fe1603a1953 100644
--- a/ndb/src/mgmsrv/MgmtSrvr.hpp
+++ b/ndb/src/mgmsrv/MgmtSrvr.hpp
@@ -176,6 +176,7 @@ public:
STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 );
STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 );
+ STATIC_CONST( UNSUPPORTED_NODE_SHUTDOWN = 5031 );
STATIC_CONST( NODE_NOT_API_NODE = 5062 );
STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 );
@@ -252,7 +253,7 @@ public:
* @param processId: Id of the DB process to stop
* @return 0 if succeeded, otherwise: as stated above, plus:
*/
- int stopNode(int nodeId, bool abort = false);
+ int stopNodes(const Vector<NodeId> &node_ids, int *stopCount, bool abort);
/**
* Stop the system
@@ -286,11 +287,12 @@ public:
int start(int processId);
/**
- * Restart a node
+ * Restart nodes
* @param processId: Id of the DB process to start
*/
- int restartNode(int processId, bool nostart, bool initialStart,
- bool abort = false);
+ int restartNodes(const Vector<NodeId> &node_ids,
+ int *stopCount, bool nostart,
+ bool initialStart, bool abort);
/**
* Restart the system
@@ -494,7 +496,7 @@ private:
bool nostart,
bool initialStart);
- int sendSTOP_REQ(NodeId nodeId,
+ int sendSTOP_REQ(const Vector<NodeId> &node_ids,
NodeBitmask &stoppedNodes,
Uint32 singleUserNodeId,
bool abort,
@@ -653,6 +655,8 @@ private:
friend class Ndb_mgmd_event_service;
Ndb_mgmd_event_service m_event_listner;
+ NodeId m_master_node;
+
/**
* Handles the thread wich upon a 'Node is started' event will
* set the node's previous loglevel settings.
diff --git a/ndb/src/mgmsrv/Services.cpp b/ndb/src/mgmsrv/Services.cpp
index 3564c5c40ba..a80827abd8f 100644
--- a/ndb/src/mgmsrv/Services.cpp
+++ b/ndb/src/mgmsrv/Services.cpp
@@ -866,14 +866,11 @@ MgmApiSession::restart(Parser<MgmApiSession>::Context &,
}
int restarted = 0;
- int result = 0;
-
- for(size_t i = 0; i < nodes.size(); i++)
- if((result = m_mgmsrv.restartNode(nodes[i],
- nostart != 0,
- initialstart != 0,
- abort != 0)) == 0)
- restarted++;
+ int result= m_mgmsrv.restartNodes(nodes,
+ &restarted,
+ nostart != 0,
+ initialstart != 0,
+ abort != 0);
m_output->println("restart reply");
if(result != 0){
@@ -998,7 +995,12 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &,
args.get("node", (const char **)&nodes_str);
if(nodes_str == NULL)
+ {
+ m_output->println("stop reply");
+ m_output->println("result: empty node list");
+ m_output->println("");
return;
+ }
args.get("abort", &abort);
char *p, *last;
@@ -1008,29 +1010,10 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &,
nodes.push_back(atoi(p));
}
- int stop_self= 0;
- size_t i;
-
- for(i=0; i < nodes.size(); i++) {
- if (nodes[i] == m_mgmsrv.getOwnNodeId()) {
- stop_self= 1;
- if (i != nodes.size()-1) {
- m_output->println("stop reply");
- m_output->println("result: server must be stopped last");
- m_output->println("");
- return;
- }
- }
- }
-
- int stopped = 0, result = 0;
-
- for(i=0; i < nodes.size(); i++)
- if (nodes[i] != m_mgmsrv.getOwnNodeId()) {
- if((result = m_mgmsrv.stopNode(nodes[i], abort != 0)) == 0)
- stopped++;
- } else
- stopped++;
+ int stopped= 0;
+ int result= 0;
+ if (nodes.size())
+ result= m_mgmsrv.stopNodes(nodes, &stopped, abort != 0);
m_output->println("stop reply");
if(result != 0)
@@ -1039,9 +1022,6 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &,
m_output->println("result: Ok");
m_output->println("stopped: %d", stopped);
m_output->println("");
-
- if (stop_self)
- g_StopServer= true;
}