diff options
author | unknown <jonas@perch.ndb.mysql.com> | 2006-04-08 11:12:02 +0200 |
---|---|---|
committer | unknown <jonas@perch.ndb.mysql.com> | 2006-04-08 11:12:02 +0200 |
commit | 2e385bcbab0d7408927c2127779a12c7de91ae70 (patch) | |
tree | a677309c2f6687d83c379fbb015aafacf1e1928d /storage | |
parent | 0157830e9419aec3bf95ade4a9a5e251b99b660b (diff) | |
parent | bb074466c7f750601c7ba861f4b264f1075ccfd3 (diff) | |
download | mariadb-git-2e385bcbab0d7408927c2127779a12c7de91ae70.tar.gz |
Merge perch.ndb.mysql.com:/home/jonas/src/51-work
into perch.ndb.mysql.com:/home/jonas/src/mysql-5.1-new
libmysql/libmysql.c:
Auto merged
mysql-test/mysql-test-run.pl:
Auto merged
sql-common/client.c:
Auto merged
Diffstat (limited to 'storage')
24 files changed, 2078 insertions, 401 deletions
diff --git a/storage/ndb/include/kernel/signaldata/CmRegSignalData.hpp b/storage/ndb/include/kernel/signaldata/CmRegSignalData.hpp index f33c991249f..ab51ed17bc3 100644 --- a/storage/ndb/include/kernel/signaldata/CmRegSignalData.hpp +++ b/storage/ndb/include/kernel/signaldata/CmRegSignalData.hpp @@ -30,12 +30,17 @@ class CmRegReq { friend class Qmgr; public: - STATIC_CONST( SignalLength = 3 ); + STATIC_CONST( SignalLength = 5 + NdbNodeBitmask::Size ); private: Uint32 blockRef; Uint32 nodeId; - Uint32 version; // See ndb_version.h + Uint32 version; // See ndb_version.h + + Uint32 start_type; // As specified by cmd-line or mgm, NodeState::StartType + Uint32 latest_gci; // 0 means no fs + Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_ + // to be part of restart }; /** @@ -59,8 +64,7 @@ private: * The dynamic id that the node reciving this signal has */ Uint32 dynamicId; - - Uint32 allNdbNodes[NdbNodeBitmask::Size]; + Uint32 allNdbNodes[NdbNodeBitmask::Size]; }; /** @@ -73,7 +77,7 @@ class CmRegRef { friend class Qmgr; public: - STATIC_CONST( SignalLength = 4 ); + STATIC_CONST( SignalLength = 7 + NdbNodeBitmask::Size ); enum ErrorCode { ZBUSY = 0, /* Only the president can send this */ @@ -85,14 +89,27 @@ public: * as president. */ ZNOT_PRESIDENT = 5, /* We are not president */ ZNOT_DEAD = 6, /* We are not dead when we are starting */ - ZINCOMPATIBLE_VERSION = 7 + ZINCOMPATIBLE_VERSION = 7, + ZINCOMPATIBLE_START_TYPE = 8 }; private: Uint32 blockRef; Uint32 nodeId; Uint32 errorCode; + /** + * Applicable if ZELECTION + */ Uint32 presidentCandidate; + Uint32 candidate_latest_gci; // 0 means non + + /** + * Data for sending node sending node + */ + Uint32 latest_gci; + Uint32 start_type; + Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_ + // to be part of restart }; class CmAdd { diff --git a/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp b/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp index ef165b6c16f..3e3d926a999 100644 --- a/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp +++ b/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp @@ -64,6 +64,7 @@ public: // 19 NDBFS Fipple with O_SYNC, O_CREATE etc. // 20-24 BACKUP NdbcntrTestStopOnError = 25, + NdbcntrStopNodes = 70, // 100-105 TUP and ACC // 200-240 UTIL // 300-305 TRIX diff --git a/storage/ndb/include/kernel/signaldata/FailRep.hpp b/storage/ndb/include/kernel/signaldata/FailRep.hpp index 44577f07fdc..f2250f1af73 100644 --- a/storage/ndb/include/kernel/signaldata/FailRep.hpp +++ b/storage/ndb/include/kernel/signaldata/FailRep.hpp @@ -18,6 +18,7 @@ #define FAIL_REP_HPP #include "SignalData.hpp" +#include <NodeBitmask.hpp> /** * @@ -27,6 +28,7 @@ class FailRep { * Sender(s) & Reciver(s) */ friend class Qmgr; + friend class Ndbcntr; /** * For printing @@ -35,7 +37,8 @@ class FailRep { public: STATIC_CONST( SignalLength = 2 ); - + STATIC_CONST( ExtraLength = 1 + NdbNodeBitmask::Size ); + enum FailCause { ZOWN_FAILURE=0, ZOTHER_NODE_WHEN_WE_START=1, @@ -43,13 +46,20 @@ public: ZSTART_IN_REGREQ=3, ZHEARTBEAT_FAILURE=4, ZLINK_FAILURE=5, - ZOTHERNODE_FAILED_DURING_START=6 + ZOTHERNODE_FAILED_DURING_START=6, + ZMULTI_NODE_SHUTDOWN = 7, + ZPARTITIONED_CLUSTER = 8 }; - + private: Uint32 failNodeId; Uint32 failCause; + /** + * Used when failCause == ZPARTITIONED_CLUSTER + */ + Uint32 president; + Uint32 partition[NdbNodeBitmask::Size]; }; diff --git a/storage/ndb/include/kernel/signaldata/StopReq.hpp b/storage/ndb/include/kernel/signaldata/StopReq.hpp index 8e6a0b90a91..70e195961ce 100644 --- a/storage/ndb/include/kernel/signaldata/StopReq.hpp +++ b/storage/ndb/include/kernel/signaldata/StopReq.hpp @@ -32,7 +32,7 @@ class StopReq friend class MgmtSrvr; public: - STATIC_CONST( SignalLength = 9 ); + STATIC_CONST( SignalLength = 9 + NdbNodeBitmask::Size); public: Uint32 senderRef; @@ -49,29 +49,34 @@ public: Int32 readOperationTimeout; // Timeout before read operations are aborted Int32 operationTimeout; // Timeout before all operations are aborted + Uint32 nodes[NdbNodeBitmask::Size]; + static void setSystemStop(Uint32 & requestInfo, bool value); static void setPerformRestart(Uint32 & requestInfo, bool value); static void setNoStart(Uint32 & requestInfo, bool value); static void setInitialStart(Uint32 & requestInfo, bool value); - static void setEscalateOnNodeFail(Uint32 & requestInfo, bool value); /** * Don't perform "graceful" shutdown/restart... */ static void setStopAbort(Uint32 & requestInfo, bool value); + static void setStopNodes(Uint32 & requestInfo, bool value); static bool getSystemStop(const Uint32 & requestInfo); static bool getPerformRestart(const Uint32 & requestInfo); static bool getNoStart(const Uint32 & requestInfo); static bool getInitialStart(const Uint32 & requestInfo); - static bool getEscalateOnNodeFail(const Uint32 & requestInfo); static bool getStopAbort(const Uint32 & requestInfo); + static bool getStopNodes(const Uint32 & requestInfo); }; struct StopConf { STATIC_CONST( SignalLength = 2 ); Uint32 senderData; - Uint32 nodeState; + union { + Uint32 nodeState; + Uint32 nodeId; + }; }; class StopRef @@ -87,19 +92,22 @@ class StopRef friend class Ndbcntr; public: - STATIC_CONST( SignalLength = 2 ); + STATIC_CONST( SignalLength = 3 ); enum ErrorCode { OK = 0, NodeShutdownInProgress = 1, SystemShutdownInProgress = 2, NodeShutdownWouldCauseSystemCrash = 3, - TransactionAbortFailed = 4 + TransactionAbortFailed = 4, + UnsupportedNodeShutdown = 5, + MultiNodeShutdownNotMaster = 6 }; public: Uint32 senderData; Uint32 errorCode; + Uint32 masterNodeId; }; inline @@ -132,16 +140,16 @@ StopReq::getInitialStart(const Uint32 & requestInfo) inline bool -StopReq::getEscalateOnNodeFail(const Uint32 & requestInfo) +StopReq::getStopAbort(const Uint32 & requestInfo) { - return requestInfo & 16; + return requestInfo & 32; } inline bool -StopReq::getStopAbort(const Uint32 & requestInfo) +StopReq::getStopNodes(const Uint32 & requestInfo) { - return requestInfo & 32; + return requestInfo & 64; } @@ -187,24 +195,23 @@ StopReq::setInitialStart(Uint32 & requestInfo, bool value) inline void -StopReq::setEscalateOnNodeFail(Uint32 & requestInfo, bool value) +StopReq::setStopAbort(Uint32 & requestInfo, bool value) { if(value) - requestInfo |= 16; + requestInfo |= 32; else - requestInfo &= ~16; + requestInfo &= ~32; } inline void -StopReq::setStopAbort(Uint32 & requestInfo, bool value) +StopReq::setStopNodes(Uint32 & requestInfo, bool value) { if(value) - requestInfo |= 32; + requestInfo |= 64; else - requestInfo &= ~32; + requestInfo &= ~64; } - #endif diff --git a/storage/ndb/include/kernel/signaldata/WaitGCP.hpp b/storage/ndb/include/kernel/signaldata/WaitGCP.hpp index ebed28714d2..be2a5b9d5f0 100644 --- a/storage/ndb/include/kernel/signaldata/WaitGCP.hpp +++ b/storage/ndb/include/kernel/signaldata/WaitGCP.hpp @@ -46,7 +46,9 @@ public: Complete = 1, ///< Wait for a GCP to complete CompleteForceStart = 2, ///< Wait for a GCP to complete start one if needed CompleteIfRunning = 3, ///< Wait for ongoing GCP - CurrentGCI = 8 ///< Immediately return current GCI + CurrentGCI = 8, ///< Immediately return current GCI + BlockStartGcp = 9, + UnblockStartGcp = 10 }; Uint32 senderRef; @@ -70,11 +72,12 @@ class WaitGCPConf { //friend class Grep::PSCoord; public: - STATIC_CONST( SignalLength = 2 ); + STATIC_CONST( SignalLength = 3 ); public: Uint32 senderData; Uint32 gcp; + Uint32 blockStatus; }; class WaitGCPRef { diff --git a/storage/ndb/include/mgmapi/ndb_logevent.h b/storage/ndb/include/mgmapi/ndb_logevent.h index df4c228ec8a..73dd192e305 100644 --- a/storage/ndb/include/mgmapi/ndb_logevent.h +++ b/storage/ndb/include/mgmapi/ndb_logevent.h @@ -169,9 +169,13 @@ extern "C" { NDB_LE_BackupAborted = 57, /** NDB_MGM_EVENT_CATEGORY_INFO */ - NDB_LE_EventBufferStatus = 58 + NDB_LE_EventBufferStatus = 58, /* 59 used */ + + /** NDB_MGM_EVENT_CATEGORY_STARTUP */ + NDB_LE_StartReport = 60 + /* 60 unused */ /* 61 unused */ /* 62 unused */ @@ -637,6 +641,13 @@ extern "C" { unsigned type; unsigned node_id; } SingleUser; + /** Log even data @ref NDB_LE_StartReport */ + struct { + unsigned report_type; + unsigned remaining_time; + unsigned bitmask_size; + unsigned bitmask_data[1]; + } StartReport; #ifndef DOXYGEN_FIX }; #else diff --git a/storage/ndb/src/common/debugger/EventLogger.cpp b/storage/ndb/src/common/debugger/EventLogger.cpp index cfa06e96085..db9b8417c0d 100644 --- a/storage/ndb/src/common/debugger/EventLogger.cpp +++ b/storage/ndb/src/common/debugger/EventLogger.cpp @@ -743,6 +743,90 @@ void getTextSingleUser(QQQQ) { } } +void getTextStartReport(QQQQ) { + Uint32 time = theData[2]; + Uint32 sz = theData[3]; + char mask1[100]; + char mask2[100]; + char mask3[100]; + char mask4[100]; + BitmaskImpl::getText(sz, theData + 4 + (0 * sz), mask1); + BitmaskImpl::getText(sz, theData + 4 + (1 * sz), mask2); + BitmaskImpl::getText(sz, theData + 4 + (2 * sz), mask3); + BitmaskImpl::getText(sz, theData + 4 + (3 * sz), mask4); + switch(theData[1]){ + case 1: // Wait initial + BaseString::snprintf + (m_text, m_text_len, + "Initial start, waiting for %s to connect, " + " nodes [ all: %s connected: %s no-wait: %s ]", + mask4, mask1, mask2, mask3); + break; + case 2: // Wait partial + BaseString::snprintf + (m_text, m_text_len, + "Waiting until nodes: %s connects, " + "nodes [ all: %s connected: %s no-wait: %s ]", + mask4, mask1, mask2, mask3); + break; + case 3: // Wait partial timeout + BaseString::snprintf + (m_text, m_text_len, + "Waiting %u sec for nodes %s to connect, " + "nodes [ all: %s connected: %s no-wait: %s ]", + + time, mask4, mask1, mask2, mask3); + break; + case 4: // Wait partioned + BaseString::snprintf + (m_text, m_text_len, + "Waiting for non partitioned start, " + "nodes [ all: %s connected: %s missing: %s no-wait: %s ]", + + mask1, mask2, mask4, mask3); + break; + case 5: + BaseString::snprintf + (m_text, m_text_len, + "Waiting %u sec for non partitioned start, " + "nodes [ all: %s connected: %s missing: %s no-wait: %s ]", + + time, mask1, mask2, mask4, mask3); + break; + case 0x8000: // Do initial + BaseString::snprintf + (m_text, m_text_len, + "Initial start with nodes %s [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + case 0x8001: // Do start + BaseString::snprintf + (m_text, m_text_len, + "Start with all nodes %s", + mask2); + break; + case 0x8002: // Do partial + BaseString::snprintf + (m_text, m_text_len, + "Start with nodes %s [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + case 0x8003: // Do partioned + BaseString::snprintf + (m_text, m_text_len, + "Start potentially partitioned with nodes %s " + " [ missing: %s no-wait: %s ]", + mask2, mask4, mask3); + break; + default: + BaseString::snprintf + (m_text, m_text_len, + "Unknown startreport: 0x%x [ %s %s %s %s ]", + theData[1], + mask1, mask2, mask3, mask4); + } +} + #if 0 BaseString::snprintf(m_text, m_text_len, @@ -791,6 +875,7 @@ const EventLoggerBase::EventRepLogLevelMatrix EventLoggerBase::matrix[] = { ROW(StartREDOLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), ROW(StartLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), ROW(UNDORecordsExecuted, LogLevel::llStartUp, 15, Logger::LL_INFO ), + ROW(StartReport, LogLevel::llStartUp, 4, Logger::LL_INFO ), // NODERESTART ROW(NR_CopyDict, LogLevel::llNodeRestart, 8, Logger::LL_INFO ), diff --git a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp index 6d3df7b2047..c3c35ac4ab2 100644 --- a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +++ b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp @@ -134,6 +134,9 @@ Cmvmi::~Cmvmi() m_shared_page_pool.clear(); } +#ifdef ERROR_INSERT +NodeBitmask c_error_9000_nodes_mask; +#endif void Cmvmi::execNDB_TAMPER(Signal* signal) { @@ -441,21 +444,33 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) const Uint32 len = signal->getLength(); if(len == 2){ - globalTransporterRegistry.do_connect(tStartingNode); - globalTransporterRegistry.setIOState(tStartingNode, HaltIO); - //----------------------------------------------------- - // Report that the connection to the node is opened - //----------------------------------------------------- - signal->theData[0] = NDB_LE_CommunicationOpened; - signal->theData[1] = tStartingNode; - sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); - //----------------------------------------------------- +#ifdef ERROR_INSERT + if (! (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(tStartingNode))) +#endif + { + globalTransporterRegistry.do_connect(tStartingNode); + globalTransporterRegistry.setIOState(tStartingNode, HaltIO); + + //----------------------------------------------------- + // Report that the connection to the node is opened + //----------------------------------------------------- + signal->theData[0] = NDB_LE_CommunicationOpened; + signal->theData[1] = tStartingNode; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); + //----------------------------------------------------- + } } else { for(unsigned int i = 1; i < MAX_NODES; i++ ) { jam(); if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){ jam(); + +#ifdef ERROR_INSERT + if (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(i)) + continue; +#endif + globalTransporterRegistry.do_connect(i); globalTransporterRegistry.setIOState(i, HaltIO); @@ -1064,7 +1079,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0]; - if (dumpState->args[0] == DumpStateOrd::CmvmiDumpConnections){ + Uint32 arg = dumpState->args[0]; + if (arg == DumpStateOrd::CmvmiDumpConnections){ for(unsigned int i = 1; i < MAX_NODES; i++ ){ const char* nodeTypeStr = ""; switch(getNodeInfo(i).m_type){ @@ -1094,7 +1110,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } } - if (dumpState->args[0] == DumpStateOrd::CmvmiDumpLongSignalMemory){ + if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){ infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d", g_sectionSegmentPool.getSize(), g_sectionSegmentPool.getNoOfFree()); @@ -1131,7 +1147,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) return; } - if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert) + if (arg == DumpStateOrd::CmvmiSetRestartOnErrorInsert) { if(signal->getLength() == 1) { @@ -1151,7 +1167,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) } } - if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) { + if (arg == DumpStateOrd::CmvmiTestLongSigWithDelay) { unsigned i; Uint32 loopCount = dumpState->args[1]; const unsigned len0 = 11; @@ -1179,6 +1195,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2); } +#ifdef ERROR_INSERT + if (arg == 9000) + { + SET_ERROR_INSERT_VALUE(9000); + for (Uint32 i = 1; i<signal->getLength(); i++) + c_error_9000_nodes_mask.set(signal->theData[i]); + } + + if (arg == 9001) + { + CLEAR_ERROR_INSERT_VALUE; + for (Uint32 i = 0; i<MAX_NODES; i++) + { + if (c_error_9000_nodes_mask.get(i)) + { + signal->theData[0] = 0; + signal->theData[1] = i; + EXECUTE_DIRECT(CMVMI, GSN_OPEN_COMREQ, signal, 2); + } + } + c_error_9000_nodes_mask.clear(); + } +#endif + #ifdef VM_TRACE #if 0 { diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp index aaf27b351be..a6ec3749606 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp @@ -58,6 +58,7 @@ void Dbdih::initData() cwaitLcpSr = false; c_blockCommit = false; c_blockCommitNo = 1; + cntrlblockref = RNIL; }//Dbdih::initData() void Dbdih::initRecords() diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 95c691f0c6b..9936217bbf3 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -11995,7 +11995,7 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) Uint32 tmngNode; Uint32 tmngNodeGroup; Uint32 tmngLimit; - Uint32 i; + Uint32 i, j; /**----------------------------------------------------------------------- * ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. HOT SPARE NODES ARE ASSIGNED @@ -12041,6 +12041,38 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups, mngNodeptr.p->nodeGroup); }//if }//for + + for (i = 0; i<cnoOfNodeGroups; i++) + { + jam(); + bool alive = false; + NodeGroupRecordPtr NGPtr; + NGPtr.i = i; + ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord); + for (j = 0; j<NGPtr.p->nodeCount; j++) + { + jam(); + mngNodeptr.i = NGPtr.p->nodesInGroup[j]; + ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord); + if (checkNodeAlive(NGPtr.p->nodesInGroup[j])) + { + alive = true; + break; + } + } + + if (!alive) + { + char buf[255]; + BaseString::snprintf + (buf, sizeof(buf), + "Illegal initial start, no alive node in nodegroup %u", i); + progError(__LINE__, + NDBD_EXIT_SR_RESTARTCONFLICT, + buf); + + } + } }//Dbdih::makeNodeGroups() /** @@ -12851,7 +12883,6 @@ void Dbdih::sendStartFragreq(Signal* signal, void Dbdih::setInitialActiveStatus() { NodeRecordPtr siaNodeptr; - Uint32 tsiaNodeActiveStatus; Uint32 tsiaNoActiveNodes; tsiaNoActiveNodes = csystemnodes - cnoHotSpare; @@ -12859,39 +12890,34 @@ void Dbdih::setInitialActiveStatus() SYSFILE->nodeStatus[i] = 0; for (siaNodeptr.i = 1; siaNodeptr.i < MAX_NDB_NODES; siaNodeptr.i++) { ptrAss(siaNodeptr, nodeRecord); - if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) { + switch(siaNodeptr.p->nodeStatus){ + case NodeRecord::ALIVE: + case NodeRecord::DEAD: if (tsiaNoActiveNodes == 0) { jam(); siaNodeptr.p->activeStatus = Sysfile::NS_HotSpare; } else { jam(); tsiaNoActiveNodes = tsiaNoActiveNodes - 1; - siaNodeptr.p->activeStatus = Sysfile::NS_Active; - }//if - } else { - jam(); - siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined; - }//if - switch (siaNodeptr.p->activeStatus) { - case Sysfile::NS_Active: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_Active; - break; - case Sysfile::NS_HotSpare: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_HotSpare; - break; - case Sysfile::NS_NotDefined: - jam(); - tsiaNodeActiveStatus = Sysfile::NS_NotDefined; + if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) + { + jam(); + siaNodeptr.p->activeStatus = Sysfile::NS_Active; + } + else + { + siaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver; + } + } break; default: - ndbrequire(false); - return; + jam(); + siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined; break; - }//switch - Sysfile::setNodeStatus(siaNodeptr.i, SYSFILE->nodeStatus, - tsiaNodeActiveStatus); + }//if + Sysfile::setNodeStatus(siaNodeptr.i, + SYSFILE->nodeStatus, + siaNodeptr.p->activeStatus); }//for }//Dbdih::setInitialActiveStatus() @@ -14613,11 +14639,36 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal) jam(); conf->senderData = senderData; conf->gcp = cnewgcp; + conf->blockStatus = cgcpOrderBlocked; sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, JBB); return; }//if + if (requestType == WaitGCPReq::BlockStartGcp) + { + jam(); + conf->senderData = senderData; + conf->gcp = cnewgcp; + conf->blockStatus = cgcpOrderBlocked; + sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, + WaitGCPConf::SignalLength, JBB); + cgcpOrderBlocked = 1; + return; + } + + if (requestType == WaitGCPReq::UnblockStartGcp) + { + jam(); + conf->senderData = senderData; + conf->gcp = cnewgcp; + conf->blockStatus = cgcpOrderBlocked; + sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, + WaitGCPConf::SignalLength, JBB); + cgcpOrderBlocked = 0; + return; + } + if(isMaster()) { /** * Master @@ -14629,6 +14680,7 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal) jam(); conf->senderData = senderData; conf->gcp = coldgcp; + conf->blockStatus = cgcpOrderBlocked; sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, JBB); return; @@ -14715,6 +14767,7 @@ void Dbdih::execWAIT_GCP_CONF(Signal* signal) conf->senderData = ptr.p->clientData; conf->gcp = gcp; + conf->blockStatus = cgcpOrderBlocked; sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, JBB); @@ -14782,6 +14835,7 @@ void Dbdih::emptyWaitGCPMasterQueue(Signal* signal) c_waitGCPMasterList.next(ptr); conf->senderData = clientData; + conf->blockStatus = cgcpOrderBlocked; sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal, WaitGCPConf::SignalLength, JBB); diff --git a/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp b/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp index ffebce36757..da6aeaf8e8f 100644 --- a/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp +++ b/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp @@ -204,6 +204,7 @@ private: void execWAIT_GCP_CONF(Signal* signal); void execSTOP_REQ(Signal* signal); + void execSTOP_CONF(Signal* signal); void execRESUME_REQ(Signal* signal); void execCHANGE_NODE_STATE_CONF(Signal* signal); @@ -339,6 +340,16 @@ public: void progError(int line, int cause, const char * extra) { cntr.progError(line, cause, extra); } + + enum StopNodesStep { + SR_BLOCK_GCP_START_GCP = 0, + SR_WAIT_COMPLETE_GCP = 1, + SR_UNBLOCK_GCP_START_GCP = 2, + SR_QMGR_STOP_REQ = 3, + SR_WAIT_NODE_FAILURES = 4, + SR_CLUSTER_SHUTDOWN = 12 + } m_state; + SignalCounter m_stop_req_counter; }; private: StopRecord c_stopRec; diff --git a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp index d2e2f1d2335..ed37622657d 100644 --- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp +++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp @@ -88,6 +88,7 @@ Ndbcntr::Ndbcntr(Block_context& ctx): addRecSignal(GSN_STOP_ME_CONF, &Ndbcntr::execSTOP_ME_CONF); addRecSignal(GSN_STOP_REQ, &Ndbcntr::execSTOP_REQ); + addRecSignal(GSN_STOP_CONF, &Ndbcntr::execSTOP_CONF); addRecSignal(GSN_RESUME_REQ, &Ndbcntr::execRESUME_REQ); addRecSignal(GSN_WAIT_GCP_REF, &Ndbcntr::execWAIT_GCP_REF); diff --git a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index eced400d480..b0372ec89a2 100644 --- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -42,6 +42,8 @@ #include <signaldata/FsRemoveReq.hpp> #include <signaldata/ReadConfig.hpp> +#include <signaldata/FailRep.hpp> + #include <AttributeHeader.hpp> #include <Configuration.hpp> #include <DebuggerNames.hpp> @@ -853,17 +855,9 @@ Ndbcntr::trySystemRestart(Signal* signal){ return false; } - if(!allNodes && c_start.m_startPartialTimeout > now){ - jam(); - return false; - } - NodeState::StartType srType = NodeState::ST_SYSTEM_RESTART; - if(c_start.m_waiting.equal(c_start.m_withoutLog)){ - if(!allNodes){ - jam(); - return false; - } + if(c_start.m_waiting.equal(c_start.m_withoutLog)) + { jam(); srType = NodeState::ST_INITIAL_START; c_start.m_starting = c_start.m_withoutLog; // Used for starting... @@ -893,10 +887,6 @@ Ndbcntr::trySystemRestart(Signal* signal){ ndbrequire(false); // All nodes -> partitioning, which is not allowed } - if(c_start.m_startPartitionedTimeout > now){ - jam(); - return false; - } break; } @@ -1512,13 +1502,74 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal) sendSignal(QMGR_REF, GSN_NODE_FAILREP, signal, NodeFailRep::SignalLength, JBB); + if (c_stopRec.stopReq.senderRef) + { + jam(); + switch(c_stopRec.m_state){ + case StopRecord::SR_WAIT_NODE_FAILURES: + { + jam(); + NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + tmp.bitANDC(allFailed); + tmp.copyto(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + + if (tmp.isclear()) + { + jam(); + if (c_stopRec.stopReq.senderRef != RNIL) + { + jam(); + StopConf * const stopConf = (StopConf *)&signal->theData[0]; + stopConf->senderData = c_stopRec.stopReq.senderData; + stopConf->nodeState = (Uint32) NodeState::SL_SINGLEUSER; + sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_CONF, signal, + StopConf::SignalLength, JBB); + } + + c_stopRec.stopReq.senderRef = 0; + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_UNBLOCK_GCP_START_GCP; + req->requestType = WaitGCPReq::UnblockStartGcp; + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBA); + } + break; + } + case StopRecord::SR_QMGR_STOP_REQ: + { + NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + tmp.bitANDC(allFailed); + + if (tmp.isclear()) + { + Uint32 nodeId = allFailed.find(0); + tmp.set(nodeId); + + StopConf* conf = (StopConf*)signal->getDataPtrSend(); + conf->senderData = c_stopRec.stopReq.senderData; + conf->nodeId = nodeId; + sendSignal(reference(), + GSN_STOP_CONF, signal, StopConf::SignalLength, JBB); + } + + tmp.copyto(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + + break; + } + } + } + + signal->theData[0] = NDB_LE_NODE_FAILREP; + signal->theData[2] = 0; + Uint32 nodeId = 0; while(!allFailed.isclear()){ nodeId = allFailed.find(nodeId + 1); allFailed.clear(nodeId); - signal->theData[0] = NDB_LE_NODE_FAILREP; signal->theData[1] = nodeId; - signal->theData[2] = 0; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); }//for @@ -1964,13 +2015,15 @@ void Ndbcntr::execDUMP_STATE_ORD(Signal* signal) { DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0]; - if(signal->theData[0] == 13){ + Uint32 arg = dumpState->args[0]; + + if(arg == 13){ infoEvent("Cntr: cstartPhase = %d, cinternalStartphase = %d, block = %d", cstartPhase, cinternalStartphase, cndbBlocksCount); infoEvent("Cntr: cmasterNodeId = %d", cmasterNodeId); } - if (dumpState->args[0] == DumpStateOrd::NdbcntrTestStopOnError){ + if (arg == DumpStateOrd::NdbcntrTestStopOnError){ if (m_ctx.m_config.stopOnError() == true) ((Configuration&)m_ctx.m_config).stopOnError(false); @@ -1983,6 +2036,28 @@ Ndbcntr::execDUMP_STATE_ORD(Signal* signal) SystemError::SignalLength, JBA); } + if (arg == DumpStateOrd::NdbcntrStopNodes) + { + NdbNodeBitmask mask; + for(Uint32 i = 1; i<signal->getLength(); i++) + mask.set(signal->theData[i]); + + StopReq* req = (StopReq*)signal->getDataPtrSend(); + req->senderRef = RNIL; + req->senderData = 123; + req->requestInfo = 0; + req->singleuser = 0; + req->singleUserApi = 0; + mask.copyto(NdbNodeBitmask::Size, req->nodes); + StopReq::setPerformRestart(req->requestInfo, 1); + StopReq::setNoStart(req->requestInfo, 1); + StopReq::setStopNodes(req->requestInfo, 1); + StopReq::setStopAbort(req->requestInfo, 1); + + sendSignal(reference(), GSN_STOP_REQ, signal, + StopReq::SignalLength, JBB); + return; + } }//Ndbcntr::execDUMP_STATE_ORD() @@ -2043,9 +2118,12 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ Uint32 senderData = req->senderData; BlockReference senderRef = req->senderRef; bool abort = StopReq::getStopAbort(req->requestInfo); + bool stopnodes = StopReq::getStopNodes(req->requestInfo); - if(getNodeState().startLevel < NodeState::SL_STARTED || - abort && !singleuser){ + if(!singleuser && + (getNodeState().startLevel < NodeState::SL_STARTED || + (abort && !stopnodes))) + { /** * Node is not started yet * @@ -2087,21 +2165,74 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ else ref->errorCode = StopRef::NodeShutdownInProgress; ref->senderData = senderData; - sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + ref->masterNodeId = cmasterNodeId; + + if (senderRef != RNIL) + sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + return; + } + + if (stopnodes && !abort) + { + jam(); + ref->errorCode = StopRef::UnsupportedNodeShutdown; + ref->senderData = senderData; + ref->masterNodeId = cmasterNodeId; + if (senderRef != RNIL) + sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + return; + } + + if (stopnodes && cmasterNodeId != getOwnNodeId()) + { + jam(); + ref->errorCode = StopRef::MultiNodeShutdownNotMaster; + ref->senderData = senderData; + ref->masterNodeId = cmasterNodeId; + if (senderRef != RNIL) + sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); return; } c_stopRec.stopReq = * req; c_stopRec.stopInitiatedTime = NdbTick_CurrentMillisecond(); - if(!singleuser) { - if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)) { + if (stopnodes) + { + jam(); + + if(!c_stopRec.checkNodeFail(signal)) + { + jam(); + return; + } + + char buf[100]; + NdbNodeBitmask mask; + mask.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + infoEvent("Initiating shutdown abort of %s", mask.getText(buf)); + ndbout_c("Initiating shutdown abort of %s", mask.getText(buf)); + + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_BLOCK_GCP_START_GCP; + req->requestType = WaitGCPReq::BlockStartGcp; + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); + return; + } + else if(!singleuser) + { + if(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)) + { jam(); - if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){ + if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)) + { ((Configuration&)m_ctx.m_config).stopOnError(false); } } - if(!c_stopRec.checkNodeFail(signal)){ + if(!c_stopRec.checkNodeFail(signal)) + { jam(); return; } @@ -2171,7 +2302,17 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){ */ NodeBitmask ndbMask; ndbMask.assign(cntr.c_startedNodes); - ndbMask.clear(cntr.getOwnNodeId()); + + if (StopReq::getStopNodes(stopReq.requestInfo)) + { + NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, stopReq.nodes); + ndbMask.bitANDC(tmp); + } + else + { + ndbMask.clear(cntr.getOwnNodeId()); + } CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; sd->blockRef = cntr.reference(); @@ -2191,9 +2332,11 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){ ref->senderData = stopReq.senderData; ref->errorCode = StopRef::NodeShutdownWouldCauseSystemCrash; + ref->masterNodeId = cntr.cmasterNodeId; const BlockReference bref = stopReq.senderRef; - cntr.sendSignal(bref, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); + if (bref != RNIL) + cntr.sendSignal(bref, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); stopReq.senderRef = 0; @@ -2243,23 +2386,23 @@ Ndbcntr::StopRecord::checkTcTimeout(Signal* signal){ if(stopReq.getSystemStop(stopReq.requestInfo) || stopReq.singleuser){ jam(); if(stopReq.singleuser) - { - jam(); - AbortAllReq * req = (AbortAllReq*)&signal->theData[0]; - req->senderRef = cntr.reference(); - req->senderData = 12; - cntr.sendSignal(DBTC_REF, GSN_ABORT_ALL_REQ, signal, - AbortAllReq::SignalLength, JBB); - } + { + jam(); + AbortAllReq * req = (AbortAllReq*)&signal->theData[0]; + req->senderRef = cntr.reference(); + req->senderData = 12; + cntr.sendSignal(DBTC_REF, GSN_ABORT_ALL_REQ, signal, + AbortAllReq::SignalLength, JBB); + } else - { - WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; - req->senderRef = cntr.reference(); - req->senderData = 12; - req->requestType = WaitGCPReq::CompleteForceStart; - cntr.sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, - WaitGCPReq::SignalLength, JBB); - } + { + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = cntr.reference(); + req->senderData = StopRecord::SR_CLUSTER_SHUTDOWN; + req->requestType = WaitGCPReq::CompleteForceStart; + cntr.sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); + } } else { jam(); StopPermReq * req = (StopPermReq*)&signal->theData[0]; @@ -2338,6 +2481,7 @@ void Ndbcntr::execABORT_ALL_REF(Signal* signal){ StopRef * const stopRef = (StopRef *)&signal->theData[0]; stopRef->senderData = c_stopRec.stopReq.senderData; stopRef->errorCode = StopRef::TransactionAbortFailed; + stopRef->masterNodeId = cmasterNodeId; sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); } @@ -2421,7 +2565,7 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){ WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; req->senderRef = reference(); - req->senderData = 12; + req->senderData = StopRecord::SR_CLUSTER_SHUTDOWN; req->requestType = WaitGCPReq::CompleteForceStart; sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, WaitGCPReq::SignalLength, JBB); @@ -2430,29 +2574,129 @@ void Ndbcntr::execWAIT_GCP_REF(Signal* signal){ void Ndbcntr::execWAIT_GCP_CONF(Signal* signal){ jamEntry(); - ndbrequire(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)); - NodeState newState(NodeState::SL_STOPPING_3, true); + WaitGCPConf* conf = (WaitGCPConf*)signal->getDataPtr(); - /** - * Inform QMGR so that arbitrator won't kill us - */ - NodeStateRep * rep = (NodeStateRep *)&signal->theData[0]; - rep->nodeState = newState; - rep->nodeState.masterNodeId = cmasterNodeId; - rep->nodeState.setNodeGroup(c_nodeGroup); - EXECUTE_DIRECT(QMGR, GSN_NODE_STATE_REP, signal, NodeStateRep::SignalLength); - - if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){ - jam(); - StartOrd * startOrd = (StartOrd *)&signal->theData[0]; - startOrd->restartInfo = c_stopRec.stopReq.requestInfo; - sendSignalWithDelay(CMVMI_REF, GSN_START_ORD, signal, 500, - StartOrd::SignalLength); - } else { + switch(conf->senderData){ + case StopRecord::SR_BLOCK_GCP_START_GCP: + { + jam(); + /** + * + */ + if(!c_stopRec.checkNodeFail(signal)) + { + jam(); + goto unblock; + } + + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_WAIT_COMPLETE_GCP; + req->requestType = WaitGCPReq::CompleteIfRunning; + + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); + return; + } + case StopRecord::SR_UNBLOCK_GCP_START_GCP: + { + jam(); + return; + } + case StopRecord::SR_WAIT_COMPLETE_GCP: + { jam(); - sendSignalWithDelay(CMVMI_REF, GSN_STOP_ORD, signal, 500, 1); + if(!c_stopRec.checkNodeFail(signal)) + { + jam(); + goto unblock; + } + + NdbNodeBitmask tmp; + tmp.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + c_stopRec.m_stop_req_counter = tmp; + NodeReceiverGroup rg(QMGR, tmp); + StopReq * stopReq = (StopReq *)&signal->theData[0]; + * stopReq = c_stopRec.stopReq; + stopReq->senderRef = reference(); + sendSignal(rg, GSN_STOP_REQ, signal, StopReq::SignalLength, JBA); + c_stopRec.m_state = StopRecord::SR_QMGR_STOP_REQ; + return; + } + case StopRecord::SR_CLUSTER_SHUTDOWN: + { + jam(); + break; + } + } + + { + ndbrequire(StopReq::getSystemStop(c_stopRec.stopReq.requestInfo)); + NodeState newState(NodeState::SL_STOPPING_3, true); + + /** + * Inform QMGR so that arbitrator won't kill us + */ + NodeStateRep * rep = (NodeStateRep *)&signal->theData[0]; + rep->nodeState = newState; + rep->nodeState.masterNodeId = cmasterNodeId; + rep->nodeState.setNodeGroup(c_nodeGroup); + EXECUTE_DIRECT(QMGR, GSN_NODE_STATE_REP, signal, + NodeStateRep::SignalLength); + + if(StopReq::getPerformRestart(c_stopRec.stopReq.requestInfo)){ + jam(); + StartOrd * startOrd = (StartOrd *)&signal->theData[0]; + startOrd->restartInfo = c_stopRec.stopReq.requestInfo; + sendSignalWithDelay(CMVMI_REF, GSN_START_ORD, signal, 500, + StartOrd::SignalLength); + } else { + jam(); + sendSignalWithDelay(CMVMI_REF, GSN_STOP_ORD, signal, 500, 1); + } + return; + } + +unblock: + WaitGCPReq * req = (WaitGCPReq*)&signal->theData[0]; + req->senderRef = reference(); + req->senderData = StopRecord::SR_UNBLOCK_GCP_START_GCP; + req->requestType = WaitGCPReq::UnblockStartGcp; + sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, + WaitGCPReq::SignalLength, JBB); +} + +void +Ndbcntr::execSTOP_CONF(Signal* signal) +{ + jamEntry(); + StopConf *conf = (StopConf*)signal->getDataPtr(); + ndbrequire(c_stopRec.m_state == StopRecord::SR_QMGR_STOP_REQ); + c_stopRec.m_stop_req_counter.clearWaitingFor(conf->nodeId); + if (c_stopRec.m_stop_req_counter.done()) + { + char buf[100]; + NdbNodeBitmask mask; + mask.assign(NdbNodeBitmask::Size, c_stopRec.stopReq.nodes); + infoEvent("Stopping of %s", mask.getText(buf)); + ndbout_c("Stopping of %s", mask.getText(buf)); + + /** + * Kill any node... + */ + FailRep * const failRep = (FailRep *)&signal->theData[0]; + failRep->failCause = FailRep::ZMULTI_NODE_SHUTDOWN; + NodeReceiverGroup rg(QMGR, c_clusterNodes); + Uint32 nodeId = 0; + while ((nodeId = NdbNodeBitmask::find(c_stopRec.stopReq.nodes, nodeId+1)) + != NdbNodeBitmask::NotFound) + { + failRep->failNodeId = nodeId; + sendSignal(rg, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA); + } + c_stopRec.m_state = StopRecord::SR_WAIT_NODE_FAILURES; + return; } - return; } void Ndbcntr::execSTTORRY(Signal* signal){ diff --git a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp index 5b69a0a921c..70c0fdfc988 100644 --- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp +++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp @@ -33,6 +33,7 @@ #include <SafeCounter.hpp> #include <RequestTracker.hpp> +#include <signaldata/StopReq.hpp> #include "timer.hpp" @@ -53,6 +54,7 @@ #define ZAPI_HB_HANDLING 3 #define ZTIMER_HANDLING 4 #define ZARBIT_HANDLING 5 +#define ZSTART_FAILURE_LIMIT 6 /* Error Codes ------------------------------*/ #define ZERRTOOMANY 1101 @@ -104,18 +106,42 @@ public: }; struct StartRecord { - void reset(){ m_startKey++; m_startNode = 0;} + void reset(){ + m_startKey++; + m_startNode = 0; + m_gsn = RNIL; + m_nodes.clearWaitingFor(); + } Uint32 m_startKey; Uint32 m_startNode; Uint64 m_startTimeout; Uint32 m_gsn; SignalCounter m_nodes; - } c_start; + Uint32 m_latest_gci; + Uint32 m_start_type; + NdbNodeBitmask m_skip_nodes; + NdbNodeBitmask m_starting_nodes; + NdbNodeBitmask m_starting_nodes_w_log; + + Uint16 m_president_candidate; + Uint32 m_president_candidate_gci; + Uint16 m_regReqReqSent; + Uint16 m_regReqReqRecv; + } c_start; + NdbNodeBitmask c_definedNodes; // DB nodes in config NdbNodeBitmask c_clusterNodes; // DB nodes in cluster NodeBitmask c_connectedNodes; // All kinds of connected nodes + + /** + * Nodes which we're checking for partitioned cluster + * + * i.e. nodes that connect to use, when we already have elected president + */ + NdbNodeBitmask c_readnodes_nodes; + Uint32 c_maxDynamicId; // Records @@ -208,6 +234,7 @@ private: void execPRES_TOCONF(Signal* signal); void execDISCONNECT_REP(Signal* signal); void execSYSTEM_ERROR(Signal* signal); + void execSTOP_REQ(Signal* signal); // Received signals void execDUMP_STATE_ORD(Signal* signal); @@ -222,7 +249,12 @@ private: void execREAD_NODESREQ(Signal* signal); void execSET_VAR_REQ(Signal* signal); + void execREAD_NODESREF(Signal* signal); + void execREAD_NODESCONF(Signal* signal); + void execDIH_RESTARTREF(Signal* signal); + void execDIH_RESTARTCONF(Signal* signal); + void execAPI_VERSION_REQ(Signal* signal); void execAPI_BROADCAST_REP(Signal* signal); @@ -244,6 +276,9 @@ private: void execARBIT_STOPREP(Signal* signal); // Statement blocks + void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn); + Uint32 check_startup(Signal* signal); + void node_failed(Signal* signal, Uint16 aFailedNode); void checkStartInterface(Signal* signal); void failReport(Signal* signal, @@ -261,8 +296,9 @@ private: // Generated statement blocks void startphase1(Signal* signal); - void electionWon(); + void electionWon(Signal* signal); void cmInfoconf010Lab(Signal* signal); + void apiHbHandlingLab(Signal* signal); void timerHandlingLab(Signal* signal); void hbReceivedLab(Signal* signal); @@ -364,12 +400,12 @@ private: /* Status flags ----------------------------------*/ Uint32 c_restartPartialTimeout; + Uint32 c_restartPartionedTimeout; + Uint32 c_restartFailureTimeout; + Uint64 c_start_election_time; Uint16 creadyDistCom; - Uint16 c_regReqReqSent; - Uint16 c_regReqReqRecv; - Uint64 c_stopElectionTime; - Uint16 cpresidentCandidate; + Uint16 cdelayRegreq; Uint16 cpresidentAlive; Uint16 cnoFailedNodes; @@ -397,7 +433,7 @@ private: Uint16 cfailedNodes[MAX_NDB_NODES]; Uint16 cprepFailedNodes[MAX_NDB_NODES]; Uint16 ccommitFailedNodes[MAX_NDB_NODES]; - + struct OpAllocNodeIdReq { RequestTracker m_tracker; AllocNodeIdReq m_req; @@ -406,6 +442,9 @@ private: }; struct OpAllocNodeIdReq opAllocNodeIdReq; + + StopReq c_stopReq; + bool check_multi_node_shutdown(Signal* signal); }; #endif diff --git a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp index 5d964acc016..6ee24561b0a 100644 --- a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp +++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp @@ -35,9 +35,8 @@ void Qmgr::initData() Uint32 hbDBAPI = 500; setHbApiDelay(hbDBAPI); - - c_connectedNodes.clear(); c_connectedNodes.set(getOwnNodeId()); + c_stopReq.senderRef = 0; }//Qmgr::initData() void Qmgr::initRecords() @@ -52,6 +51,7 @@ Qmgr::Qmgr(Block_context& ctx) // Transit signals addRecSignal(GSN_DUMP_STATE_ORD, &Qmgr::execDUMP_STATE_ORD); + addRecSignal(GSN_STOP_REQ, &Qmgr::execSTOP_REQ); addRecSignal(GSN_DEBUG_SIG, &Qmgr::execDEBUG_SIG); addRecSignal(GSN_CONTINUEB, &Qmgr::execCONTINUEB); addRecSignal(GSN_CM_HEARTBEAT, &Qmgr::execCM_HEARTBEAT); @@ -101,6 +101,12 @@ Qmgr::Qmgr(Block_context& ctx) addRecSignal(GSN_ARBIT_CHOOSEREF, &Qmgr::execARBIT_CHOOSEREF); addRecSignal(GSN_ARBIT_STOPREP, &Qmgr::execARBIT_STOPREP); + addRecSignal(GSN_READ_NODESREF, &Qmgr::execREAD_NODESREF); + addRecSignal(GSN_READ_NODESCONF, &Qmgr::execREAD_NODESCONF); + + addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF); + addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF); + initData(); }//Qmgr::Qmgr() diff --git a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 69447745757..8772e00f027 100644 --- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -56,6 +56,33 @@ #define DEBUG_START3(signal, msg) #endif +/** + * c_start.m_gsn = GSN_CM_REGREQ + * Possible for all nodes + * c_start.m_nodes contains all nodes in config + * + * c_start.m_gsn = GSN_CM_NODEINFOREQ; + * Set when receiving CM_REGCONF + * State possible for starting node only (not in cluster) + * + * c_start.m_nodes contains all node in alive cluster that + * that has not replied to GSN_CM_NODEINFOREQ + * passed by president in GSN_CM_REGCONF + * + * c_start.m_gsn = GSN_CM_ADD + * Possible for president only + * Set when receiving and accepting CM_REGREQ (to include node) + * + * c_start.m_nodes contains all nodes in alive cluster + starting node + * that has not replied to GSN_CM_ADD + * by sending GSN_CM_ACKADD + * + * c_start.m_gsn = GSN_CM_NODEINFOCONF + * Possible for non presidents only + * c_start.m_nodes contains a node that has been accepted by president + * but has not connected to us yet + */ + // Signal entries and statement blocks /* 4 P R O G R A M */ /*******************************/ @@ -119,6 +146,30 @@ void Qmgr::execCONTINUEB(Signal* signal) runArbitThread(signal); return; break; + case ZSTART_FAILURE_LIMIT:{ + if (cpresident != ZNIL) + { + jam(); + return; + } + Uint64 now = NdbTick_CurrentMillisecond(); + if (now > (c_start_election_time + c_restartFailureTimeout)) + { + jam(); + BaseString tmp; + tmp.append("Shutting down node as total restart time exceeds " + " StartFailureTimeout as set in config file "); + if(c_restartFailureTimeout == ~0) + tmp.append(" 0 (inifinite)"); + else + tmp.appfmt(" %d", c_restartFailureTimeout); + + progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str()); + } + signal->theData[0] = ZSTART_FAILURE_LIMIT; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1); + return; + } default: jam(); // ZCOULD_NOT_OCCUR_ERROR; @@ -246,14 +297,28 @@ void Qmgr::startphase1(Signal* signal) nodePtr.p->phase = ZSTARTING; nodePtr.p->blockRef = reference(); c_connectedNodes.set(nodePtr.i); + + signal->theData[0] = reference(); + sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB); + return; +} - signal->theData[0] = 0; // no answer - signal->theData[1] = 0; // no id - signal->theData[2] = NodeInfo::DB; - sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB); +void +Qmgr::execDIH_RESTARTREF(Signal*signal) +{ + jamEntry(); + c_start.m_latest_gci = 0; + execCM_INFOCONF(signal); +} + +void +Qmgr::execDIH_RESTARTCONF(Signal*signal) +{ + jamEntry(); + + c_start.m_latest_gci = signal->theData[1]; execCM_INFOCONF(signal); - return; } void Qmgr::setHbDelay(UintR aHbDelay) @@ -280,18 +345,24 @@ void Qmgr::execCONNECT_REP(Signal* signal) { jamEntry(); const Uint32 nodeId = signal->theData[0]; + + if (ERROR_INSERTED(931)) + { + jam(); + ndbout_c("Discarding CONNECT_REP(%d)", nodeId); + infoEvent("Discarding CONNECT_REP(%d)", nodeId); + return; + } + c_connectedNodes.set(nodeId); NodeRecPtr nodePtr; nodePtr.i = getOwnNodeId(); ptrCheckGuard(nodePtr, MAX_NODES, nodeRec); switch(nodePtr.p->phase){ - case ZSTARTING: case ZRUNNING: + ndbrequire(!c_clusterNodes.get(nodeId)); + case ZSTARTING: jam(); - if(!c_start.m_nodes.isWaitingFor(nodeId)){ - jam(); - return; - } break; case ZPREPARE_FAIL: case ZFAIL_CLOSING: @@ -303,59 +374,126 @@ void Qmgr::execCONNECT_REP(Signal* signal) case ZAPI_INACTIVE: return; } - + + if (getNodeInfo(nodeId).getType() != NodeInfo::DB) + { + jam(); + return; + } + switch(c_start.m_gsn){ case GSN_CM_REGREQ: jam(); sendCmRegReq(signal, nodeId); + + /** + * We're waiting for CM_REGCONF c_start.m_nodes contains all configured + * nodes + */ + ndbrequire(nodePtr.p->phase == ZSTARTING); + ndbrequire(c_start.m_nodes.isWaitingFor(nodeId)); return; case GSN_CM_NODEINFOREQ: jam(); - sendCmNodeInfoReq(signal, nodeId, nodePtr.p); + + if (c_start.m_nodes.isWaitingFor(nodeId)) + { + jam(); + ndbrequire(getOwnNodeId() != cpresident); + ndbrequire(nodePtr.p->phase == ZSTARTING); + sendCmNodeInfoReq(signal, nodeId, nodePtr.p); + return; + } return; - case GSN_CM_ADD:{ + case GSN_CM_NODEINFOCONF:{ jam(); - - ndbrequire(getOwnNodeId() != cpresident); - c_start.m_nodes.clearWaitingFor(nodeId); - c_start.m_gsn = RNIL; - NodeRecPtr addNodePtr; - addNodePtr.i = nodeId; - ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); - cmAddPrepare(signal, addNodePtr, nodePtr.p); - return; + ndbrequire(getOwnNodeId() != cpresident); + ndbrequire(nodePtr.p->phase == ZRUNNING); + if (c_start.m_nodes.isWaitingFor(nodeId)) + { + jam(); + c_start.m_nodes.clearWaitingFor(nodeId); + c_start.m_gsn = RNIL; + + NodeRecPtr addNodePtr; + addNodePtr.i = nodeId; + ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); + cmAddPrepare(signal, addNodePtr, nodePtr.p); + return; + } } default: - return; + (void)1; } + + ndbrequire(!c_start.m_nodes.isWaitingFor(nodeId)); + ndbrequire(!c_readnodes_nodes.get(nodeId)); + c_readnodes_nodes.set(nodeId); + signal->theData[0] = reference(); + sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA); return; }//Qmgr::execCONNECT_REP() +void +Qmgr::execREAD_NODESCONF(Signal* signal) +{ + check_readnodes_reply(signal, + refToNode(signal->getSendersBlockRef()), + GSN_READ_NODESCONF); +} + +void +Qmgr::execREAD_NODESREF(Signal* signal) +{ + check_readnodes_reply(signal, + refToNode(signal->getSendersBlockRef()), + GSN_READ_NODESREF); +} + /*******************************/ /* CM_INFOCONF */ /*******************************/ void Qmgr::execCM_INFOCONF(Signal* signal) { + /** + * Open communcation to all DB nodes + */ + signal->theData[0] = 0; // no answer + signal->theData[1] = 0; // no id + signal->theData[2] = NodeInfo::DB; + sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB); + cpresident = ZNIL; - cpresidentCandidate = getOwnNodeId(); cpresidentAlive = ZFALSE; - c_stopElectionTime = NdbTick_CurrentMillisecond(); - c_stopElectionTime += c_restartPartialTimeout; + c_start_election_time = NdbTick_CurrentMillisecond(); + + signal->theData[0] = ZSTART_FAILURE_LIMIT; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1); + cmInfoconf010Lab(signal); return; }//Qmgr::execCM_INFOCONF() +Uint32 g_start_type = 0; +NdbNodeBitmask g_nowait_nodes; // Set by clo + void Qmgr::cmInfoconf010Lab(Signal* signal) { c_start.m_startKey = 0; c_start.m_startNode = getOwnNodeId(); c_start.m_nodes.clearWaitingFor(); c_start.m_gsn = GSN_CM_REGREQ; + c_start.m_starting_nodes.clear(); + c_start.m_starting_nodes_w_log.clear(); + c_start.m_regReqReqSent = 0; + c_start.m_regReqReqRecv = 0; + c_start.m_skip_nodes = g_nowait_nodes; + c_start.m_skip_nodes.bitAND(c_definedNodes); + c_start.m_start_type = g_start_type; NodeRecPtr nodePtr; - c_regReqReqSent = c_regReqReqRecv = 0; cnoOfNodes = 0; for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { jam(); @@ -390,14 +528,18 @@ void Qmgr::cmInfoconf010Lab(Signal* signal) void Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){ - c_regReqReqSent++; - CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0]; - cmRegReq->blockRef = reference(); - cmRegReq->nodeId = getOwnNodeId(); - cmRegReq->version = NDB_VERSION; + CmRegReq * req = (CmRegReq *)&signal->theData[0]; + req->blockRef = reference(); + req->nodeId = getOwnNodeId(); + req->version = NDB_VERSION; + req->latest_gci = c_start.m_latest_gci; + req->start_type = c_start.m_start_type; + c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, req->skip_nodes); const Uint32 ref = calcQmgrBlockRef(nodeId); sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB); DEBUG_START(GSN_CM_REGREQ, nodeId, ""); + + c_start.m_regReqReqSent++; } /* @@ -437,6 +579,18 @@ Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){ /*******************************/ /* CM_REGREQ */ /*******************************/ +static +int +check_start_type(Uint32 starting, Uint32 own) +{ + if (starting == (1 << NodeState::ST_INITIAL_START) && + ((own & (1 << NodeState::ST_INITIAL_START)) == 0)) + { + return 1; + } + return 0; +} + void Qmgr::execCM_REGREQ(Signal* signal) { DEBUG_START3(signal, ""); @@ -448,6 +602,17 @@ void Qmgr::execCM_REGREQ(Signal* signal) const BlockReference Tblockref = cmRegReq->blockRef; const Uint32 startingVersion = cmRegReq->version; addNodePtr.i = cmRegReq->nodeId; + Uint32 gci = 1; + Uint32 start_type = ~0; + NdbNodeBitmask skip_nodes; + + if (signal->getLength() == CmRegReq::SignalLength) + { + jam(); + gci = cmRegReq->latest_gci; + start_type = cmRegReq->start_type; + skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes); + } if (creadyDistCom == ZFALSE) { jam(); @@ -461,11 +626,19 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; } - ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); - - if (cpresident != getOwnNodeId()){ + if (check_start_type(start_type, c_start.m_start_type)) + { + jam(); + sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE); + return; + } + + if (cpresident != getOwnNodeId()) + { jam(); - if (cpresident == ZNIL) { + + if (cpresident == ZNIL) + { /*** * We don't know the president. * If the node to be added has lower node id @@ -473,13 +646,18 @@ void Qmgr::execCM_REGREQ(Signal* signal) * candidate */ jam(); - if (addNodePtr.i < cpresidentCandidate) { + if (gci > c_start.m_president_candidate_gci || + (gci == c_start.m_president_candidate_gci && + addNodePtr.i < c_start.m_president_candidate)) + { jam(); - cpresidentCandidate = addNodePtr.i; - }//if + c_start.m_president_candidate = addNodePtr.i; + c_start.m_president_candidate_gci = gci; + } sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION); return; - } + } + /** * We are not the president. * We know the president. @@ -489,7 +667,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (c_start.m_startNode != 0){ + if (c_start.m_startNode != 0) + { jam(); /** * President busy by adding another node @@ -498,7 +677,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (ctoStatus == Q_ACTIVE) { + if (ctoStatus == Q_ACTIVE) + { jam(); /** * Active taking over as president @@ -507,7 +687,8 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; }//if - if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB) { + if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB) + { jam(); /** * The new node is not in config file @@ -516,13 +697,15 @@ void Qmgr::execCM_REGREQ(Signal* signal) return; } + ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec); Phase phase = addNodePtr.p->phase; - if (phase != ZINIT){ + if (phase != ZINIT) + { jam(); DEBUG("phase = " << phase); sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD); return; - }//if + } jam(); /** @@ -594,7 +777,12 @@ void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef, ref->blockRef = reference(); ref->nodeId = getOwnNodeId(); ref->errorCode = Terror; - ref->presidentCandidate = (cpresident == ZNIL ? cpresidentCandidate : cpresident); + ref->presidentCandidate = + (cpresident == ZNIL ? c_start.m_president_candidate : cpresident); + ref->candidate_latest_gci = c_start.m_president_candidate_gci; + ref->latest_gci = c_start.m_latest_gci; + ref->start_type = c_start.m_start_type; + c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, ref->skip_nodes); sendSignal(TBRef, GSN_CM_REGREF, signal, CmRegRef::SignalLength, JBB); DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), ""); @@ -622,22 +810,33 @@ void Qmgr::execCM_REGCONF(Signal* signal) jamEntry(); const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0]; + Uint32 presidentNodeId = cmRegConf->presidentNodeId; if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) { jam(); char buf[128]; - BaseString::snprintf(buf,sizeof(buf),"incompatible version own=0x%x other=0x%x, shutting down", NDB_VERSION, cmRegConf->presidentVersion); + BaseString::snprintf(buf,sizeof(buf), + "incompatible version own=0x%x other=0x%x, " + " shutting down", + NDB_VERSION, cmRegConf->presidentVersion); systemErrorLab(signal, __LINE__, buf); return; } - + myNodePtr.i = getOwnNodeId(); + ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec); + + ndbrequire(c_start.m_gsn == GSN_CM_REGREQ); + ndbrequire(myNodePtr.p->phase = ZSTARTING); + cpdistref = cmRegConf->presidentBlockRef; cpresident = cmRegConf->presidentNodeId; UintR TdynamicId = cmRegConf->dynamicId; c_maxDynamicId = TdynamicId; c_clusterNodes.assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes); + myNodePtr.p->ndynamicId = TdynamicId; + /*--------------------------------------------------------------*/ // Send this as an EVENT REPORT to inform about hearing about // other NDB node proclaiming to be president. @@ -648,10 +847,6 @@ void Qmgr::execCM_REGCONF(Signal* signal) signal->theData[3] = TdynamicId; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); - myNodePtr.i = getOwnNodeId(); - ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec); - myNodePtr.p->ndynamicId = TdynamicId; - for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { jam(); if (c_clusterNodes.get(nodePtr.i)){ @@ -675,6 +870,84 @@ void Qmgr::execCM_REGCONF(Signal* signal) }//Qmgr::execCM_REGCONF() void +Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn) +{ + NodeRecPtr myNodePtr; + myNodePtr.i = getOwnNodeId(); + ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec); + + NodeRecPtr nodePtr; + nodePtr.i = nodeId; + ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec); + + ndbrequire(c_readnodes_nodes.get(nodeId)); + ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr(); + if (gsn == GSN_READ_NODESREF) + { + jam(); +retry: + signal->theData[0] = reference(); + sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA); + return; + } + + if (conf->masterNodeId == ZNIL) + { + jam(); + goto retry; + } + + Uint32 president = conf->masterNodeId; + if (president == cpresident) + { + jam(); + c_readnodes_nodes.clear(nodeId); + return; + } + + char buf[255]; + BaseString::snprintf(buf, sizeof(buf), + "Partitioned cluster! check StartPartialTimeout, " + " node %d thinks %d is president, " + " I think president is: %d", + nodeId, president, cpresident); + + ndbout_c(buf); + CRASH_INSERTION(933); + + if (getNodeState().startLevel == NodeState::SL_STARTED) + { + jam(); + NdbNodeBitmask part; + part.assign(NdbNodeBitmask::Size, conf->clusterNodes); + FailRep* rep = (FailRep*)signal->getDataPtrSend(); + rep->failCause = FailRep::ZPARTITIONED_CLUSTER; + rep->president = cpresident; + c_clusterNodes.copyto(NdbNodeBitmask::Size, rep->partition); + Uint32 ref = calcQmgrBlockRef(nodeId); + Uint32 i = 0; + while((i = part.find(i + 1)) != NdbNodeBitmask::NotFound) + { + if (i == nodeId) + continue; + rep->failNodeId = i; + sendSignal(ref, GSN_FAIL_REP, signal, FailRep::SignalLength, JBA); + } + rep->failNodeId = nodeId; + sendSignal(ref, GSN_FAIL_REP, signal, FailRep::SignalLength, JBB); + return; + } + + CRASH_INSERTION(932); + + progError(__LINE__, + NDBD_EXIT_ARBIT_SHUTDOWN, + buf); + + ndbrequire(false); +} + +void Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){ CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend(); req->nodeId = getOwnNodeId(); @@ -703,26 +976,105 @@ Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){ /*******************************/ /* CM_REGREF */ /*******************************/ +static +const char * +get_start_type_string(Uint32 st) +{ + static char buf[256]; + + if (st == 0) + { + return "<ANY>"; + } + else + { + buf[0] = 0; + for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++) + { + if (st & (1 << i)) + { + if (buf[0]) + strcat(buf, "/"); + switch(i){ + case NodeState::ST_INITIAL_START: + strcat(buf, "inital start"); + break; + case NodeState::ST_SYSTEM_RESTART: + strcat(buf, "system restart"); + break; + case NodeState::ST_NODE_RESTART: + strcat(buf, "node restart"); + break; + case NodeState::ST_INITIAL_NODE_RESTART: + strcat(buf, "initial node restart"); + break; + } + } + } + return buf; + } +} + void Qmgr::execCM_REGREF(Signal* signal) { jamEntry(); - c_regReqReqRecv++; - // Ignore block reference in data[0] - UintR TaddNodeno = signal->theData[1]; - UintR TrefuseReason = signal->theData[2]; - Uint32 candidate = signal->theData[3]; + CmRegRef* ref = (CmRegRef*)signal->getDataPtr(); + UintR TaddNodeno = ref->nodeId; + UintR TrefuseReason = ref->errorCode; + Uint32 candidate = ref->presidentCandidate; + Uint32 node_gci = 1; + Uint32 candidate_gci = 1; + Uint32 start_type = ~0; + NdbNodeBitmask skip_nodes; DEBUG_START3(signal, TrefuseReason); - - if(candidate != cpresidentCandidate){ + + if (signal->getLength() == CmRegRef::SignalLength) + { jam(); - c_regReqReqRecv = ~0; + node_gci = ref->latest_gci; + candidate_gci = ref->candidate_latest_gci; + start_type = ref->start_type; + skip_nodes.assign(NdbNodeBitmask::Size, ref->skip_nodes); } + + c_start.m_regReqReqRecv++; + // Ignore block reference in data[0] + + if(candidate != c_start.m_president_candidate) + { + jam(); + c_start.m_regReqReqRecv = ~0; + } + + c_start.m_starting_nodes.set(TaddNodeno); + if (node_gci) + { + jam(); + c_start.m_starting_nodes_w_log.set(TaddNodeno); + } + + skip_nodes.bitAND(c_definedNodes); + c_start.m_skip_nodes.bitOR(skip_nodes); + + char buf[100]; switch (TrefuseReason) { case CmRegRef::ZINCOMPATIBLE_VERSION: jam(); - systemErrorLab(signal, __LINE__, "incompatible version, connection refused by running ndb node"); + systemErrorLab(signal, __LINE__, + "incompatible version, " + "connection refused by running ndb node"); + case CmRegRef::ZINCOMPATIBLE_START_TYPE: + jam(); + BaseString::snprintf(buf, sizeof(buf), + "incompatible start type detected: node %d" + " reports %s(%d) my start type: %s(%d)", + TaddNodeno, + get_start_type_string(start_type), start_type, + get_start_type_string(c_start.m_start_type), + c_start.m_start_type); + progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); break; case CmRegRef::ZBUSY: case CmRegRef::ZBUSY_TO_PRES: @@ -741,14 +1093,18 @@ void Qmgr::execCM_REGREF(Signal* signal) break; case CmRegRef::ZELECTION: jam(); - if (cpresidentCandidate > TaddNodeno) { + if (candidate_gci > c_start.m_president_candidate_gci || + (candidate_gci == c_start.m_president_candidate_gci && + candidate < c_start.m_president_candidate)) + { jam(); //---------------------------------------- /* We may already have a candidate */ /* choose the lowest nodeno */ //---------------------------------------- signal->theData[3] = 2; - cpresidentCandidate = TaddNodeno; + c_start.m_president_candidate = candidate; + c_start.m_president_candidate_gci = candidate_gci; } else { signal->theData[3] = 4; }//if @@ -776,32 +1132,34 @@ void Qmgr::execCM_REGREF(Signal* signal) //----------------------------------------- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); - if(cpresidentAlive == ZTRUE){ + if(cpresidentAlive == ZTRUE) + { jam(); - DEBUG(""); + DEBUG("cpresidentAlive"); return; } - if(c_regReqReqSent != c_regReqReqRecv){ + if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv) + { jam(); - DEBUG( c_regReqReqSent << " != " << c_regReqReqRecv); + DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv); return; } - if(cpresidentCandidate != getOwnNodeId()){ + if(c_start.m_president_candidate != getOwnNodeId()) + { jam(); - DEBUG(""); + DEBUG("i'm not the candidate"); return; } - + /** - * All configured nodes has agreed + * All connected nodes has agreed */ - Uint64 now = NdbTick_CurrentMillisecond(); - if((c_regReqReqRecv == cnoOfNodes) || now > c_stopElectionTime){ + if(check_startup(signal)) + { jam(); - electionWon(); - sendSttorryLab(signal); + electionWon(signal); /** * Start timer handling @@ -813,8 +1171,193 @@ void Qmgr::execCM_REGREF(Signal* signal) return; }//Qmgr::execCM_REGREF() +Uint32 +Qmgr::check_startup(Signal* signal) +{ + Uint64 now = NdbTick_CurrentMillisecond(); + Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout; + Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout; + + /** + * First see if we should wait more... + */ + NdbNodeBitmask tmp; + tmp.bitOR(c_start.m_skip_nodes); + tmp.bitOR(c_start.m_starting_nodes); + + NdbNodeBitmask wait; + wait.assign(c_definedNodes); + wait.bitANDC(tmp); + + Uint32 retVal = 0; + NdbNodeBitmask report_mask; + + if ((c_start.m_latest_gci == 0) || + (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START))) + { + if (!tmp.equal(c_definedNodes)) + { + jam(); + signal->theData[1] = 1; + signal->theData[2] = ~0; + report_mask.assign(wait); + retVal = 0; + goto start_report; + } + else + { + jam(); + signal->theData[1] = 0x8000; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + { + const bool all = c_start.m_starting_nodes.equal(c_definedNodes); + CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0]; + + { + /** + * Check for missing node group directly + */ + char buf[100]; + NdbNodeBitmask check; + check.assign(c_definedNodes); + check.bitANDC(c_start.m_starting_nodes); // Not connected nodes + check.bitOR(c_start.m_starting_nodes_w_log); + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = check; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + if (sd->output == CheckNodeGroups::Lose) + { + jam(); + goto missing_nodegroup; + } + } + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + const Uint32 result = sd->output; + + sd->blockRef = reference(); + sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; + sd->mask = c_start.m_starting_nodes_w_log; + EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, + CheckNodeGroups::SignalLength); + + const Uint32 result_w_log = sd->output; + + if (tmp.equal(c_definedNodes)) + { + /** + * All nodes (wrt no-wait nodes) has connected... + * this means that we will now start or die + */ + jam(); + switch(result_w_log){ + case CheckNodeGroups::Lose: + { + jam(); + goto missing_nodegroup; + } + case CheckNodeGroups::Win: + signal->theData[1] = all ? 0x8001 : 0x8002; + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + case CheckNodeGroups::Partitioning: + ndbrequire(result != CheckNodeGroups::Lose); + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + + if (now < partial_timeout) + { + jam(); + signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3; + signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000); + report_mask.assign(wait); + retVal = 0; + goto start_report; + } + + /** + * Start partial has passed...check for partitioning... + */ + switch(result_w_log){ + case CheckNodeGroups::Lose: + jam(); + goto missing_nodegroup; + case CheckNodeGroups::Partitioning: + if (now < partitioned_timeout && result != CheckNodeGroups::Win) + { + signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5; + signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 0; + goto start_report; + } + // Fall through... + case CheckNodeGroups::Win: + signal->theData[1] = + all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); + report_mask.assign(c_definedNodes); + report_mask.bitANDC(c_start.m_starting_nodes); + retVal = 1; + goto start_report; + } + } + ndbrequire(false); + +start_report: + jam(); + { + Uint32 sz = NdbNodeBitmask::Size; + signal->theData[0] = NDB_LE_StartReport; + signal->theData[3] = sz; + Uint32* ptr = signal->theData+4; + c_definedNodes.copyto(sz, ptr); ptr += sz; + c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz; + c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz; + report_mask.copyto(sz, ptr); ptr+= sz; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, + 4+4*NdbNodeBitmask::Size, JBB); + } + return retVal; + +missing_nodegroup: + jam(); + char buf[100], mask1[100], mask2[100]; + c_start.m_starting_nodes.getText(mask1); + tmp.assign(c_start.m_starting_nodes); + tmp.bitANDC(c_start.m_starting_nodes_w_log); + tmp.getText(mask2); + BaseString::snprintf(buf, sizeof(buf), + "Unable to start missing node group! " + " starting: %s (missing fs for: %s)", + mask1, mask2); + progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf); +} + void -Qmgr::electionWon(){ +Qmgr::electionWon(Signal* signal){ NodeRecPtr myNodePtr; cpresident = getOwnNodeId(); /* This node becomes president. */ myNodePtr.i = getOwnNodeId(); @@ -831,8 +1374,21 @@ Qmgr::electionWon(){ c_clusterNodes.set(getOwnNodeId()); cpresidentAlive = ZTRUE; - c_stopElectionTime = ~0; + c_start_election_time = ~0; c_start.reset(); + + signal->theData[0] = NDB_LE_CM_REGCONF; + signal->theData[1] = getOwnNodeId(); + signal->theData[2] = cpresident; + signal->theData[3] = 1; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); + + c_start.m_starting_nodes.clear(getOwnNodeId()); + if (c_start.m_starting_nodes.isclear()) + { + jam(); + sendSttorryLab(signal); + } } /* @@ -846,7 +1402,14 @@ Qmgr::electionWon(){ /*--------------------------------------------------------------*/ void Qmgr::regreqTimeLimitLab(Signal* signal) { - if(cpresident == ZNIL){ + if(cpresident == ZNIL) + { + if (c_start.m_president_candidate == ZNIL) + { + jam(); + c_start.m_president_candidate = getOwnNodeId(); + } + cmInfoconf010Lab(signal); } }//Qmgr::regreqTimelimitLab() @@ -967,7 +1530,7 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){ ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD); c_start.m_nodes.clearWaitingFor(); c_start.m_nodes.setWaitingFor(nodePtr.i); - c_start.m_gsn = GSN_CM_ADD; + c_start.m_gsn = GSN_CM_NODEINFOCONF; #else warningEvent("Enabling communication to CM_ADD node %u state=%d", nodePtr.i, @@ -1256,6 +1819,17 @@ void Qmgr::execCM_ACKADD(Signal* signal) */ handleArbitNdbAdd(signal, addNodePtr.i); c_start.reset(); + + if (c_start.m_starting_nodes.get(addNodePtr.i)) + { + jam(); + c_start.m_starting_nodes.clear(addNodePtr.i); + if (c_start.m_starting_nodes.isclear()) + { + jam(); + sendSttorryLab(signal); + } + } return; }//switch ndbrequire(false); @@ -1409,7 +1983,8 @@ void Qmgr::initData(Signal* signal) cnoPrepFailedNodes = 0; creadyDistCom = ZFALSE; cpresident = ZNIL; - cpresidentCandidate = ZNIL; + c_start.m_president_candidate = ZNIL; + c_start.m_president_candidate_gci = 0; cpdistref = 0; cneighbourh = ZNIL; cneighbourl = ZNIL; @@ -1437,15 +2012,33 @@ void Qmgr::initData(Signal* signal) Uint32 hbDBAPI = 1500; Uint32 arbitTimeout = 1000; c_restartPartialTimeout = 30000; + c_restartPartionedTimeout = 60000; + c_restartFailureTimeout = ~0; ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB); ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI); ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout); ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT, &c_restartPartialTimeout); - if(c_restartPartialTimeout == 0){ + ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT, + &c_restartPartionedTimeout); + ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT, + &c_restartFailureTimeout); + + if(c_restartPartialTimeout == 0) + { c_restartPartialTimeout = ~0; } + if (c_restartPartionedTimeout ==0) + { + c_restartPartionedTimeout = ~0; + } + + if (c_restartFailureTimeout == 0) + { + c_restartFailureTimeout = ~0; + } + setHbDelay(hbDBDB); setHbApiDelay(hbDBAPI); setArbitTimeout(arbitTimeout); @@ -1872,10 +2465,23 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) const Uint32 nodeId = rep->nodeId; const Uint32 err = rep->err; c_connectedNodes.clear(nodeId); - + c_readnodes_nodes.clear(nodeId); + NodeRecPtr nodePtr; nodePtr.i = getOwnNodeId(); ptrCheckGuard(nodePtr, MAX_NODES, nodeRec); + + char buf[100]; + if (getNodeInfo(nodeId).getType() == NodeInfo::DB && + getNodeState().startLevel < NodeState::SL_STARTED) + { + jam(); + CRASH_INSERTION(932); + BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); + ndbrequire(false); + } + switch(nodePtr.p->phase){ case ZRUNNING: jam(); @@ -1893,9 +2499,12 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) case ZAPI_ACTIVE: ndbrequire(false); case ZAPI_INACTIVE: + { + BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); + progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); ndbrequire(false); } - + } node_failed(signal, nodeId); }//DISCONNECT_REP @@ -2153,10 +2762,20 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, failedNodePtr.i = aFailedNode; ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec); + FailRep* rep = (FailRep*)signal->getDataPtr(); + + if (check_multi_node_shutdown(signal)) + { + jam(); + return; + } + if (failedNodePtr.i == getOwnNodeId()) { jam(); + Uint32 code = 0; const char * msg = 0; + char extra[100]; switch(aFailCause){ case FailRep::ZOWN_FAILURE: msg = "Own failure"; @@ -2177,17 +2796,51 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, case FailRep::ZLINK_FAILURE: msg = "Connection failure"; break; + case FailRep::ZPARTITIONED_CLUSTER: + { + code = NDBD_EXIT_ARBIT_SHUTDOWN; + char buf1[100], buf2[100]; + c_clusterNodes.getText(buf1); + if (signal->getLength()== FailRep::SignalLength + FailRep::ExtraLength && + signal->header.theVerId_signalNumber == GSN_FAIL_REP) + { + jam(); + NdbNodeBitmask part; + part.assign(NdbNodeBitmask::Size, rep->partition); + part.getText(buf2); + BaseString::snprintf(extra, sizeof(extra), + "Partitioned cluster!" + " Our cluster: %s other cluster: %s", + buf1, buf2); + } + else + { + jam(); + BaseString::snprintf(extra, sizeof(extra), + "Partitioned cluster!" + " Our cluster: %s ", buf1); + } + msg = extra; + break; + } + case FailRep::ZMULTI_NODE_SHUTDOWN: + msg = "Multi node shutdown"; + break; + default: + msg = "<UNKNOWN>"; } - char buf[100]; - BaseString::snprintf(buf, 100, + CRASH_INSERTION(932); + + char buf[255]; + BaseString::snprintf(buf, sizeof(buf), "We(%u) have been declared dead by %u reason: %s(%u)", getOwnNodeId(), refToNode(signal->getSendersBlockRef()), - aFailCause, - msg ? msg : "<Unknown>"); - - progError(__LINE__, 0, buf); + msg ? msg : "<Unknown>", + aFailCause); + + progError(__LINE__, code, buf); return; }//if @@ -2244,7 +2897,13 @@ void Qmgr::execPREP_FAILREQ(Signal* signal) { NodeRecPtr myNodePtr; jamEntry(); - + + if (check_multi_node_shutdown(signal)) + { + jam(); + return; + } + PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0]; BlockReference Tblockref = prepFail->xxxBlockRef; @@ -3896,6 +4555,7 @@ Qmgr::stateArbitCrash(Signal* signal) if (! (arbitRec.getTimediff() > getArbitTimeout())) return; #endif + CRASH_INSERTION(932); progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN, "Arbitrator decided to shutdown this node"); } @@ -3959,8 +4619,10 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal) case 1: infoEvent("creadyDistCom = %d, cpresident = %d\n", creadyDistCom, cpresident); - infoEvent("cpresidentAlive = %d, cpresidentCand = %d\n", - cpresidentAlive, cpresidentCandidate); + infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n", + cpresidentAlive, + c_start.m_president_candidate, + c_start.m_president_candidate_gci); infoEvent("ctoStatus = %d\n", ctoStatus); for(Uint32 i = 1; i<MAX_NDB_NODES; i++){ if(getNodeInfo(i).getType() == NodeInfo::DB){ @@ -4221,3 +4883,43 @@ Qmgr::completeAllocNodeIdReq(Signal *signal) sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal, AllocNodeIdConf::SignalLength, JBB); } + +void +Qmgr::execSTOP_REQ(Signal* signal) +{ + jamEntry(); + c_stopReq = * (StopReq*)signal->getDataPtr(); + + if (c_stopReq.senderRef) + { + jam(); + ndbrequire(NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId())); + + StopConf *conf = (StopConf*)signal->getDataPtrSend(); + conf->senderData = c_stopReq.senderData; + conf->nodeState = getOwnNodeId(); + sendSignal(c_stopReq.senderRef, + GSN_STOP_CONF, signal, StopConf::SignalLength, JBA); + } +} + +bool +Qmgr::check_multi_node_shutdown(Signal* signal) +{ + if (c_stopReq.senderRef && + NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId())) + { + jam(); + if(StopReq::getPerformRestart(c_stopReq.requestInfo)) + { + jam(); + StartOrd * startOrd = (StartOrd *)&signal->theData[0]; + startOrd->restartInfo = c_stopReq.requestInfo; + EXECUTE_DIRECT(CMVMI, GSN_START_ORD, signal, 2); + } else { + EXECUTE_DIRECT(CMVMI, GSN_STOP_ORD, signal, 1); + } + return true; + } + return false; +} diff --git a/storage/ndb/src/kernel/vm/Configuration.cpp b/storage/ndb/src/kernel/vm/Configuration.cpp index 5f3b601023f..227ad6d6893 100644 --- a/storage/ndb/src/kernel/vm/Configuration.cpp +++ b/storage/ndb/src/kernel/vm/Configuration.cpp @@ -55,6 +55,12 @@ enum ndbd_options { NDB_STD_OPTS_VARS; // XXX should be my_bool ??? static int _daemon, _no_daemon, _foreground, _initial, _no_start; +static int _initialstart; +static const char* _nowait_nodes; + +extern Uint32 g_start_type; +extern NdbNodeBitmask g_nowait_nodes; + /** * Arguments to NDB process */ @@ -82,6 +88,14 @@ static struct my_option my_long_options[] = " (implies --nodaemon)", (gptr*) &_foreground, (gptr*) &_foreground, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, + { "nowait-nodes", NO_ARG, + "Nodes that will not be waited for during start", + (gptr*) &_nowait_nodes, (gptr*) &_nowait_nodes, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, + { "initial-start", NO_ARG, + "Perform initial start", + (gptr*) &_initialstart, (gptr*) &_initialstart, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; static void short_usage_sub(void) @@ -150,6 +164,37 @@ Configuration::init(int argc, char** argv) globalData.ownId= 0; + if (_nowait_nodes) + { + BaseString str(_nowait_nodes); + Vector<BaseString> arr; + str.split(arr, ","); + for (Uint32 i = 0; i<arr.size(); i++) + { + char *endptr = 0; + long val = strtol(arr[i].c_str(), &endptr, 10); + if (*endptr) + { + ndbout_c("Unable to parse nowait-nodes argument: %s : %s", + arr[i].c_str(), _nowait_nodes); + exit(-1); + } + if (! (val > 0 && val < MAX_NDB_NODES)) + { + ndbout_c("Invalid nodeid specified in nowait-nodes: %d : %s", + val, _nowait_nodes); + exit(-1); + } + g_nowait_nodes.set(val); + } + } + + if (_initialstart) + { + _initialStart = true; + g_start_type |= (1 << NodeState::ST_INITIAL_START); + } + return true; } diff --git a/storage/ndb/src/mgmclient/CommandInterpreter.cpp b/storage/ndb/src/mgmclient/CommandInterpreter.cpp index 5496b2dadc3..cfe3374ed06 100644 --- a/storage/ndb/src/mgmclient/CommandInterpreter.cpp +++ b/storage/ndb/src/mgmclient/CommandInterpreter.cpp @@ -18,6 +18,7 @@ #include <my_sys.h> #include <Vector.hpp> #include <mgmapi.h> +#include <util/BaseString.hpp> class MgmtSrvr; @@ -63,6 +64,9 @@ private: */ void analyseAfterFirstToken(int processId, char* allAfterFirstTokenCstr); + void executeCommand(Vector<BaseString> &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes); /** * Parse the block specification part of the LOG* commands, * things after LOG*: [BLOCK = {ALL|<blockName>+}] @@ -97,10 +101,14 @@ private: public: void executeStop(int processId, const char* parameters, bool all); + void executeStop(Vector<BaseString> &command_list, unsigned command_pos, + int *node_ids, int no_of_nodes); void executeEnterSingleUser(char* parameters); void executeExitSingleUser(char* parameters); void executeStart(int processId, const char* parameters, bool all); void executeRestart(int processId, const char* parameters, bool all); + void executeRestart(Vector<BaseString> &command_list, unsigned command_pos, + int *node_ids, int no_of_nodes); void executeLogLevel(int processId, const char* parameters, bool all); void executeError(int processId, const char* parameters, bool all); void executeLog(int processId, const char* parameters, bool all); @@ -583,9 +591,16 @@ CommandInterpreter::execute_impl(const char *_line) } } while (do_continue); // if there is anything in the line proceed + Vector<BaseString> command_list; + { + BaseString tmp(line); + tmp.split(command_list); + for (unsigned i= 0; i < command_list.size();) + command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0); + } char* firstToken = strtok(line, " "); char* allAfterFirstToken = strtok(NULL, ""); - + if (strcasecmp(firstToken, "HELP") == 0 || strcasecmp(firstToken, "?") == 0) { executeHelp(allAfterFirstToken); @@ -656,22 +671,45 @@ CommandInterpreter::execute_impl(const char *_line) analyseAfterFirstToken(-1, allAfterFirstToken); } else { /** - * First token should be a digit, node ID + * First tokens should be digits, node ID's */ - int nodeId; - - if (! convert(firstToken, nodeId)) { + int node_ids[MAX_NODES]; + unsigned pos; + for (pos= 0; pos < command_list.size(); pos++) + { + int node_id; + if (convert(command_list[pos].c_str(), node_id)) + { + if (node_id <= 0) { + ndbout << "Invalid node ID: " << command_list[pos].c_str() + << "." << endl; + DBUG_RETURN(true); + } + node_ids[pos]= node_id; + continue; + } + break; + } + int no_of_nodes= pos; + if (no_of_nodes == 0) + { + /* No digit found */ invalid_command(_line); DBUG_RETURN(true); } - - if (nodeId <= 0) { - ndbout << "Invalid node ID: " << firstToken << "." << endl; + if (pos == command_list.size()) + { + /* No command found */ + invalid_command(_line); DBUG_RETURN(true); } - - analyseAfterFirstToken(nodeId, allAfterFirstToken); - + if (no_of_nodes == 1) + { + analyseAfterFirstToken(node_ids[0], allAfterFirstToken); + DBUG_RETURN(true); + } + executeCommand(command_list, pos, node_ids, no_of_nodes); + DBUG_RETURN(true); } DBUG_RETURN(true); } @@ -741,6 +779,27 @@ CommandInterpreter::analyseAfterFirstToken(int processId, ndbout << endl; } +void +CommandInterpreter::executeCommand(Vector<BaseString> &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes) +{ + const char *cmd= command_list[command_pos].c_str(); + if (strcasecmp("STOP", cmd) == 0) + { + executeStop(command_list, command_pos+1, node_ids, no_of_nodes); + return; + } + if (strcasecmp("RESTART", cmd) == 0) + { + executeRestart(command_list, command_pos+1, node_ids, no_of_nodes); + return; + } + ndbout_c("Invalid command: '%s' after multi node id list. " + "Expected STOP or RESTART.", cmd); + return; +} + /** * Get next nodeid larger than the give node_id. node_id will be * set to the next node_id in the list. node_id should be set @@ -1326,24 +1385,60 @@ CommandInterpreter::executeClusterLog(char* parameters) //***************************************************************************** void -CommandInterpreter::executeStop(int processId, const char *, bool all) +CommandInterpreter::executeStop(int processId, const char *parameters, + bool all) { - int result = 0; - if(all) { - result = ndb_mgm_stop(m_mgmsrv, 0, 0); - } else { - result = ndb_mgm_stop(m_mgmsrv, 1, &processId); + Vector<BaseString> command_list; + if (parameters) + { + BaseString tmp(parameters); + tmp.split(command_list); + for (unsigned i= 0; i < command_list.size();) + command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0); } - if (result < 0) { - ndbout << "Shutdown failed." << endl; + if (all) + executeStop(command_list, 0, 0, 0); + else + executeStop(command_list, 0, &processId, 1); +} + +void +CommandInterpreter::executeStop(Vector<BaseString> &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes) +{ + int abort= 0; + for (; command_pos < command_list.size(); command_pos++) + { + const char *item= command_list[command_pos].c_str(); + if (strcasecmp(item, "-A") == 0) + { + abort= 1; + continue; + } + ndbout_c("Invalid option: %s. Expecting -A after STOP", + item); + return; + } + + int result= ndb_mgm_stop2(m_mgmsrv, no_of_nodes, node_ids, abort); + if (result < 0) + { + ndbout_c("Shutdown failed."); printError(); - } else + } + else + { + if (node_ids == 0) + ndbout_c("NDB Cluster has shutdown."); + else { - if(all) - ndbout << "NDB Cluster has shutdown." << endl; - else - ndbout << "Node " << processId << " has shutdown." << endl; + ndbout << "Node"; + for (int i= 0; i < no_of_nodes; i++) + ndbout << " " << node_ids[i]; + ndbout_c(" has shutdown."); } + } } void @@ -1409,47 +1504,74 @@ CommandInterpreter::executeStart(int processId, const char* parameters, void CommandInterpreter::executeRestart(int processId, const char* parameters, - bool all) + bool all) +{ + Vector<BaseString> command_list; + if (parameters) + { + BaseString tmp(parameters); + tmp.split(command_list); + for (unsigned i= 0; i < command_list.size();) + command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0); + } + if (all) + executeRestart(command_list, 0, 0, 0); + else + executeRestart(command_list, 0, &processId, 1); +} + +void +CommandInterpreter::executeRestart(Vector<BaseString> &command_list, + unsigned command_pos, + int *node_ids, int no_of_nodes) { int result; - int nostart = 0; - int initialstart = 0; - int abort = 0; - - if(parameters != 0 && strlen(parameters) != 0){ - char * tmpString = my_strdup(parameters,MYF(MY_WME)); - My_auto_ptr<char> ap1(tmpString); - char * tmpPtr = 0; - char * item = strtok_r(tmpString, " ", &tmpPtr); - while(item != NULL){ - if(strcasecmp(item, "-N") == 0) - nostart = 1; - if(strcasecmp(item, "-I") == 0) - initialstart = 1; - if(strcasecmp(item, "-A") == 0) - abort = 1; - item = strtok_r(NULL, " ", &tmpPtr); + int nostart= 0; + int initialstart= 0; + int abort= 0; + + for (; command_pos < command_list.size(); command_pos++) + { + const char *item= command_list[command_pos].c_str(); + if (strcasecmp(item, "-N") == 0) + { + nostart= 1; + continue; } + if (strcasecmp(item, "-I") == 0) + { + initialstart= 1; + continue; + } + if (strcasecmp(item, "-A") == 0) + { + abort= 1; + continue; + } + ndbout_c("Invalid option: %s. Expecting -A,-N or -I after RESTART", + item); + return; } - if(all) { - result = ndb_mgm_restart2(m_mgmsrv, 0, NULL, initialstart, nostart, abort); - } else { - int v[1]; - v[0] = processId; - result = ndb_mgm_restart2(m_mgmsrv, 1, v, initialstart, nostart, abort); - } + result= ndb_mgm_restart2(m_mgmsrv, no_of_nodes, node_ids, + initialstart, nostart, abort); if (result <= 0) { - ndbout.println("Restart failed.", result); + ndbout_c("Restart failed."); printError(); - } else + } + else + { + if (node_ids == 0) + ndbout_c("NDB Cluster is being restarted."); + else { - if(all) - ndbout << "NDB Cluster is being restarted." << endl; - else - ndbout_c("Node %d is being restarted.", processId); + ndbout << "Node"; + for (int i= 0; i < no_of_nodes; i++) + ndbout << " " << node_ids[i]; + ndbout_c(" is being restarted"); } + } } void diff --git a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp index 5bdc62207f5..48094800f9a 100644 --- a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp +++ b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp @@ -294,6 +294,8 @@ static ErrorItem errorTable[] = {MgmtSrvr::SYSTEM_SHUTDOWN_IN_PROGRESS, "System shutdown in progress" }, {MgmtSrvr::NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH, "Node shutdown would cause system crash" }, + {MgmtSrvr::UNSUPPORTED_NODE_SHUTDOWN, + "Unsupported multi node shutdown. Abort option required." }, {MgmtSrvr::NODE_NOT_API_NODE, "The specified node is not an API node." }, {MgmtSrvr::OPERATION_NOT_ALLOWED_START_STOP, "Operation not allowed while nodes are starting or stopping."}, @@ -312,6 +314,9 @@ int MgmtSrvr::translateStopRef(Uint32 errCode) case StopRef::NodeShutdownWouldCauseSystemCrash: return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH; break; + case StopRef::UnsupportedNodeShutdown: + return UNSUPPORTED_NODE_SHUTDOWN; + break; } return 4999; } @@ -386,8 +391,9 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_server, _ownReference(0), theSignalIdleList(NULL), theWaitState(WAIT_SUBSCRIBE_CONF), + m_local_mgm_handle(0), m_event_listner(this), - m_local_mgm_handle(0) + m_master_node(0) { DBUG_ENTER("MgmtSrvr::MgmtSrvr"); @@ -672,23 +678,16 @@ MgmtSrvr::~MgmtSrvr() int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond) { - if(nodeId == 0) - return 0; - - if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB) + if(nodeId == 0 || getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB) return WRONG_PROCESS_TYPE; - // Check if we have contact with it if(unCond){ if(theFacade->theClusterMgr->getNodeInfo(nodeId).connected) return 0; - return NO_CONTACT_WITH_PROCESS; } - if (theFacade->get_node_alive(nodeId) == 0) { - return NO_CONTACT_WITH_PROCESS; - } else { + else if (theFacade->get_node_alive(nodeId) == true) return 0; - } + return NO_CONTACT_WITH_PROCESS; } void report_unknown_signal(SimpleSignal *signal) @@ -930,7 +929,7 @@ int MgmtSrvr::sendStopMgmd(NodeId nodeId, * distributed communication up. */ -int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, +int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids, NodeBitmask &stoppedNodes, Uint32 singleUserNodeId, bool abort, @@ -940,6 +939,12 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, bool initialStart) { int error = 0; + DBUG_ENTER("MgmtSrvr::sendSTOP_REQ"); + DBUG_PRINT("enter", ("no of nodes: %d singleUseNodeId: %d " + "abort: %d stop: %d restart: %d " + "nostart: %d initialStart: %d", + node_ids.size(), singleUserNodeId, + abort, stop, restart, nostart, initialStart)); stoppedNodes.clear(); @@ -977,36 +982,49 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, // send the signals NodeBitmask nodes; - if (nodeId) + NodeId nodeId= 0; + int use_master_node= 0; + int do_send= 0; + int do_stop_self= 0; + NdbNodeBitmask nodes_to_stop; { - if(nodeId==getOwnNodeId()) - { - if(restart) - g_RestartServer= true; - g_StopServer= true; - return 0; - } - if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB) + for (unsigned i= 0; i < node_ids.size(); i++) { - int r; - if((r= okToSendTo(nodeId, true)) != 0) - return r; - if (ss.sendSignal(nodeId, &ssig) != SEND_OK) - return SEND_OR_RECEIVE_FAILED; + nodeId= node_ids[i]; + if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_MGM) + nodes_to_stop.set(nodeId); + else if (nodeId != getOwnNodeId()) + { + error= sendStopMgmd(nodeId, abort, stop, restart, + nostart, initialStart); + if (error == 0) + stoppedNodes.set(nodeId); + } + else + do_stop_self= 1;; } - else if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM) + } + int no_of_nodes_to_stop= nodes_to_stop.count(); + if (node_ids.size()) + { + if (no_of_nodes_to_stop) { - error= sendStopMgmd(nodeId, abort, stop, restart, nostart, initialStart); - if(error==0) - stoppedNodes.set(nodeId); - return error; + do_send= 1; + if (no_of_nodes_to_stop == 1) + { + nodeId= nodes_to_stop.find(0); + } + else // multi node stop, send to master + { + use_master_node= 1; + nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes); + StopReq::setStopNodes(stopReq->requestInfo, 1); + } } - else - return WRONG_PROCESS_TYPE; - nodes.set(nodeId); } else { + nodeId= 0; while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) { if(okToSendTo(nodeId, true) == 0) @@ -1027,8 +1045,30 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, } // now wait for the replies - while (!nodes.isclear()) + while (!nodes.isclear() || do_send) { + if (do_send) + { + int r; + assert(nodes.count() == 0); + if (use_master_node) + nodeId= m_master_node; + if ((r= okToSendTo(nodeId, true)) != 0) + { + bool next; + if (!use_master_node) + DBUG_RETURN(r); + m_master_node= nodeId= 0; + while((next= getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true && + (r= okToSendTo(nodeId, true)) != 0); + if (!next) + DBUG_RETURN(NO_CONTACT_WITH_DB_NODES); + } + if (ss.sendSignal(nodeId, &ssig) != SEND_OK) + DBUG_RETURN(SEND_OR_RECEIVE_FAILED); + nodes.set(nodeId); + do_send= 0; + } SimpleSignal *signal = ss.waitFor(); int gsn = signal->readSignalNumber(); switch (gsn) { @@ -1040,6 +1080,13 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, #endif assert(nodes.get(nodeId)); nodes.clear(nodeId); + if (ref->errorCode == StopRef::MultiNodeShutdownNotMaster) + { + assert(use_master_node); + m_master_node= ref->masterNodeId; + do_send= 1; + continue; + } error = translateStopRef(ref->errorCode); break; } @@ -1050,9 +1097,16 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, ndbout_c("Node %d single user mode", nodeId); #endif assert(nodes.get(nodeId)); - assert(singleUserNodeId != 0); + if (singleUserNodeId != 0) + { + stoppedNodes.set(nodeId); + } + else + { + assert(no_of_nodes_to_stop > 1); + stoppedNodes.bitOR(nodes_to_stop); + } nodes.clear(nodeId); - stoppedNodes.set(nodeId); break; } case GSN_NF_COMPLETEREP:{ @@ -1091,17 +1145,24 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, #ifdef VM_TRACE ndbout_c("Unknown signal %d", gsn); #endif - return SEND_OR_RECEIVE_FAILED; + DBUG_RETURN(SEND_OR_RECEIVE_FAILED); } } - return error; + if (!error && do_stop_self) + { + if (restart) + g_RestartServer= true; + g_StopServer= true; + } + DBUG_RETURN(error); } /* - * Stop one node + * Stop one nodes */ -int MgmtSrvr::stopNode(int nodeId, bool abort) +int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids, + int *stopCount, bool abort) { if (!abort) { @@ -1116,14 +1177,17 @@ int MgmtSrvr::stopNode(int nodeId, bool abort) } } NodeBitmask nodes; - return sendSTOP_REQ(nodeId, - nodes, - 0, - abort, - false, - false, - false, - false); + int ret= sendSTOP_REQ(node_ids, + nodes, + 0, + abort, + false, + false, + false, + false); + if (stopCount) + *stopCount= nodes.count(); + return ret; } /* @@ -1133,7 +1197,8 @@ int MgmtSrvr::stopNode(int nodeId, bool abort) int MgmtSrvr::stop(int * stopCount, bool abort) { NodeBitmask nodes; - int ret = sendSTOP_REQ(0, + Vector<NodeId> node_ids; + int ret = sendSTOP_REQ(node_ids, nodes, 0, abort, @@ -1164,7 +1229,8 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId) return OPERATION_NOT_ALLOWED_START_STOP; } NodeBitmask nodes; - int ret = sendSTOP_REQ(0, + Vector<NodeId> node_ids; + int ret = sendSTOP_REQ(node_ids, nodes, singleUserNodeId, false, @@ -1181,18 +1247,22 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId) * Perform node restart */ -int MgmtSrvr::restartNode(int nodeId, bool nostart, bool initialStart, - bool abort) +int MgmtSrvr::restartNodes(const Vector<NodeId> &node_ids, + int * stopCount, bool nostart, + bool initialStart, bool abort) { NodeBitmask nodes; - return sendSTOP_REQ(nodeId, - nodes, - 0, - abort, - false, - true, - nostart, - initialStart); + int ret= sendSTOP_REQ(node_ids, + nodes, + 0, + abort, + false, + true, + nostart, + initialStart); + if (stopCount) + *stopCount = nodes.count(); + return ret; } /* @@ -1203,7 +1273,8 @@ int MgmtSrvr::restart(bool nostart, bool initialStart, bool abort, int * stopCount ) { NodeBitmask nodes; - int ret = sendSTOP_REQ(0, + Vector<NodeId> node_ids; + int ret = sendSTOP_REQ(node_ids, nodes, 0, abort, @@ -2241,12 +2312,16 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) SignalSender ss(theFacade); ss.lock(); // lock will be released on exit - bool next; - NodeId nodeId = 0; - while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true && - theFacade->get_node_alive(nodeId) == false); - - if(!next) return NO_CONTACT_WITH_DB_NODES; + NodeId nodeId = m_master_node; + if (okToSendTo(nodeId, false) != 0) + { + bool next; + nodeId = m_master_node = 0; + while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true && + okToSendTo(nodeId, false) != 0); + if(!next) + return NO_CONTACT_WITH_DB_NODES; + } SimpleSignal ssig; BackupReq* req = CAST_PTR(BackupReq, ssig.getDataPtrSend()); @@ -2314,7 +2389,7 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) const BackupRef * const ref = CAST_CONSTPTR(BackupRef, signal->getDataPtr()); if(ref->errorCode == BackupRef::IAmNotMaster){ - nodeId = refToNode(ref->masterRef); + m_master_node = nodeId = refToNode(ref->masterRef); #ifdef VM_TRACE ndbout_c("I'm not master resending to %d", nodeId); #endif diff --git a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp index 20e6ff1bc43..ddd5247b446 100644 --- a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp +++ b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp @@ -176,6 +176,7 @@ public: STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 ); STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 ); + STATIC_CONST( UNSUPPORTED_NODE_SHUTDOWN = 5031 ); STATIC_CONST( NODE_NOT_API_NODE = 5062 ); STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 ); @@ -252,7 +253,7 @@ public: * @param processId: Id of the DB process to stop * @return 0 if succeeded, otherwise: as stated above, plus: */ - int stopNode(int nodeId, bool abort = false); + int stopNodes(const Vector<NodeId> &node_ids, int *stopCount, bool abort); /** * Stop the system @@ -286,11 +287,12 @@ public: int start(int processId); /** - * Restart a node + * Restart nodes * @param processId: Id of the DB process to start */ - int restartNode(int processId, bool nostart, bool initialStart, - bool abort = false); + int restartNodes(const Vector<NodeId> &node_ids, + int *stopCount, bool nostart, + bool initialStart, bool abort); /** * Restart the system @@ -489,7 +491,7 @@ private: bool nostart, bool initialStart); - int sendSTOP_REQ(NodeId nodeId, + int sendSTOP_REQ(const Vector<NodeId> &node_ids, NodeBitmask &stoppedNodes, Uint32 singleUserNodeId, bool abort, @@ -649,6 +651,8 @@ private: friend class Ndb_mgmd_event_service; Ndb_mgmd_event_service m_event_listner; + NodeId m_master_node; + /** * Handles the thread wich upon a 'Node is started' event will * set the node's previous loglevel settings. diff --git a/storage/ndb/src/mgmsrv/Services.cpp b/storage/ndb/src/mgmsrv/Services.cpp index aebadec933c..7b17cfa21e2 100644 --- a/storage/ndb/src/mgmsrv/Services.cpp +++ b/storage/ndb/src/mgmsrv/Services.cpp @@ -866,14 +866,11 @@ MgmApiSession::restart(Parser<MgmApiSession>::Context &, } int restarted = 0; - int result = 0; - - for(size_t i = 0; i < nodes.size(); i++) - if((result = m_mgmsrv.restartNode(nodes[i], - nostart != 0, - initialstart != 0, - abort != 0)) == 0) - restarted++; + int result= m_mgmsrv.restartNodes(nodes, + &restarted, + nostart != 0, + initialstart != 0, + abort != 0); m_output->println("restart reply"); if(result != 0){ @@ -998,7 +995,12 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, args.get("node", (const char **)&nodes_str); if(nodes_str == NULL) + { + m_output->println("stop reply"); + m_output->println("result: empty node list"); + m_output->println(""); return; + } args.get("abort", &abort); char *p, *last; @@ -1008,29 +1010,10 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, nodes.push_back(atoi(p)); } - int stop_self= 0; - size_t i; - - for(i=0; i < nodes.size(); i++) { - if (nodes[i] == m_mgmsrv.getOwnNodeId()) { - stop_self= 1; - if (i != nodes.size()-1) { - m_output->println("stop reply"); - m_output->println("result: server must be stopped last"); - m_output->println(""); - return; - } - } - } - - int stopped = 0, result = 0; - - for(i=0; i < nodes.size(); i++) - if (nodes[i] != m_mgmsrv.getOwnNodeId()) { - if((result = m_mgmsrv.stopNode(nodes[i], abort != 0)) == 0) - stopped++; - } else - stopped++; + int stopped= 0; + int result= 0; + if (nodes.size()) + result= m_mgmsrv.stopNodes(nodes, &stopped, abort != 0); m_output->println("stop reply"); if(result != 0) @@ -1039,9 +1022,6 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, m_output->println("result: Ok"); m_output->println("stopped: %d", stopped); m_output->println(""); - - if (stop_self) - g_StopServer= true; } diff --git a/storage/ndb/test/ndbapi/testNodeRestart.cpp b/storage/ndb/test/ndbapi/testNodeRestart.cpp index ba195290e9e..aed0b39f196 100644 --- a/storage/ndb/test/ndbapi/testNodeRestart.cpp +++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp @@ -22,7 +22,7 @@ #include <NdbRestarts.hpp> #include <Vector.hpp> #include <signaldata/DumpStateOrd.hpp> - +#include <Bitmask.hpp> int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){ @@ -669,6 +669,206 @@ err: return NDBT_FAILED; } +int +runBug18612(NDBT_Context* ctx, NDBT_Step* step){ + + // Assume two replicas + NdbRestarter restarter; + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + Uint32 cnt = restarter.getNumDbNodes(); + + for(int loop = 0; loop < ctx->getNumLoops(); loop++) + { + int partition0[256]; + int partition1[256]; + bzero(partition0, sizeof(partition0)); + bzero(partition1, sizeof(partition1)); + Bitmask<4> nodesmask; + + Uint32 node1 = restarter.getDbNodeId(rand()%cnt); + for (Uint32 i = 0; i<cnt/2; i++) + { + do { + int tmp = restarter.getRandomNodeOtherNodeGroup(node1, rand()); + if (tmp == -1) + break; + node1 = tmp; + } while(nodesmask.get(node1)); + + partition0[i] = node1; + partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand()); + + ndbout_c("nodes %d %d", node1, partition1[i]); + + assert(!nodesmask.get(node1)); + assert(!nodesmask.get(partition1[i])); + nodesmask.set(node1); + nodesmask.set(partition1[i]); + } + + ndbout_c("done"); + + int dump[255]; + dump[0] = DumpStateOrd::NdbcntrStopNodes; + memcpy(dump + 1, partition0, sizeof(int)*cnt/2); + + Uint32 master = restarter.getMasterNodeId(); + + if (restarter.dumpStateOneNode(master, dump, 1+cnt/2)) + return NDBT_FAILED; + + if (restarter.waitNodesNoStart(partition0, cnt/2)) + return NDBT_FAILED; + + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + + if (restarter.dumpStateAllNodes(val2, 2)) + return NDBT_FAILED; + + if (restarter.insertErrorInAllNodes(932)) + return NDBT_FAILED; + + dump[0] = 9000; + memcpy(dump + 1, partition0, sizeof(int)*cnt/2); + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateOneNode(partition1[i], dump, 1+cnt/2)) + return NDBT_FAILED; + + dump[0] = 9000; + memcpy(dump + 1, partition1, sizeof(int)*cnt/2); + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateOneNode(partition0[i], dump, 1+cnt/2)) + return NDBT_FAILED; + + if (restarter.startNodes(partition0, cnt/2)) + return NDBT_FAILED; + + if (restarter.waitNodesStartPhase(partition0, cnt/2, 2)) + return NDBT_FAILED; + + dump[0] = 9001; + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateAllNodes(dump, 2)) + return NDBT_FAILED; + + if (restarter.waitNodesNoStart(partition0, cnt/2)) + return NDBT_FAILED; + + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.restartOneDbNode(partition0[i], true, true, true)) + return NDBT_FAILED; + + if (restarter.waitNodesNoStart(partition0, cnt/2)) + return NDBT_FAILED; + + if (restarter.startAll()) + return NDBT_FAILED; + + if (restarter.waitClusterStarted()) + return NDBT_FAILED; + } + return NDBT_OK; +} + +int +runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){ + + // Assume two replicas + NdbRestarter restarter; + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + Uint32 cnt = restarter.getNumDbNodes(); + + for(int loop = 0; loop < ctx->getNumLoops(); loop++) + { + int partition0[256]; + int partition1[256]; + bzero(partition0, sizeof(partition0)); + bzero(partition1, sizeof(partition1)); + Bitmask<4> nodesmask; + + Uint32 node1 = restarter.getDbNodeId(rand()%cnt); + for (Uint32 i = 0; i<cnt/2; i++) + { + do { + int tmp = restarter.getRandomNodeOtherNodeGroup(node1, rand()); + if (tmp == -1) + break; + node1 = tmp; + } while(nodesmask.get(node1)); + + partition0[i] = node1; + partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand()); + + ndbout_c("nodes %d %d", node1, partition1[i]); + + assert(!nodesmask.get(node1)); + assert(!nodesmask.get(partition1[i])); + nodesmask.set(node1); + nodesmask.set(partition1[i]); + } + + ndbout_c("done"); + + if (restarter.restartAll(false, true, false)) + return NDBT_FAILED; + + int dump[255]; + dump[0] = 9000; + memcpy(dump + 1, partition0, sizeof(int)*cnt/2); + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateOneNode(partition1[i], dump, 1+cnt/2)) + return NDBT_FAILED; + + dump[0] = 9000; + memcpy(dump + 1, partition1, sizeof(int)*cnt/2); + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateOneNode(partition0[i], dump, 1+cnt/2)) + return NDBT_FAILED; + + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + + if (restarter.dumpStateAllNodes(val2, 2)) + return NDBT_FAILED; + + if (restarter.insertErrorInAllNodes(932)) + return NDBT_FAILED; + + if (restarter.startAll()) + return NDBT_FAILED; + + if (restarter.waitClusterStartPhase(2)) + return NDBT_FAILED; + + dump[0] = 9001; + for (Uint32 i = 0; i<cnt/2; i++) + if (restarter.dumpStateAllNodes(dump, 2)) + return NDBT_FAILED; + + if (restarter.waitClusterNoStart(30)) + if (restarter.waitNodesNoStart(partition0, cnt/2, 10)) + if (restarter.waitNodesNoStart(partition1, cnt/2, 10)) + return NDBT_FAILED; + + if (restarter.startAll()) + return NDBT_FAILED; + + if (restarter.waitClusterStarted()) + return NDBT_FAILED; + } + return NDBT_OK; +} + + NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", "Test that one node at a time can be stopped and then restarted "\ @@ -963,6 +1163,18 @@ TESTCASE("Bug18414", STEP(runBug18414); FINALIZER(runClearTable); } +TESTCASE("Bug18612", + "Test bug with partitioned clusters"){ + INITIALIZER(runLoadTable); + STEP(runBug18612); + FINALIZER(runClearTable); +} +TESTCASE("Bug18612SR", + "Test bug with partitioned clusters"){ + INITIALIZER(runLoadTable); + STEP(runBug18612SR); + FINALIZER(runClearTable); +} NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ diff --git a/storage/ndb/test/run-test/daily-basic-tests.txt b/storage/ndb/test/run-test/daily-basic-tests.txt index 5d5e70f7184..08467a652f0 100644 --- a/storage/ndb/test/run-test/daily-basic-tests.txt +++ b/storage/ndb/test/run-test/daily-basic-tests.txt @@ -453,10 +453,18 @@ args: -n Bug16772 T1 #cmd: testSystemRestart #args: -n Bug18385 T1 # -max-time: 500 +max-time: 1000 cmd: testNodeRestart args: -n Bug18414 T1 +max-time: 1000 +cmd: testNodeRestart +args: -n Bug18612 T1 + +max-time: 1000 +cmd: testNodeRestart +args: -n Bug18612SR T1 + # # DICT TESTS max-time: 1500 diff --git a/storage/ndb/test/src/NdbRestarts.cpp b/storage/ndb/test/src/NdbRestarts.cpp index eea4af437c4..8465caaab48 100644 --- a/storage/ndb/test/src/NdbRestarts.cpp +++ b/storage/ndb/test/src/NdbRestarts.cpp @@ -445,8 +445,7 @@ int twoNodeFailure(NdbRestarter& _restarter, << ") secs " << endl; NdbSleep_SecSleep(seconds); - randomId = (rand() % _restarter.getNumDbNodes()); - nodeId = _restarter.getDbNodeId(randomId); + nodeId = _restarter.getRandomNodeOtherNodeGroup(nodeId, rand()); g_info << _restart->m_name << ": node = "<< nodeId << endl; CHECK(_restarter.insertErrorInNode(nodeId, 9999) == 0, |